Crystalcareai committed on
Commit
7d42e86
·
verified ·
1 Parent(s): 7b0e256

Update modeling_quiet.py

Browse files
Files changed (1) hide show
  1. modeling_quiet.py +0 -9
modeling_quiet.py CHANGED
@@ -1045,15 +1045,6 @@ class QuietModel(QuietPreTrainedModel):
1045
  if inputs_embeds is None:
1046
  inputs_embeds = self.embed_tokens(input_ids)
1047
 
1048
- if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache:
1049
- is_padding_right = (attention_mask[:, -1] == 0).any().item()
1050
- if is_padding_right:
1051
- raise ValueError(
1052
- "You are attempting to perform batched generation with padding_side='right'"
1053
- " this may lead to unexpected behaviour for Flash Attention version of Quiet. Make sure to "
1054
- " call `tokenizer.padding_side = 'left'` before tokenizing the input. "
1055
- )
1056
-
1057
  if self._attn_implementation == "flash_attention_2":
1058
  # 2d mask is passed through the layers
1059
  attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
 
1045
  if inputs_embeds is None:
1046
  inputs_embeds = self.embed_tokens(input_ids)
1047
 
 
 
 
 
 
 
 
 
 
1048
  if self._attn_implementation == "flash_attention_2":
1049
  # 2d mask is passed through the layers
1050
  attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None