Crystalcareai
/

Quiet-Star-Custom

Text Generation

Model card | Files and versions | Community

Crystalcareai committed on Mar 30, 2024

Commit

7d42e86

·

verified ·

1 Parent(s): 7b0e256

Update modeling_quiet.py

Files changed (1) hide show

modeling_quiet.py +0 -9

modeling_quiet.py CHANGED Viewed

@@ -1045,15 +1045,6 @@ class QuietModel(QuietPreTrainedModel):
         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
-        if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache:
-            is_padding_right = (attention_mask[:, -1] == 0).any().item()
-            if is_padding_right:
-                raise ValueError(
-                    "You are attempting to perform batched generation with padding_side='right'"
-                    " this may lead to unexpected behaviour for Flash Attention version of Quiet. Make sure to "
-                    " call `tokenizer.padding_side  = 'left'` before tokenizing the input. "
-                )
         if self._attn_implementation == "flash_attention_2":
             # 2d mask is passed through the layers
             attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None

         if inputs_embeds is None:
             inputs_embeds = self.embed_tokens(input_ids)
         if self._attn_implementation == "flash_attention_2":
             # 2d mask is passed through the layers
             attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None