Crystalcareai
committed on
Update modeling_quiet.py
Browse files
- modeling_quiet.py +0 -9
modeling_quiet.py
CHANGED
@@ -1045,15 +1045,6 @@ class QuietModel(QuietPreTrainedModel):
|
|
1045 |
if inputs_embeds is None:
|
1046 |
inputs_embeds = self.embed_tokens(input_ids)
|
1047 |
|
1048 |
-
if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache:
|
1049 |
-
is_padding_right = (attention_mask[:, -1] == 0).any().item()
|
1050 |
-
if is_padding_right:
|
1051 |
-
raise ValueError(
|
1052 |
-
"You are attempting to perform batched generation with padding_side='right'"
|
1053 |
-
" this may lead to unexpected behaviour for Flash Attention version of Quiet. Make sure to "
|
1054 |
-
" call `tokenizer.padding_side = 'left'` before tokenizing the input. "
|
1055 |
-
)
|
1056 |
-
|
1057 |
if self._attn_implementation == "flash_attention_2":
|
1058 |
# 2d mask is passed through the layers
|
1059 |
attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
|
|
|
1045 |
if inputs_embeds is None:
|
1046 |
inputs_embeds = self.embed_tokens(input_ids)
|
1047 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1048 |
if self._attn_implementation == "flash_attention_2":
|
1049 |
# 2d mask is passed through the layers
|
1050 |
attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
|