Crystalcareai committed on
Update modeling_quiet.py

modeling_quiet.py (+2 -2)
@@ -540,7 +540,7 @@ class QuietFlashAttention2(QuietAttention):
         value_states = value_states.to(target_dtype)

         # Compute the causal mask
-        causal = self.config.
+        causal = self.config.is_causal
         if causal:
             if self._flash_attn_uses_top_left_mask:
                 # Compute the causal mask
@@ -768,7 +768,7 @@ class QuietSdpaAttention(QuietAttention):
             attn_mask=attention_mask.to(query_states.device) if attention_mask is not None else None,
             dropout_p=self.attention_dropout if self.training else 0.0,
             # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1.
-
+            is_causal=self.is_causal and attention_mask is None and q_len > 1,
         )

         attn_output = attn_output.transpose(1, 2).contiguous()
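The first hunk completes the assignment so the Flash Attention 2 path reads its causal flag from the model config (`self.config.is_causal`). The second hunk supplies the `is_causal` keyword to PyTorch's `torch.nn.functional.scaled_dot_product_attention`, which the preceding comment describes but which was previously missing from the call. Below is a minimal sketch of how that guard behaves; the wrapper function `sdpa_attention` and the tensor shapes are illustrative assumptions, not the actual modeling_quiet.py code.

import torch
import torch.nn.functional as F

def sdpa_attention(query, key, value, attention_mask=None, is_causal=True, dropout_p=0.0):
    # query/key/value: (batch, num_heads, seq_len, head_dim) -- illustrative shapes.
    q_len = query.shape[-2]
    return F.scaled_dot_product_attention(
        query,
        key,
        value,
        attn_mask=attention_mask.to(query.device) if attention_mask is not None else None,
        dropout_p=dropout_p,
        # Only ask SDPA to build its own causal mask when no explicit mask is passed
        # (SDPA rejects attn_mask together with is_causal=True) and when there is
        # more than one query position; with q_len == 1 a causal mask is unnecessary,
        # matching AttentionMaskConverter.to_causal_4d, which skips mask creation
        # in that case.
        is_causal=is_causal and attention_mask is None and q_len > 1,
    )

# Example: batch of 2, 4 heads, 5 query positions, head_dim 8, no explicit mask,
# so SDPA applies causal masking itself.
q = torch.randn(2, 4, 5, 8)
k = torch.randn(2, 4, 5, 8)
v = torch.randn(2, 4, 5, 8)
out = sdpa_attention(q, k, v)
print(out.shape)  # torch.Size([2, 4, 5, 8])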