Update modeling.py
#19
by
JesusCrist
- opened
- modeling.py +2 -2
modeling.py
CHANGED
@@ -897,11 +897,11 @@ class NewModel(NewPreTrainedModel):
|
|
897 |
|
898 |
if unpad_inputs:
|
899 |
assert self.config.use_memory_efficient_attention
|
900 |
-
attention_bias = xops.fmha.attn_bias.BlockDiagonalMask.from_seqlens(length)
|
901 |
else:
|
902 |
# We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
|
903 |
# ourselves in which case we just need to make it broadcastable to all heads.
|
904 |
-
attention_bias = self.get_extended_attention_mask(attention_mask, input_shape)
|
905 |
if self.config.use_memory_efficient_attention:
|
906 |
# Invalid shape for attention bias: torch.Size([48, 1, 1, 512]) (expected (48, 12, 512, 512))
|
907 |
attention_bias = attention_bias.expand(-1, self.config.num_attention_heads, seq_length, -1)
|
|
|
897 |
|
898 |
if unpad_inputs:
|
899 |
assert self.config.use_memory_efficient_attention
|
900 |
+
attention_bias = xops.fmha.attn_bias.BlockDiagonalMask.from_seqlens(length,device=self.device)
|
901 |
else:
|
902 |
# We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
|
903 |
# ourselves in which case we just need to make it broadcastable to all heads.
|
904 |
+
attention_bias = self.get_extended_attention_mask(attention_mask, input_shape,device=self.device)
|
905 |
if self.config.use_memory_efficient_attention:
|
906 |
# Invalid shape for attention bias: torch.Size([48, 1, 1, 512]) (expected (48, 12, 512, 512))
|
907 |
attention_bias = attention_bias.expand(-1, self.config.num_attention_heads, seq_length, -1)
|