Faisal AlKhateeb
committed on
Commit
·
b566562
1
Parent(s):
e94a265
update ALiBi with kv caching
Browse files- modeling_btlm.py +12 -3
modeling_btlm.py
CHANGED
@@ -74,9 +74,14 @@ class AlibiPositionEmbeddingLayer(nn.Module):
|
|
74 |
self,
|
75 |
seq_length,
|
76 |
key_length,
|
|
|
77 |
):
|
78 |
-
context_position = torch.arange(
|
79 |
-
|
|
|
|
|
|
|
|
|
80 |
relative_position = memory_position - context_position
|
81 |
relative_position = torch.abs(relative_position).unsqueeze(0).expand(self.num_heads, -1, -1)
|
82 |
alibi = (self.slopes * -1.0).unsqueeze(1) * relative_position
|
@@ -946,7 +951,11 @@ class BTLMModel(BTLMPreTrainedModel):
|
|
946 |
|
947 |
if self.relative_pe is not None:
|
948 |
length = input_ids.shape[1]
|
949 |
-
|
|
|
|
|
|
|
|
|
950 |
else:
|
951 |
position_bias = None
|
952 |
|
|
|
74 |
self,
|
75 |
seq_length,
|
76 |
key_length,
|
77 |
+
cached_qk_len,
|
78 |
):
|
79 |
+
context_position = torch.arange(
|
80 |
+
cached_qk_len, cached_qk_len + seq_length, device=self.slopes.device
|
81 |
+
)[:, None]
|
82 |
+
memory_position = torch.arange(
|
83 |
+
key_length + cached_qk_len, device=self.slopes.device
|
84 |
+
)[None, :]
|
85 |
relative_position = memory_position - context_position
|
86 |
relative_position = torch.abs(relative_position).unsqueeze(0).expand(self.num_heads, -1, -1)
|
87 |
alibi = (self.slopes * -1.0).unsqueeze(1) * relative_position
|
|
|
951 |
|
952 |
if self.relative_pe is not None:
|
953 |
length = input_ids.shape[1]
|
954 |
+
cached_kv_length = 0
|
955 |
+
cached_kv = past_key_values[0]
|
956 |
+
if cached_kv is not None:
|
957 |
+
cached_kv_length = cached_kv[0].shape[-2]
|
958 |
+
position_bias = self.relative_pe(length, length, cached_kv_length)
|
959 |
else:
|
960 |
position_bias = None
|
961 |
|