jupyterjazz
commited on
Commit
·
4e13c90
1
Parent(s):
acffa62
refactor: kwargs comprehension
Browse filesSigned-off-by: jupyterjazz <[email protected]>
- embedding.py +1 -3
- mha.py +3 -6
- mlp.py +1 -3
- modeling_xlm_roberta.py +2 -6
embedding.py
CHANGED
@@ -47,9 +47,7 @@ class XLMRobertaEmbeddings(nn.Module):
|
|
47 |
token_type_ids: (batch, seqlen)
|
48 |
"""
|
49 |
batch_size, seqlen = input_ids.shape
|
50 |
-
lora_kwargs = {}
|
51 |
-
if task is not None:
|
52 |
-
lora_kwargs['task'] = task
|
53 |
embeddings = self.word_embeddings(input_ids, **lora_kwargs)
|
54 |
if self.max_position_embeddings > 0:
|
55 |
if position_ids is None:
|
|
|
47 |
token_type_ids: (batch, seqlen)
|
48 |
"""
|
49 |
batch_size, seqlen = input_ids.shape
|
50 |
+
lora_kwargs = {'task': task} if task is not None else {}
|
|
|
|
|
51 |
embeddings = self.word_embeddings(input_ids, **lora_kwargs)
|
52 |
if self.max_position_embeddings > 0:
|
53 |
if position_ids is None:
|
mha.py
CHANGED
@@ -645,14 +645,11 @@ class MHA(nn.Module):
|
|
645 |
batch, seqlen = x.shape[:2]
|
646 |
if not self.cross_attn and self.num_heads_kv == self.num_heads:
|
647 |
assert x_kv is None and mixer_subset is None
|
648 |
-
lora_kwargs = {}
|
649 |
-
if task is not None:
|
650 |
-
lora_kwargs['task'] = task
|
651 |
-
lora_kwargs['residual'] = self.return_residual
|
652 |
-
|
653 |
if not self.return_residual:
|
654 |
qkv = self.Wqkv(x, **lora_kwargs)
|
655 |
else:
|
|
|
656 |
qkv, x = self.Wqkv(x, **lora_kwargs)
|
657 |
|
658 |
if self.dwconv:
|
@@ -739,6 +736,6 @@ class MHA(nn.Module):
|
|
739 |
context = self._update_kvcache_attention(q, kv, inference_params)
|
740 |
else:
|
741 |
context = self._apply_rotary_update_kvcache_attention(q, kv, inference_params)
|
742 |
-
|
743 |
out = self.out_proj(rearrange(context, "... h d -> ... (h d)"), **lora_kwargs)
|
744 |
return out if not self.return_residual else (out, x)
|
|
|
645 |
batch, seqlen = x.shape[:2]
|
646 |
if not self.cross_attn and self.num_heads_kv == self.num_heads:
|
647 |
assert x_kv is None and mixer_subset is None
|
648 |
+
lora_kwargs = {'task': task} if task is not None else {}
|
|
|
|
|
|
|
|
|
649 |
if not self.return_residual:
|
650 |
qkv = self.Wqkv(x, **lora_kwargs)
|
651 |
else:
|
652 |
+
lora_kwargs['residual'] = True
|
653 |
qkv, x = self.Wqkv(x, **lora_kwargs)
|
654 |
|
655 |
if self.dwconv:
|
|
|
736 |
context = self._update_kvcache_attention(q, kv, inference_params)
|
737 |
else:
|
738 |
context = self._apply_rotary_update_kvcache_attention(q, kv, inference_params)
|
739 |
+
|
740 |
out = self.out_proj(rearrange(context, "... h d -> ... (h d)"), **lora_kwargs)
|
741 |
return out if not self.return_residual else (out, x)
|
mlp.py
CHANGED
@@ -48,9 +48,7 @@ class Mlp(nn.Module):
|
|
48 |
self.fc2 = nn.Linear(hidden_features, out_features, bias=bias2, **factory_kwargs)
|
49 |
|
50 |
def forward(self, x, task):
|
51 |
-
lora_kwargs = {}
|
52 |
-
if task is not None:
|
53 |
-
lora_kwargs['task'] = task
|
54 |
y = self.fc1(x, **lora_kwargs)
|
55 |
y = self.activation(y)
|
56 |
y = self.fc2(y, **lora_kwargs)
|
|
|
48 |
self.fc2 = nn.Linear(hidden_features, out_features, bias=bias2, **factory_kwargs)
|
49 |
|
50 |
def forward(self, x, task):
|
51 |
+
lora_kwargs = {'task': task} if task is not None else {}
|
|
|
|
|
52 |
y = self.fc1(x, **lora_kwargs)
|
53 |
y = self.activation(y)
|
54 |
y = self.fc2(y, **lora_kwargs)
|
modeling_xlm_roberta.py
CHANGED
@@ -313,9 +313,7 @@ class XLMRobertaPooler(nn.Module):
|
|
313 |
def forward(self, hidden_states, pool=True, task=None):
|
314 |
# We "pool" the model by simply taking the hidden state corresponding
|
315 |
# to the first token.
|
316 |
-
lora_kwargs = {}
|
317 |
-
if task is not None:
|
318 |
-
lora_kwargs['task'] = task
|
319 |
|
320 |
first_token_tensor = hidden_states[:, 0] if pool else hidden_states
|
321 |
pooled_output = self.dense(first_token_tensor, **lora_kwargs)
|
@@ -550,9 +548,7 @@ class XLMRobertaModel(XLMRobertaPreTrainedModel):
|
|
550 |
)
|
551 |
else:
|
552 |
range_iter = range(0, len(sentences), batch_size)
|
553 |
-
lora_kwargs = {}
|
554 |
-
if task is not None:
|
555 |
-
lora_kwargs['task'] = task
|
556 |
for i in range_iter:
|
557 |
encoded_input = self.tokenizer(
|
558 |
sentences[i : i + batch_size],
|
|
|
313 |
def forward(self, hidden_states, pool=True, task=None):
|
314 |
# We "pool" the model by simply taking the hidden state corresponding
|
315 |
# to the first token.
|
316 |
+
lora_kwargs = {'task': task} if task is not None else {}
|
|
|
|
|
317 |
|
318 |
first_token_tensor = hidden_states[:, 0] if pool else hidden_states
|
319 |
pooled_output = self.dense(first_token_tensor, **lora_kwargs)
|
|
|
548 |
)
|
549 |
else:
|
550 |
range_iter = range(0, len(sentences), batch_size)
|
551 |
+
lora_kwargs = {'task': task} if task is not None else {}
|
|
|
|
|
552 |
for i in range_iter:
|
553 |
encoded_input = self.tokenizer(
|
554 |
sentences[i : i + batch_size],
|