small fix
Browse files
- README.md +1 -1
- modeling_lsg_camembert.py +12 -6
README.md
CHANGED
@@ -9,7 +9,7 @@ pipeline_tag: fill-mask
|
|
9 |
Conversion script is available at this [link](https://github.com/ccdv-ai/convert_checkpoint_to_lsg).
|
10 |
|
11 |
# LSG model
|
12 |
-
**Transformers >= 4.
|
13 |
**This model relies on a custom modeling file, you need to add trust_remote_code=True**\
|
14 |
**See [\#13467](https://github.com/huggingface/transformers/pull/13467)**
|
15 |
|
|
|
9 |
Conversion script is available at this [link](https://github.com/ccdv-ai/convert_checkpoint_to_lsg).
|
10 |
|
11 |
# LSG model
|
12 |
+
**Transformers >= 4.36.1**\
|
13 |
**This model relies on a custom modeling file, you need to add trust_remote_code=True**\
|
14 |
**See [\#13467](https://github.com/huggingface/transformers/pull/13467)**
|
15 |
|
modeling_lsg_camembert.py
CHANGED
@@ -411,13 +411,11 @@ class LSGCamembertEmbeddings(CamembertEmbeddings):
|
|
411 |
|
412 |
def forward(
|
413 |
self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None, past_key_values_length=0
|
414 |
-
|
415 |
if position_ids is None:
|
416 |
if input_ids is not None:
|
417 |
# Create the position ids from the input token ids. Any padded tokens remain padded.
|
418 |
-
position_ids = create_position_ids_from_input_ids(
|
419 |
-
input_ids, self.padding_idx, past_key_values_length
|
420 |
-
).to(input_ids.device)
|
421 |
else:
|
422 |
position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
|
423 |
|
@@ -426,10 +424,18 @@ class LSGCamembertEmbeddings(CamembertEmbeddings):
|
|
426 |
else:
|
427 |
input_shape = inputs_embeds.size()[:-1]
|
428 |
|
429 |
-
seq_length = input_shape[
|
430 |
|
|
|
|
|
|
|
431 |
if token_type_ids is None:
|
432 |
-
|
|
|
|
|
|
|
|
|
|
|
433 |
|
434 |
if inputs_embeds is None:
|
435 |
inputs_embeds = self.word_embeddings(input_ids)
|
|
|
411 |
|
412 |
def forward(
|
413 |
self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None, past_key_values_length=0
|
414 |
+
):
|
415 |
if position_ids is None:
|
416 |
if input_ids is not None:
|
417 |
# Create the position ids from the input token ids. Any padded tokens remain padded.
|
418 |
+
position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx, past_key_values_length)
|
|
|
|
|
419 |
else:
|
420 |
position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
|
421 |
|
|
|
424 |
else:
|
425 |
input_shape = inputs_embeds.size()[:-1]
|
426 |
|
427 |
+
seq_length = input_shape[1]
|
428 |
|
429 |
+
# Setting the token_type_ids to the registered buffer in constructor where it is all zeros, which usually occurs
|
430 |
+
# when its auto-generated, registered buffer helps users when tracing the model without passing token_type_ids, solves
|
431 |
+
# issue #5664
|
432 |
if token_type_ids is None:
|
433 |
+
if hasattr(self, "token_type_ids"):
|
434 |
+
buffered_token_type_ids = self.token_type_ids[:, :seq_length]
|
435 |
+
buffered_token_type_ids_expanded = buffered_token_type_ids.expand(input_shape[0], seq_length)
|
436 |
+
token_type_ids = buffered_token_type_ids_expanded
|
437 |
+
else:
|
438 |
+
token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)
|
439 |
|
440 |
if inputs_embeds is None:
|
441 |
inputs_embeds = self.word_embeddings(input_ids)
|