ccdv committed on
Commit
20e5f76
·
1 Parent(s): 61fb6ae
Files changed (2) hide show
  1. README.md +1 -1
  2. modeling_lsg_camembert.py +12 -6
README.md CHANGED
@@ -9,7 +9,7 @@ pipeline_tag: fill-mask
9
  Conversion script is available at this [link](https://github.com/ccdv-ai/convert_checkpoint_to_lsg).
10
 
11
  # LSG model
12
- **Transformers >= 4.35.2**\
13
  **This model relies on a custom modeling file, you need to add trust_remote_code=True**\
14
  **See [\#13467](https://github.com/huggingface/transformers/pull/13467)**
15
 
 
9
  Conversion script is available at this [link](https://github.com/ccdv-ai/convert_checkpoint_to_lsg).
10
 
11
  # LSG model
12
+ **Transformers >= 4.36.1**\
13
  **This model relies on a custom modeling file, you need to add trust_remote_code=True**\
14
  **See [\#13467](https://github.com/huggingface/transformers/pull/13467)**
15
 
modeling_lsg_camembert.py CHANGED
@@ -411,13 +411,11 @@ class LSGCamembertEmbeddings(CamembertEmbeddings):
411
 
412
  def forward(
413
  self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None, past_key_values_length=0
414
- ):
415
  if position_ids is None:
416
  if input_ids is not None:
417
  # Create the position ids from the input token ids. Any padded tokens remain padded.
418
- position_ids = create_position_ids_from_input_ids(
419
- input_ids, self.padding_idx, past_key_values_length
420
- ).to(input_ids.device)
421
  else:
422
  position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
423
 
@@ -426,10 +424,18 @@ class LSGCamembertEmbeddings(CamembertEmbeddings):
426
  else:
427
  input_shape = inputs_embeds.size()[:-1]
428
 
429
- seq_length = input_shape[-1]
430
 
 
 
 
431
  if token_type_ids is None:
432
- token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)
 
 
 
 
 
433
 
434
  if inputs_embeds is None:
435
  inputs_embeds = self.word_embeddings(input_ids)
 
411
 
412
  def forward(
413
  self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None, past_key_values_length=0
414
+ ):
415
  if position_ids is None:
416
  if input_ids is not None:
417
  # Create the position ids from the input token ids. Any padded tokens remain padded.
418
+ position_ids = create_position_ids_from_input_ids(input_ids, self.padding_idx, past_key_values_length)
 
 
419
  else:
420
  position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
421
 
 
424
  else:
425
  input_shape = inputs_embeds.size()[:-1]
426
 
427
+ seq_length = input_shape[1]
428
 
429
+ # Setting the token_type_ids to the registered buffer in constructor where it is all zeros, which usually occurs
430
+ # when it's auto-generated; the registered buffer helps users when tracing the model without passing token_type_ids, solves
431
+ # issue #5664
432
  if token_type_ids is None:
433
+ if hasattr(self, "token_type_ids"):
434
+ buffered_token_type_ids = self.token_type_ids[:, :seq_length]
435
+ buffered_token_type_ids_expanded = buffered_token_type_ids.expand(input_shape[0], seq_length)
436
+ token_type_ids = buffered_token_type_ids_expanded
437
+ else:
438
+ token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device)
439
 
440
  if inputs_embeds is None:
441
  inputs_embeds = self.word_embeddings(input_ids)