KoichiYasuoka committed on
Commit
5db1855
·
1 Parent(s): 70ac70b

model improved for transformers 4.42

Browse files
config.json CHANGED
@@ -4,22 +4,11 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
- "auto_map": {
8
- "AutoModelForTokenClassification": "upos.LlamaForTokenClassification"
9
- },
10
  "bos_token_id": 128000,
11
  "custom_pipelines": {
12
  "upos": {
13
  "impl": "upos.BellmanFordTokenClassificationPipeline",
14
  "pt": "AutoModelForTokenClassification"
15
- },
16
- "token-classification": {
17
- "impl": "upos.RawTokenClassificationPipeline",
18
- "pt": "AutoModelForTokenClassification"
19
- },
20
- "ner": {
21
- "impl": "upos.RawTokenClassificationPipeline",
22
- "pt": "AutoModelForTokenClassification"
23
  }
24
  },
25
  "eos_token_id": 128001,
@@ -146,7 +135,7 @@
146
  "tie_word_embeddings": false,
147
  "tokenizer_class": "LlamaTokenizerFast",
148
  "torch_dtype": "float32",
149
- "transformers_version": "4.41.2",
150
  "use_cache": true,
151
  "vocab_size": 128259
152
  }
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
 
 
 
7
  "bos_token_id": 128000,
8
  "custom_pipelines": {
9
  "upos": {
10
  "impl": "upos.BellmanFordTokenClassificationPipeline",
11
  "pt": "AutoModelForTokenClassification"
 
 
 
 
 
 
 
 
12
  }
13
  },
14
  "eos_token_id": 128001,
 
135
  "tie_word_embeddings": false,
136
  "tokenizer_class": "LlamaTokenizerFast",
137
  "torch_dtype": "float32",
138
+ "transformers_version": "4.42.4",
139
  "use_cache": true,
140
  "vocab_size": 128259
141
  }
maker.sh CHANGED
@@ -30,45 +30,9 @@ cat << 'EOF' > $TMPB
30
  #! /usr/bin/env deepspeed
31
  src="exLlama-3-Swallow-8B"
32
  tgt="KoichiYasuoka/Llama-3-Swallow-8B-upos"
33
- from transformers import LlamaTokenizerFast,LlamaModel,LlamaPreTrainedModel,AutoConfig,DataCollatorForTokenClassification,TrainingArguments,Trainer
34
- from transformers.modeling_outputs import TokenClassifierOutput
35
  from tokenizers.normalizers import Replace
36
 
37
- class LlamaForTokenClassification(LlamaPreTrainedModel):
38
- def __init__(self,config):
39
- from torch import nn
40
- super().__init__(config)
41
- self.num_labels=config.num_labels
42
- self.model=LlamaModel(config)
43
- if hasattr(config,"classifier_dropout") and config.classifier_dropout is not None:
44
- classifier_dropout=config.classifier_dropout
45
- elif hasattr(config,"hidden_dropout") and config.hidden_dropout is not None:
46
- classifier_dropout=config.hidden_dropout
47
- else:
48
- classifier_dropout=0.1
49
- self.dropout=nn.Dropout(classifier_dropout)
50
- self.classifier=nn.Linear(config.hidden_size,config.num_labels)
51
- self.post_init()
52
- def get_input_embeddings(self):
53
- return self.model.embed_tokens
54
- def set_input_embeddings(self,value):
55
- self.model.embed_tokens=value
56
- def forward(self,input_ids=None,past_key_values=None,attention_mask=None,position_ids=None,inputs_embeds=None,labels=None,use_cache=None,output_attentions=None,output_hidden_states=None,return_dict=None):
57
- return_dict=return_dict if return_dict is not None else self.config.use_return_dict
58
- transformer_outputs=self.model(input_ids,past_key_values=past_key_values,attention_mask=attention_mask,position_ids=position_ids,inputs_embeds=inputs_embeds,use_cache=use_cache,output_attentions=output_attentions,output_hidden_states=output_hidden_states,return_dict=return_dict)
59
- hidden_states=transformer_outputs[0]
60
- hidden_states=self.dropout(hidden_states)
61
- logits=self.classifier(hidden_states)
62
- loss=None
63
- if labels is not None:
64
- from torch import nn
65
- loss_fct=nn.CrossEntropyLoss()
66
- loss=loss_fct(logits.view(-1,self.num_labels),labels.view(-1))
67
- if not return_dict:
68
- output=(logits,)+transformer_outputs[1:]
69
- return ((loss,)+output) if loss is not None else output
70
- return TokenClassifierOutput(loss=loss,logits=logits,hidden_states=transformer_outputs.hidden_states,attentions=transformer_outputs.attentions)
71
-
72
  class UPOSFileDataset(object):
73
  def __init__(self,conllu,tokenizer):
74
  self.conllu=open(conllu,"r",encoding="utf-8")
 
30
  #! /usr/bin/env deepspeed
31
  src="exLlama-3-Swallow-8B"
32
  tgt="KoichiYasuoka/Llama-3-Swallow-8B-upos"
33
+ from transformers import LlamaTokenizerFast,LlamaForTokenClassification,AutoConfig,DataCollatorForTokenClassification,TrainingArguments,Trainer
 
34
  from tokenizers.normalizers import Replace
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  class UPOSFileDataset(object):
37
  def __init__(self,conllu,tokenizer):
38
  self.conllu=open(conllu,"r",encoding="utf-8")
pytorch_model-00001-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e34daa1be0869a7a71578c229f82f13fb4d3f6a003f6d31c3dd04ddf1a7265d7
3
  size 4886522810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c1eccfc4ac55455e0a90a3b934694dffb05491ca344aaa9989d700ee54ca04c
3
  size 4886522810
pytorch_model-00002-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bb82ab6110006a07cb361674e4be76d1368e1e3de31035d1ae6bc6c7f20914e
3
  size 4832018324
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:428443ac934b1722a49a914f362522c1acb8f918d6305f2d984f20ea56a99367
3
  size 4832018324
pytorch_model-00003-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55a70605a22a3d99fb732610d2963dcbccfbbf99d57256c352af105c2c50f29c
3
  size 4999825256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:891d943d00315ce1e956adf438324229dc789de5b3913c6eb4a09d513b8db62a
3
  size 4999825256
pytorch_model-00004-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b749ea15607ac13e6e31ebf31a66b8a41657554642fdd09d604a93e90b38eed
3
  size 4999825316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a93f360a7ea98e786708c001c4bf6abbfcf412f020a08693d3683163d2be3c05
3
  size 4999825316
pytorch_model-00005-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e45d5b53b00968ffba32eb631834374a2c7a4dfa6f8cec3ca9d9c64e7adcb4e8
3
  size 4832018324
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:453231206832ee14a9716179bb4cc1558e78cbde9aaf36108b5a51f28f923771
3
  size 4832018324
pytorch_model-00006-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b9ff08e6f96f400379245683d36c33831a28ad76999e75e3ce9bf9a2623db31
3
  size 4999825320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4abae140281b41a91fb15418608a306eaa6e4e10d3ba6deb7688c4365fd627a2
3
  size 4999825320
pytorch_model-00007-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f111d151726dd3487111bc0e08278c08987ec551ea1c84ba56823b264b53d17b
3
  size 470650155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccff44101836996c47f5b14426a0ae020f74435191637344d427639eb6ea9ecf
3
  size 470650155
pytorch_model.bin.index.json CHANGED
@@ -3,8 +3,6 @@
3
  "total_size": 30020583628
4
  },
5
  "weight_map": {
6
- "classifier.bias": "pytorch_model-00007-of-00007.bin",
7
- "classifier.weight": "pytorch_model-00007-of-00007.bin",
8
  "model.embed_tokens.weight": "pytorch_model-00001-of-00007.bin",
9
  "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00007.bin",
10
  "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin",
@@ -294,6 +292,8 @@
294
  "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin",
295
  "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin",
296
  "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin",
297
- "model.norm.weight": "pytorch_model-00007-of-00007.bin"
 
 
298
  }
299
  }
 
3
  "total_size": 30020583628
4
  },
5
  "weight_map": {
 
 
6
  "model.embed_tokens.weight": "pytorch_model-00001-of-00007.bin",
7
  "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00007.bin",
8
  "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin",
 
292
  "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin",
293
  "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin",
294
  "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin",
295
+ "model.norm.weight": "pytorch_model-00007-of-00007.bin",
296
+ "score.bias": "pytorch_model-00007-of-00007.bin",
297
+ "score.weight": "pytorch_model-00007-of-00007.bin"
298
  }
299
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "128000": {
6
  "content": "<|begin_of_text|>",
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "128000": {
7
  "content": "<|begin_of_text|>",
upos.py CHANGED
@@ -1,5 +1,4 @@
1
- from transformers import TokenClassificationPipeline,LlamaModel,LlamaPreTrainedModel
2
- from transformers.modeling_outputs import TokenClassifierOutput
3
 
4
  class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
5
  def __init__(self,**kwargs):
@@ -40,41 +39,3 @@ class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
40
  t["text"]=model_outputs["sentence"][t["start"]:t["end"]]
41
  return w
42
 
43
- class RawTokenClassificationPipeline(TokenClassificationPipeline):
44
- def check_model_type(self,supported_models):
45
- pass
46
-
47
- class LlamaForTokenClassification(LlamaPreTrainedModel):
48
- def __init__(self,config):
49
- from torch import nn
50
- super().__init__(config)
51
- self.num_labels=config.num_labels
52
- self.model=LlamaModel(config)
53
- if hasattr(config,"classifier_dropout") and config.classifier_dropout is not None:
54
- classifier_dropout=config.classifier_dropout
55
- elif hasattr(config,"hidden_dropout") and config.hidden_dropout is not None:
56
- classifier_dropout=config.hidden_dropout
57
- else:
58
- classifier_dropout=0.1
59
- self.dropout=nn.Dropout(classifier_dropout)
60
- self.classifier=nn.Linear(config.hidden_size,config.num_labels)
61
- self.post_init()
62
- def get_input_embeddings(self):
63
- return self.model.embed_tokens
64
- def set_input_embeddings(self,value):
65
- self.model.embed_tokens=value
66
- def forward(self,input_ids=None,past_key_values=None,attention_mask=None,position_ids=None,inputs_embeds=None,labels=None,use_cache=None,output_attentions=None,output_hidden_states=None,return_dict=None):
67
- return_dict=return_dict if return_dict is not None else self.config.use_return_dict
68
- transformer_outputs=self.model(input_ids,past_key_values=past_key_values,attention_mask=attention_mask,position_ids=position_ids,inputs_embeds=inputs_embeds,use_cache=use_cache,output_attentions=output_attentions,output_hidden_states=output_hidden_states,return_dict=return_dict)
69
- hidden_states=transformer_outputs[0]
70
- hidden_states=self.dropout(hidden_states)
71
- logits=self.classifier(hidden_states)
72
- loss=None
73
- if labels is not None:
74
- from torch import nn
75
- loss_fct=nn.CrossEntropyLoss()
76
- loss=loss_fct(logits.view(-1,self.num_labels),labels.view(-1))
77
- if not return_dict:
78
- output=(logits,)+transformer_outputs[2:]
79
- return ((loss,)+output) if loss is not None else output
80
- return TokenClassifierOutput(loss=loss,logits=logits,hidden_states=transformer_outputs.hidden_states,attentions=transformer_outputs.attentions)
 
1
+ from transformers import TokenClassificationPipeline
 
2
 
3
  class BellmanFordTokenClassificationPipeline(TokenClassificationPipeline):
4
  def __init__(self,**kwargs):
 
39
  t["text"]=model_outputs["sentence"][t["start"]:t["end"]]
40
  return w
41