Wenda Xu committed
Commit e31eb42 · 1 Parent(s): dbfa41c

add running codes

InstructScore.py ADDED
@@ -0,0 +1,118 @@
+ import torch
+ from typing import Dict
+ import transformers
+ from transformers import LlamaForCausalLM, LlamaTokenizer
+
+ DEFAULT_PAD_TOKEN = "[PAD]"
+ DEFAULT_EOS_TOKEN = "</s>"
+ # BOS/UNK deliberately reuse the EOS string: the shipped InstructScore_Tok
+ # config leaves these tokens blank, so they are supplied here at load time.
+ DEFAULT_BOS_TOKEN = "</s>"
+ DEFAULT_UNK_TOKEN = "</s>"
+ MAX_SOURCE_LENGTH = 512
+ MAX_TARGET_LENGTH = 512
+ print("Max source length: ", MAX_SOURCE_LENGTH)
+ print("Max target length: ", MAX_TARGET_LENGTH)
+
+
+ def smart_tokenizer_and_embedding_resize(
+     special_tokens_dict: Dict,
+     tokenizer: transformers.PreTrainedTokenizer,
+ ):
+     """Add special tokens and resize the tokenizer.
+
+     Note: this is the unoptimized version that may make your embedding size
+     not divisible by 64. Only the tokenizer is updated here; the
+     InstructScore_English checkpoint is assumed to already ship with the
+     matching resized input embeddings.
+     """
+     tokenizer.add_special_tokens(special_tokens_dict)
+     tokenizer.add_special_tokens(
+         {
+             "eos_token": DEFAULT_EOS_TOKEN,
+             "bos_token": DEFAULT_BOS_TOKEN,
+             "unk_token": DEFAULT_UNK_TOKEN,
+         }
+     )
+
+
+ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
+
+
+ class InstructScore:
+     def __init__(self):
+         self.tokenizer = LlamaTokenizer.from_pretrained(
+             "InstructScore_Tok", model_max_length=MAX_SOURCE_LENGTH, use_fast=False
+         )
+         # Enable batch inference by left padding, so every prompt ends at the
+         # right edge and generation starts from the same position.
+         self.tokenizer.padding_side = "left"
+
+         smart_tokenizer_and_embedding_resize(
+             special_tokens_dict=dict(pad_token=DEFAULT_PAD_TOKEN),
+             tokenizer=self.tokenizer,
+         )
+         self.model = LlamaForCausalLM.from_pretrained("InstructScore_English").to(device)
+         self.model.eval()
+
+     def score(self, ref_ls, out_ls):
+         prompt_ls = [
+             "You are evaluating Chinese-to-English Machine translation task. "
+             f'The correct translation is "{ref}". '
+             f'The model generated translation is "{out}". '
+             "Please identify all errors within each model output, up to a "
+             "maximum of five. For each error, please give me the "
+             "corresponding error type, major/minor label, error location of "
+             "the model generated translation and explanation for the error. "
+             "Major errors can confuse or mislead the reader due to "
+             "significant change in meaning, while minor errors don't lead to "
+             "loss of meaning but will be noticed."
+             for ref, out in zip(ref_ls, out_ls)
+         ]
+
+         with torch.no_grad():
+             inputs = self.tokenizer(
+                 prompt_ls,
+                 return_tensors="pt",
+                 padding=True,
+                 truncation=True,
+                 max_length=MAX_SOURCE_LENGTH,
+             )
+             outputs = self.model.generate(
+                 inputs["input_ids"].to(device),
+                 attention_mask=inputs["attention_mask"].to(device),
+                 max_new_tokens=MAX_TARGET_LENGTH,
+             )
+             batch_outputs = self.tokenizer.batch_decode(
+                 outputs,
+                 skip_special_tokens=True,
+                 clean_up_tokenization_spaces=True,
+             )
+             # Each minor error costs 1 point, each major error costs 5; the
+             # score is the negated weighted error count.
+             scores_ls = [
+                 (-1) * output.count("Major/minor: Minor")
+                 + (-5) * output.count("Major/minor: Major")
+                 for output in batch_outputs
+             ]
+             return batch_outputs, scores_ls
+
+
+ def main():
+     refs = [
+         "SEScore is a simple but effective next generation text generation evaluation metric",
+         "SEScore it really works",
+     ]
+     outs = [
+         "SEScore is a simple effective text evaluation metric for next generation",
+         "SEScore is not working",
+     ]
+
+     scorer = InstructScore()
+     batch_outputs, scores_ls = scorer.score(refs, outs)
+     print(batch_outputs)
+     print(scores_ls)
+
+
+ if __name__ == "__main__":
+     main()
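For quick reference, score() turns each generated diagnosis into a number by weighted error counting: every "Major/minor: Minor" label costs 1 point, every "Major/minor: Major" label costs 5, and the total is negated. A minimal sketch of that rule in isolation (the diagnostic text here is a hypothetical stand-in for real model output):

sample_output = (
    "Error type 1: Incorrect translation. Major/minor: Major. ...\n"
    "Error type 2: Omission. Major/minor: Minor. ...\n"
)
score = (-1) * sample_output.count("Major/minor: Minor") \
    + (-5) * sample_output.count("Major/minor: Major")
print(score)  # -6: one major (-5) plus one minor (-1)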
InstructScore_Tok/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {}
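The shipped special_tokens_map.json is empty; the pad/eos/bos/unk tokens are instead injected at load time by smart_tokenizer_and_embedding_resize in InstructScore.py above, rather than read from this file. A small sanity check, assuming the files in this commit sit under ./InstructScore_Tok:

from transformers import LlamaTokenizer

tok = LlamaTokenizer.from_pretrained("InstructScore_Tok", use_fast=False)
tok.add_special_tokens(
    {"pad_token": "[PAD]", "eos_token": "</s>", "bos_token": "</s>", "unk_token": "</s>"}
)
print(tok.special_tokens_map)  # should now report the tokens injected above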
InstructScore_Tok/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
InstructScore_Tok/tokenizer_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "bos_token": "",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "",
+   "model_max_length": 1000000000000000019884624838656,
+   "special_tokens_map_file": "/mnt/data3/wendaxu/.cache/huggingface/hub/models--decapoda-research--llama-7b-hf/snapshots/5f98eefcc80e437ef68d457ad7bf167c2c6a1348/special_tokens_map.json",
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": ""
+ }
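The very large model_max_length above is the transformers sentinel for "no limit" (int(1e30)); the effective cap comes from InstructScore.py, which passes model_max_length=512 when loading the tokenizer. A quick check under the same ./InstructScore_Tok path assumption as above:

from transformers import LlamaTokenizer

# InstructScore.py overrides the sentinel at load time.
tok = LlamaTokenizer.from_pretrained(
    "InstructScore_Tok", model_max_length=512, use_fast=False
)
assert tok.model_max_length == 512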