tomo1222 committed on
Commit 94b3406 · verified · 1 Parent(s): 33c3ba2

Update README.md

Files changed (1)
  1. README.md (+26, -21)
README.md CHANGED
@@ -97,30 +97,35 @@ def search_ref_input(input, k=10):
     return text
 
 """# Prompt"""
-
 output_data=[]
 
 for i, task in enumerate(tasks):
-    text = search_ref_input(task["input"],16)+f"### 質問:\n{task['input']}\n\n### 回答:\n"
-    print(task["input"])
-    inputs = tokenizer(text, return_tensors="pt").to("cuda")
-    print(len(inputs['input_ids'][0]))
-    output = model.generate(**inputs, max_new_tokens=1024,repetition_penalty=1.2,use_cache=True,
-                            bad_words_ids = [tokenizer.encode("質問", add_special_tokens=False),
-                                             tokenizer.encode("###", add_special_tokens=False),
-                                             tokenizer.encode("#", add_special_tokens=False),
-                                             tokenizer.encode("##", add_special_tokens=False),
-                                             tokenizer.encode("---", add_special_tokens=False),
-                                             tokenizer.encode("<h3>", add_special_tokens=False),
-                                             tokenizer.encode("filepath", add_special_tokens=False),
-                                             tokenizer.encode("> ", add_special_tokens=False),
-                                            ]
-                            )
-
-    output_text = tokenizer.decode(output[0][inputs.input_ids.size(1):], skip_special_tokens=True).strip()
-    print(i,output_text)
-    print("---")
-    output_data.append({"task_id":i,"output":output_text})
+    text = (
+        search_ref_input(task["input"], 20)
+        + "あなたは日本語が堪能な優秀な人間です。\n"
+        + "**文脈**を踏まえて、改行と箇条書きを駆使して、日本語で**詳細に**書きなさい。\n"
+        + "優秀な人間になりきって、推測をいれずに根拠をもってわかりやすく答えてください。"
+        + f"### 質問:\n{task['input']}\n\n### 回答:\n"
+    )
+    print(task["input"])
+    inputs = tokenizer(text, return_tensors="pt").to("cuda")
+    print(len(inputs['input_ids'][0]))
+    output = model.generate(**inputs, max_new_tokens=1024,repetition_penalty=1.1,use_cache=True,
+                            bad_words_ids = [tokenizer.encode("質問", add_special_tokens=False),
+                                             tokenizer.encode("###", add_special_tokens=False),
+                                             tokenizer.encode("#", add_special_tokens=False),
+                                             tokenizer.encode("##", add_special_tokens=False),
+                                             tokenizer.encode("---", add_special_tokens=False),
+                                             tokenizer.encode("<h3>", add_special_tokens=False),
+                                             tokenizer.encode("filepath", add_special_tokens=False),
+                                             tokenizer.encode("> ", add_special_tokens=False),
+                                            ]
+                            )
+
+    output_text = tokenizer.decode(output[0][inputs.input_ids.size(1):], skip_special_tokens=True).strip()
+    print(i,output_text)
+    print("---")
+    output_data.append({"task_id":i,"output":output_text})
 
 with open("output.jsonl","w",encoding="utf-8") as f:
     for result in output_data:
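
The functional changes in this hunk are: the retrieval depth passed to `search_ref_input` rises from 16 to 20, `repetition_penalty` drops from 1.2 to 1.1, and the prompt gains a Japanese instruction preamble ahead of the `### 質問:` / `### 回答:` template (roughly: "You are an excellent person fluent in Japanese. Based on the **context**, write **in detail** in Japanese, using line breaks and bullet points. Acting as that person, answer clearly and with evidence, without speculation."). The sketch below is not part of the commit; it only illustrates the same `bad_words_ids` pattern used in the hunk, with `"gpt2"` as a placeholder checkpoint since the README defines the real model, `tasks`, and `search_ref_input` elsewhere.

```python
# Minimal, self-contained sketch of the bad_words_ids pattern from the diff.
# Assumptions: "gpt2" stands in for the commit's actual model; the prompt is hard-coded.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # placeholder checkpoint, not the model used in the commit
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Encode each banned string without special tokens, as the diff does, so the
# resulting token-ID sequences can be blocked during generation.
banned_strings = ["質問", "###", "#", "##", "---", "<h3>", "filepath", "> "]
bad_words_ids = [tokenizer.encode(s, add_special_tokens=False) for s in banned_strings]

prompt = "### 質問:\n日本の首都はどこですか?\n\n### 回答:\n"
inputs = tokenizer(prompt, return_tensors="pt")

with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=64,
        repetition_penalty=1.1,       # the commit lowers this from 1.2
        use_cache=True,
        bad_words_ids=bad_words_ids,  # these token sequences cannot be generated
    )

# Decode only the newly generated tokens, mirroring the slicing in the diff.
new_tokens = output[0][inputs["input_ids"].size(1):]
print(tokenizer.decode(new_tokens, skip_special_tokens=True).strip())
```

Blocking `"###"`, `"#"`, `"##"`, and `"---"` keeps the model from emitting a new markdown header such as another `### 質問:` block, so generation stays within the single answer section before it is decoded and appended to `output_data`.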