Update README.md
Browse files
README.md
CHANGED
@@ -97,30 +97,35 @@ def search_ref_input(input, k=10):
|
|
97 |
return text
|
98 |
|
99 |
"""# Prompt"""
|
100 |
-
|
101 |
output_data=[]
|
102 |
|
103 |
for i, task in enumerate(tasks):
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
|
125 |
with open("output.jsonl","w",encoding="utf-8") as f:
|
126 |
for result in output_data:
|
|
|
97 |
return text
|
98 |
|
99 |
"""# Prompt"""
|
|
|
100 |
output_data=[]
|
101 |
|
102 |
# Token-id sequences the model is forbidden to emit during generation.
# Built once, before the loop: they depend only on the tokenizer, not on the
# individual task, so re-encoding them per task is wasted work.
banned_phrases = ["質問", "###", "#", "##", "---", "<h3>", "filepath", "> "]
bad_words_ids = [
    tokenizer.encode(phrase, add_special_tokens=False)
    for phrase in banned_phrases
]

for i, task in enumerate(tasks):
    # Prompt layout: text returned by search_ref_input for this question
    # (called with 20 as its second argument), then the fixed Japanese
    # instructions, then the question and an empty answer header for the
    # model to complete.
    # NOTE(review): the third instruction sentence has no trailing "\n", so
    # "### 質問:" begins on the same line as that sentence. The literal is
    # kept unchanged here because altering the prompt changes the model
    # output -- confirm whether the missing newline is intended.
    text = (
        search_ref_input(task["input"], 20)
        + "あなたは日本語が堪能な優秀な人間です。\n"
        + "**文脈**を踏まえて、改行と箇条書きを駆使して、日本語で**詳細に**書きなさい。\n"
        + "優秀な人間になりきって、推測をいれずに根拠をもってわかりやすく答えてください。"
        + f"### 質問:\n{task['input']}\n\n### 回答:\n"
    )
    print(task["input"])

    inputs = tokenizer(text, return_tensors="pt").to("cuda")
    # Number of prompt tokens; logged for inspection and reused below to
    # slice the prompt off the generated sequence.
    prompt_length = inputs["input_ids"].shape[1]
    print(prompt_length)

    output = model.generate(
        **inputs,
        max_new_tokens=1024,
        repetition_penalty=1.1,
        use_cache=True,
        bad_words_ids=bad_words_ids,
    )

    # generate() returns prompt + continuation; decode only the continuation.
    output_text = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    ).strip()
    print(i, output_text)
    print("---")
    # task_id is the enumeration index of the task, not a field read from it.
    output_data.append({"task_id": i, "output": output_text})
|
129 |
|
130 |
with open("output.jsonl","w",encoding="utf-8") as f:
|
131 |
for result in output_data:
|