snapshot_download(repo_id=model_name, local_dir=local_dir, local_dir_use_symlinks=False)
print(f"Model downloaded to: {local_dir}")
```
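The two lines above are the tail of the download step; the excerpt does not define the variables they use. A minimal self-contained version, with `model_name` and `local_dir` filled in from the inference section below (these values are assumptions, adjust to your setup):

```
from huggingface_hub import snapshot_download

# Assumed values: the base model ID and target directory used later in this README.
model_name = "llm-jp/llm-jp-3-13b"
local_dir = "./models/llm-jp/llm-jp-3-13b"

snapshot_download(repo_id=model_name, local_dir=local_dir, local_dir_use_symlinks=False)
print(f"Model downloaded to: {local_dir}")
```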
- Inference
```
from unsloth import FastLanguageModel
from peft import PeftModel
import torch
import json
from tqdm import tqdm
import re
# Base model and the trained LoRA adapter (specify Hugging Face IDs).
#model_id = "llm-jp/llm-jp-3-13b"  # use this instead when downloading from Hugging Face
local_model_dir = "./models/llm-jp/llm-jp-3-13b"  # local directory of the model downloaded beforehand
adapter_id = "takeofuture/llm-jp-3-13b-finetune-22_lora"
HF_TOKEN = "insert your Hugging Face token here"
# Load the base model with unsloth's FastLanguageModel.
dtype = None  # None selects the dtype automatically
load_in_4bit = True  # True because we are handling a 13B model here
model, tokenizer = FastLanguageModel.from_pretrained(
    #model_name=model_id,
    model_name=local_model_dir,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN)
# Switch to inference mode.
FastLanguageModel.for_inference(model)
# Prompt in the fine-tuning format ("### 指示" = instruction, "### 回答" = answer);
# the question asks about famous Nagoya food.
prompt = f"""### 指示\n名古屋の有名なグルメは何ですか?\n### 回答\n"""
inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
# Remove the unneeded `token_type_ids`.
if "token_type_ids" in inputs:
    del inputs["token_type_ids"]
outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True, do_sample=False, repetition_penalty=1.2)
prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]
print(prediction)
```
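The script above runs a single hard-coded prompt; the unused `json`, `tqdm`, and `re` imports suggest the full workflow loops over a task file. A minimal sketch of such a batch loop, reusing the `model`/`tokenizer` loaded above and assuming a JSONL file with `task_id` and `input` fields (the file name and field names are assumptions):

```
import json
from tqdm import tqdm

# Hypothetical task file: one JSON object per line with "task_id" and "input".
tasks = []
with open("./elyza-tasks-100-TV_0.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        if line.strip():
            tasks.append(json.loads(line))

results = []
for task in tqdm(tasks):
    prompt = f"### 指示\n{task['input']}\n### 回答\n"
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    if "token_type_ids" in inputs:
        del inputs["token_type_ids"]  # same cleanup as in the single-prompt example
    outputs = model.generate(**inputs, max_new_tokens=512, use_cache=True,
                             do_sample=False, repetition_penalty=1.2)
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split("\n### 回答")[-1]
    results.append({"task_id": task["task_id"], "output": prediction})

# Write predictions as JSONL for later inspection.
with open("results.jsonl", "w", encoding="utf-8") as f:
    for r in results:
        f.write(json.dumps(r, ensure_ascii=False) + "\n")
```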
## **How to run inference on elyza_100_tv in GOOGLE COLABORATORY**
See the following notebook:
- [Model_Inference_Template_unsloth_20241127.ipynb](./Model_Inference_Template_unsloth_20241127.ipynb)