Commit f82faaa · committed by huangyi · 1 parent: 711fc0a

readme: Update inference code

Signed-off-by: huangyi <[email protected]>

Files changed:
- README.md +8 -7
- README_zh.md +10 -10
README.md CHANGED

````diff
@@ -171,9 +171,13 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation.utils import GenerationConfig

-tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
-
-
+tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
+                                          use_fast=False,
+                                          trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
+                                             device_map="auto",
+                                             torch_dtype=torch.bfloat16,
+                                             trust_remote_code=True)

 model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base")
 messages = [{"role": "user", "content": "Hello, what is your name? "}]
@@ -190,10 +194,7 @@ device, you can use something like `export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7`
 ```shell

 # base model
-CUDA_VISIBLE_DEVICES=0 python demo/text_generation_base.py --model OrionStarAI/Orion-MOE8x7B --tokenizer OrionStarAI/Orion-MOE8x7B --prompt hello
-
-# chat model
-CUDA_VISIBLE_DEVICES=0 python demo/text_generation.py --model OrionStarAI/Orion-MOE8x7B-Chat --tokenizer OrionStarAI/Orion-MOE8x7B-Chat --prompt hi
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python demo/text_generation_base.py --model OrionStarAI/Orion-MOE8x7B-Base --tokenizer OrionStarAI/Orion-MOE8x7B-Base --prompt hello

 ```
````
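Assembled from the new side of the hunk above, the updated snippet reads as one block. The last two lines are not part of this diff: the `response = model.chat(...)` call is an assumption, inferred from the `print(response)` context in the README_zh.md hunk below and from other OrionStar model cards, so the actual README may end differently.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation.utils import GenerationConfig

# Loading code from this commit: slow tokenizer, remote code trusted,
# weights sharded across available GPUs in bfloat16.
tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
                                          use_fast=False,
                                          trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
                                             device_map="auto",
                                             torch_dtype=torch.bfloat16,
                                             trust_remote_code=True)

model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base")
messages = [{"role": "user", "content": "Hello, what is your name? "}]

# Not shown in this diff: assumed chat() helper exposed by the model's
# remote code, mirroring other OrionStar READMEs.
response = model.chat(tokenizer, messages, streaming=False)
print(response)
```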
README_zh.md CHANGED

````diff
@@ -1,4 +1,3 @@
-
 <!-- markdownlint-disable first-line-h1 -->
 <!-- markdownlint-disable html -->
 <div align="center">
@@ -14,7 +13,7 @@
 <div align="center">

 <div align="center">
-<b>🇨🇳中文</b> | <a href="./README.md">🌐English</a>
+<b>🇨🇳中文</b> | <a href="./README.md">🌐English</a>
 </div>

 <h4 align="center">
@@ -170,9 +169,13 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from transformers.generation.utils import GenerationConfig

-tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
-
-
+tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
+                                          use_fast=False,
+                                          trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
+                                             device_map="auto",
+                                             torch_dtype=torch.bfloat16,
+                                             trust_remote_code=True)

 model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base")
 messages = [{"role": "user", "content": "你好! 你叫什么名字!"}]
@@ -188,17 +191,14 @@ print(response)
 ## 4.2. 脚本直接推理

 ```shell
-# base model
-CUDA_VISIBLE_DEVICES=0 python demo/text_generation_base.py --model OrionStarAI/Orion-14B --tokenizer OrionStarAI/Orion-14B --prompt 你好,你叫什么名字

-#
-CUDA_VISIBLE_DEVICES=0 python demo/
+# base model
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python demo/text_generation_base.py --model OrionStarAI/Orion-MOE8x7B-Base --tokenizer OrionStarAI/Orion-MOE8x7B-Base --prompt 你好,你叫什么名字

 ```



-
 <a name="zh_declarations-license"></a><br>
 # 5. 声明、协议

````
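Both files now run the base-model demo across eight GPUs by prefixing the command with a device list. As the English hunk context notes, the same mapping can instead be set once with `export`; a sketch of that equivalent invocation:

```shell
# Expose all eight GPUs to the process once, instead of prefixing every
# command; equivalent to the inline form in the updated README.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# base model
python demo/text_generation_base.py \
    --model OrionStarAI/Orion-MOE8x7B-Base \
    --tokenizer OrionStarAI/Orion-MOE8x7B-Base \
    --prompt hello
```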