huangyi committed on
Commit
f82faaa
·
1 Parent(s): 711fc0a

readme: Update inference code

Browse files

Signed-off-by: huangyi <[email protected]>

Files changed (2) hide show
  1. README.md +8 -7
  2. README_zh.md +10 -10
README.md CHANGED
@@ -171,9 +171,13 @@ import torch
171
  from transformers import AutoModelForCausalLM, AutoTokenizer
172
  from transformers.generation.utils import GenerationConfig
173
 
174
- tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base", use_fast=False, trust_remote_code=True)
175
- model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-MOE8x7B", device_map="auto",
176
- torch_dtype=torch.bfloat16, trust_remote_code=True)
 
 
 
 
177
 
178
  model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base")
179
  messages = [{"role": "user", "content": "Hello, what is your name? "}]
@@ -190,10 +194,7 @@ device, you can use something like `export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7`
190
  ```shell
191
 
192
  # base model
193
- CUDA_VISIBLE_DEVICES=0 python demo/text_generation_base.py --model OrionStarAI/Orion-MOE8x7B --tokenizer OrionStarAI/Orion-MOE8x7B --prompt hello
194
-
195
- # chat model
196
- CUDA_VISIBLE_DEVICES=0 python demo/text_generation.py --model OrionStarAI/Orion-MOE8x7B-Chat --tokenizer OrionStarAI/Orion-MOE8x7B-Chat --prompt hi
197
 
198
  ```
199
 
 
171
  from transformers import AutoModelForCausalLM, AutoTokenizer
172
  from transformers.generation.utils import GenerationConfig
173
 
174
+ tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
175
+ use_fast=False,
176
+ trust_remote_code=True)
177
+ model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
178
+ device_map="auto",
179
+ torch_dtype=torch.bfloat16,
180
+ trust_remote_code=True)
181
 
182
  model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base")
183
  messages = [{"role": "user", "content": "Hello, what is your name? "}]
 
194
  ```shell
195
 
196
  # base model
197
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python demo/text_generation_base.py --model OrionStarAI/Orion-MOE8x7B-Base --tokenizer OrionStarAI/Orion-MOE8x7B-Base --prompt hello
 
 
 
198
 
199
  ```
200
 
README_zh.md CHANGED
@@ -1,4 +1,3 @@
1
-
2
  <!-- markdownlint-disable first-line-h1 -->
3
  <!-- markdownlint-disable html -->
4
  <div align="center">
@@ -14,7 +13,7 @@
14
  <div align="center">
15
 
16
  <div align="center">
17
- <b>🇨🇳中文</b> | <a href="./README.md">🌐English</a>
18
  </div>
19
 
20
  <h4 align="center">
@@ -170,9 +169,13 @@ import torch
170
  from transformers import AutoModelForCausalLM, AutoTokenizer
171
  from transformers.generation.utils import GenerationConfig
172
 
173
- tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base", use_fast=False, trust_remote_code=True)
174
- model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base", device_map="auto",
175
- torch_dtype=torch.bfloat16, trust_remote_code=True)
 
 
 
 
176
 
177
  model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base")
178
  messages = [{"role": "user", "content": "你好! 你叫什么名字!"}]
@@ -188,17 +191,14 @@ print(response)
188
  ## 4.2. 脚本直接推理
189
 
190
  ```shell
191
- # base model
192
- CUDA_VISIBLE_DEVICES=0 python demo/text_generation_base.py --model OrionStarAI/Orion-14B --tokenizer OrionStarAI/Orion-14B --prompt 你好,你叫什么名字
193
 
194
- # chat model
195
- CUDA_VISIBLE_DEVICES=0 python demo/text_generation.py --model OrionStarAI/Orion-14B-Chat --tokenizer OrionStarAI/Orion-14B-Chat --prompt 你好,你叫什么名字
196
 
197
  ```
198
 
199
 
200
 
201
-
202
  <a name="zh_declarations-license"></a><br>
203
  # 5. 声明、协议
204
 
 
 
1
  <!-- markdownlint-disable first-line-h1 -->
2
  <!-- markdownlint-disable html -->
3
  <div align="center">
 
13
  <div align="center">
14
 
15
  <div align="center">
16
+ <b>🇨🇳中文</b> | <a href="./README.md">🌐English</a>
17
  </div>
18
 
19
  <h4 align="center">
 
169
  from transformers import AutoModelForCausalLM, AutoTokenizer
170
  from transformers.generation.utils import GenerationConfig
171
 
172
+ tokenizer = AutoTokenizer.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
173
+ use_fast=False,
174
+ trust_remote_code=True)
175
+ model = AutoModelForCausalLM.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base",
176
+ device_map="auto",
177
+ torch_dtype=torch.bfloat16,
178
+ trust_remote_code=True)
179
 
180
  model.generation_config = GenerationConfig.from_pretrained("OrionStarAI/Orion-MOE8x7B-Base")
181
  messages = [{"role": "user", "content": "你好! 你叫什么名字!"}]
 
191
  ## 4.2. 脚本直接推理
192
 
193
  ```shell
 
 
194
 
195
+ # base model
196
+ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python demo/text_generation_base.py --model OrionStarAI/Orion-MOE8x7B-Base --tokenizer OrionStarAI/Orion-MOE8x7B-Base --prompt 你好,你叫什么名字
197
 
198
  ```
199
 
200
 
201
 
 
202
  <a name="zh_declarations-license"></a><br>
203
  # 5. 声明、协议
204