---
datasets:
- NeelNanda/pile-10k
base_model:
- deepseek-ai/DeepSeek-V3
---

## Model Details

This model is an int4 model with group_size 128 and symmetric quantization of [deepseek-ai/DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3), generated by the [intel/auto-round](https://github.com/intel/auto-round) algorithm.

Please follow the license of the original model.
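
For context, the sketch below shows how a checkpoint like this is typically produced with auto-round. The bit width, group size, symmetry, and calibration dataset match what is stated above and in the front matter, but the exact recipe used for this preview is an assumption, not taken from this card.

~~~python
## hedged sketch of the assumed quantization recipe (int4, group_size 128,
## symmetric, calibrated on NeelNanda/pile-10k); not the exact script used here
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "deepseek-ai/DeepSeek-V3"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

## quantize to int4 with the settings named above, then export in GPTQ format
autoround = AutoRound(model, tokenizer, bits=4, group_size=128, sym=True,
                      dataset="NeelNanda/pile-10k")
autoround.quantize()
autoround.save_quantized("./DeepSeek-V3-int4-sym", format="auto_gptq")
~~~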

## How To Use

### INT4 Inference on CPU with Qbits

Install with `pip3 install auto-round` **(this installs both intel-extension-for-pytorch and intel-extension-for-transformers)**. On an Intel CPU, auto-round will use intel-extension-for-pytorch; on other CPUs, it will use intel-extension-for-transformers.

**To make sure Qbits with intel-extension-for-transformers is used, please uninstall intel-extension-for-pytorch, which we have not yet tested with this model.**
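
If you are unsure which backend your environment will pick up, a quick check like the one below (generic Python, not part of the original card) confirms what is installed before you load the model.

~~~python
## check which Intel extension is installed; the qbits path requires
## intel-extension-for-transformers without intel-extension-for-pytorch
import importlib.util

for pkg in ("intel_extension_for_pytorch", "intel_extension_for_transformers"):
    found = importlib.util.find_spec(pkg) is not None
    print(f"{pkg}: {'installed' if found else 'not installed'}")
~~~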

~~~python
from auto_round import AutoRoundConfig  ## must import for the autoround format
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

quantized_model_dir = "OPEA/DeepSeek-V3-int4-sym-gptq-inc-preview"
quantization_config = AutoRoundConfig(
    backend="cpu"
)
model = AutoModelForCausalLM.from_pretrained(
    quantized_model_dir,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="cpu",
    revision="8fe0735",  ## use the autoround format; the only difference is config.json
    quantization_config=quantization_config,  ## a cpu-only machine may not be able to set this
)

tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
prompt = "There is a girl who likes adventure,"
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=200,  ## change this to align with the official usage
    do_sample=False  ## change this to align with the official usage
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)

prompt = "9.11和9.8哪个数字大"  ## "Which number is larger, 9.11 or 9.8?"
##INT4

prompt = "Please give a brief introduction of DeepSeek company."
##INT4:
"""DeepSeek Artificial Intelligence Co., Ltd. (referred to as "DeepSeek" or "深度求索"), founded in 2023, is a Chinese company dedicated to making AGI a reality"""
~~~

### INT4 Inference on CUDA (not yet tested; may require 8x 80GB GPUs)

````python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

quantized_model_dir = "OPEA/DeepSeek-V3-int4-sym-gptq-inc-preview"

model = AutoModelForCausalLM.from_pretrained(
    quantized_model_dir,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
prompt = "There is a girl who likes adventure,"
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=200,  ## change this to align with the official usage
    do_sample=False  ## change this to align with the official usage
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
````
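
Since multi-GPU dispatch has not been tested for this checkpoint, one standard option (ordinary transformers/accelerate usage, not taken from this card; the memory figures below are assumptions) is to cap per-device memory so `device_map="auto"` shards the model across all cards:

~~~python
## hypothetical sharding sketch: cap per-GPU memory so device_map="auto"
## spreads the int4 weights across 8x 80GB cards (figures are assumptions)
import torch
from transformers import AutoModelForCausalLM

quantized_model_dir = "OPEA/DeepSeek-V3-int4-sym-gptq-inc-preview"
max_memory = {i: "75GiB" for i in range(8)}  ## leave headroom on each 80GB card
max_memory["cpu"] = "200GiB"                 ## allow spill-over to CPU RAM

model = AutoModelForCausalLM.from_pretrained(
    quantized_model_dir,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
    max_memory=max_memory,
)
~~~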

### Evaluate the model
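
The evaluation details were not captured here. As a placeholder, the sketch below shows one common way to score such a checkpoint with lm-evaluation-harness; the API call is standard lm-eval usage, but the task list and batch size are assumptions, not from this card.

~~~python
## hedged evaluation sketch via lm-evaluation-harness (pip3 install lm-eval);
## tasks and batch size are assumptions, not the card's official settings
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=OPEA/DeepSeek-V3-int4-sym-gptq-inc-preview,trust_remote_code=True",
    tasks=["lambada_openai", "hellaswag"],
    batch_size=16,
)
print(results["results"])
~~~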