cicdatopea
committed on
Update README.md
README.md CHANGED
@@ -26,7 +26,7 @@ On CUDA devices, the computation dtype is typically FP16 for int4, which may le
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
-quantized_model_dir = "/
+quantized_model_dir = "OPEA/DeepSeek-V3-int4-sym-gptq-inc"
 
 ## directly use device_map='auto' if you have enough GPUs
 max_memory = {i: "75GiB" for i in range(7)}
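
For context, the change points `quantized_model_dir` at the Hub repo id instead of a local path. Below is a minimal sketch of how the snippet shown in this hunk is typically completed to load the quantized checkpoint; the `from_pretrained` arguments and the tokenizer call are assumptions, not part of this diff.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

quantized_model_dir = "OPEA/DeepSeek-V3-int4-sym-gptq-inc"

## directly use device_map='auto' if you have enough GPUs
max_memory = {i: "75GiB" for i in range(7)}

# Assumed completion of the snippet: shard the quantized model across GPUs,
# capping per-device usage with the max_memory map defined above.
model = AutoModelForCausalLM.from_pretrained(
    quantized_model_dir,
    torch_dtype=torch.float16,
    device_map="auto",
    max_memory=max_memory,
)
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir)
```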