Update app.py: load the base model in 4-bit (NF4) via BitsAndBytesConfig instead of fp16
app.py CHANGED
@@ -45,17 +45,25 @@ def generate_response(instruction, input_text):
 
     # Import the GPU-dependent libraries inside the function
     import torch
-    from transformers import AutoTokenizer, AutoModelForCausalLM
+    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
     from peft import PeftModel
 
+    # Create the quantization config
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.float16
+    )
+
     # Load the tokenizer
     tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=hf_token)
 
     # Load the base model
     base_model = AutoModelForCausalLM.from_pretrained(
         base_model_name,
+        quantization_config=bnb_config,
         device_map="auto",
-        torch_dtype=torch.float16,
         use_auth_token=hf_token,
         trust_remote_code=True
     )
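
The change replaces plain fp16 loading with 4-bit NF4 quantization (plus double quantization), which cuts the base model's weight memory roughly in half again relative to fp16 while keeping compute in float16; this is what typically lets a larger model fit on a small Space GPU. For context, here is a minimal sketch of how the rest of generate_response might use these pieces. The adapter repo name (adapter_model_name), the prompt format, and the generation settings are assumptions for illustration; they do not appear in this diff.

    # Hypothetical continuation of generate_response (not part of the commit):

    # Attach the LoRA adapter on top of the quantized base model
    model = PeftModel.from_pretrained(base_model, adapter_model_name)  # adapter_model_name is assumed
    model.eval()

    # Assumed instruction-style prompt format; the real template lives elsewhere in app.py
    prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=256)

    # Decode only the newly generated tokens, skipping the echoed prompt
    return tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)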