larry1129 committed on
Commit
7011baa
·
verified ·
1 Parent(s): 0d5b53f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -45,17 +45,25 @@ def generate_response(instruction, input_text):
45
 
46
  # 在函数内部导入需要 GPU 的库
47
  import torch
48
- from transformers import AutoTokenizer, AutoModelForCausalLM
49
  from peft import PeftModel
50
 
 
 
 
 
 
 
 
 
51
  # 加载分词器
52
  tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=hf_token)
53
 
54
  # 加载基础模型
55
  base_model = AutoModelForCausalLM.from_pretrained(
56
  base_model_name,
 
57
  device_map="auto",
58
- torch_dtype=torch.float16,
59
  use_auth_token=hf_token,
60
  trust_remote_code=True
61
  )
 
45
 
46
  # 在函数内部导入需要 GPU 的库
47
  import torch
48
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
49
  from peft import PeftModel
50
 
51
+ # 创建量化配置
52
+ bnb_config = BitsAndBytesConfig(
53
+ load_in_4bit=True,
54
+ bnb_4bit_use_double_quant=True,
55
+ bnb_4bit_quant_type="nf4",
56
+ bnb_4bit_compute_dtype=torch.float16
57
+ )
58
+
59
  # 加载分词器
60
  tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_auth_token=hf_token)
61
 
62
  # 加载基础模型
63
  base_model = AutoModelForCausalLM.from_pretrained(
64
  base_model_name,
65
+ quantization_config=bnb_config,
66
  device_map="auto",
 
67
  use_auth_token=hf_token,
68
  trust_remote_code=True
69
  )