Blancior committed on
Commit 1a99148 · verified · 1 Parent(s): 9e70f8c

Update app.py

Files changed (1)
  1. app.py +15 -9
app.py CHANGED
@@ -1,21 +1,28 @@
+# app.py
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
 
 def load_model():
-    model_name = "TheBloke/Llama-2-13B-chat-GPTQ"
-    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+    model_name = "meta-llama/Llama-2-13b-chat-hf"
+
+    # 4-bit quantization configuration
+    quantization_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.float16,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_use_double_quant=True
+    )
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="auto",
-        trust_remote_code=True,
-        revision="main",
-        quantization_config={"load_in_4bit": True}  # add 4-bit quantization
+        quantization_config=quantization_config,
+        trust_remote_code=True
     )
     return model, tokenizer
 
-# The rest of the code stays unchanged
-
 def generate_response(prompt, max_length=100):
     try:
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
@@ -36,7 +43,6 @@ print("Loading model...")
 model, tokenizer = load_model()
 print("Model loaded!")
 
-# Gradio interface
 iface = gr.Interface(
     fn=generate_response,
     inputs=[
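
In short, the commit swaps the pre-quantized GPTQ checkpoint (TheBloke/Llama-2-13B-chat-GPTQ) for the full-precision chat checkpoint (meta-llama/Llama-2-13b-chat-hf) and quantizes it to 4 bits at load time through a proper BitsAndBytesConfig (NF4, double quantization, float16 compute) rather than a bare dict. Below is a minimal sketch of the resulting load-and-generate path. The diff truncates generate_response after the tokenizer call, so the generate/decode lines and the max_new_tokens mapping are assumptions following the standard transformers pattern; running it also requires approved access to the gated meta-llama repo plus a CUDA GPU with bitsandbytes installed.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    model_name = "meta-llama/Llama-2-13b-chat-hf"  # gated repo; needs approved access

    # Same 4-bit NF4 setup as the commit introduces
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=quantization_config,
    )

    def generate_response(prompt, max_length=100):
        # The diff cuts this function off after the tokenizer call;
        # the generate/decode steps below are an assumed completion.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        output_ids = model.generate(**inputs, max_new_tokens=max_length)
        return tokenizer.decode(output_ids[0], skip_special_tokens=True)

    print(generate_response("Hello, who are you?"))

One trade-off worth noting: the GPTQ checkpoint ships already quantized and downloads small, while the bitsandbytes route downloads the full float16 weights (roughly 26 GB for a 13B model) and quantizes them on the fly at startup.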