EITD committed on
Commit e823295 · 1 Parent(s): 43f9110
Files changed (1)
  1. app.py +15 -11
app.py CHANGED
@@ -1,22 +1,26 @@
  import gradio as gr
  # from huggingface_hub import InferenceClient
- from peft import AutoPeftModelForCausalLM
- from transformers import AutoTokenizer, TextStreamer, BitsAndBytesConfig
+ # from peft import AutoPeftModelForCausalLM
+ # from transformers import AutoTokenizer, TextStreamer, BitsAndBytesConfig
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

  """
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
  """
  # client = InferenceClient("EITD/lora_model", token=os.getenv("HF_TOKEN"))

- model_name = "EITD/lora_model"
- bnb_config = BitsAndBytesConfig(
-     load_in_4bit=True
- )
- model = AutoPeftModelForCausalLM.from_pretrained(
-     model_name,
-     quantization_config=bnb_config
- )
- tokenizer = AutoTokenizer.from_pretrained(model_name)
+ # model_name = "lora_model"
+ # model = AutoPeftModelForCausalLM.from_pretrained(
+ #     model_name,
+ #     load_in_4bit = True,
+ # )
+ # tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ model_id = "EITD/model"
+ filename = "unsloth.Q4_K_M.gguf"
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
+ model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)

  def respond(
      message,
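
The commit swaps the PEFT 4-bit load for transformers' native GGUF loading (the `gguf_file=` argument), which dequantizes the checkpoint on load, so no BitsAndBytesConfig is needed on the new path. A minimal usage sketch follows; the `respond` body is truncated above, so the message format, `TextStreamer` wiring, and generation settings below are illustrative assumptions, not the committed code.

# Hypothetical usage sketch: not part of the commit. Assumes the chat-style
# respond() handler builds an OpenAI-style message list for the model.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model_id = "EITD/model"
filename = "unsloth.Q4_K_M.gguf"

# transformers dequantizes the GGUF weights at load time.
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)

messages = [{"role": "user", "content": "Hello!"}]

# apply_chat_template formats the turn list with the model's chat template
# and returns input ids ready for generate().
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)

# TextStreamer (kept in the new import line) prints tokens to stdout as they
# are produced instead of waiting for the full completion.
streamer = TextStreamer(tokenizer, skip_prompt=True)
model.generate(inputs, streamer=streamer, max_new_tokens=128)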