EITD committed
Commit 22d22f7 · 1 parent: db267f5
Files changed (2):
  1. app.py +10 -3
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,7 +3,8 @@ import gradio as gr
  # from peft import AutoPeftModelForCausalLM
  # from transformers import AutoTokenizer, TextStreamer, BitsAndBytesConfig
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
-
+ from unsloth.chat_templates import get_chat_template
+ from unsloth import FastLanguageModel
  """
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
  """
@@ -22,6 +23,12 @@ filename = "unsloth.Q4_K_M.gguf"
  tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
  model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)

+ tokenizer = get_chat_template(
+     tokenizer,
+     chat_template = "llama-3.1",
+ )
+ FastLanguageModel.for_inference(model)
+
  def respond(
      message,
      history: list[tuple[str, str]],
@@ -88,5 +95,5 @@ demo = gr.ChatInterface(
  )


- if __name__ == "__main__":
-     demo.launch()
+ # if __name__ == "__main__":
+ #     demo.launch()
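After this change, app.py leans on Unsloth to wrap the GGUF tokenizer with the llama-3.1 chat template and to put the model into Unsloth's inference mode. As a rough illustration only (not part of the commit), the wrapped tokenizer would typically be driven along these lines inside respond(); the messages list and generation settings below are assumptions, and `tokenizer`/`model` are the objects built above in app.py.

# Sketch under assumptions: `tokenizer` and `model` come from app.py above;
# the chat content and generation parameters are invented for illustration.
messages = [{"role": "user", "content": "Hello, who are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,  # end the prompt with the assistant header so the model replies
    return_tensors="pt",
)
output_ids = model.generate(input_ids=input_ids, max_new_tokens=128)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))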
requirements.txt CHANGED
@@ -6,4 +6,5 @@ torch==2.5.1
  gguf==0.10.0
  sentencepiece==0.2.0
  numpy<2.0.0
- accelerate==1.1.1
+ accelerate==1.1.1
+ unsloth==2024.11.9