Spaces:
Runtime error
Runtime error
test
Browse files
- app.py +10 -3
- requirements.txt +2 -1
app.py
CHANGED
@@ -3,7 +3,8 @@ import gradio as gr
|
|
3 |
# from peft import AutoPeftModelForCausalLM
|
4 |
# from transformers import AutoTokenizer, TextStreamer, BitsAndBytesConfig
|
5 |
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
|
6 |
-
|
|
|
7 |
"""
|
8 |
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
9 |
"""
|
@@ -22,6 +23,12 @@ filename = "unsloth.Q4_K_M.gguf"
|
|
22 |
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
|
23 |
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
def respond(
|
26 |
message,
|
27 |
history: list[tuple[str, str]],
|
@@ -88,5 +95,5 @@ demo = gr.ChatInterface(
|
|
88 |
)
|
89 |
|
90 |
|
91 |
-
if __name__ == "__main__":
|
92 |
-
|
|
|
3 |
# from peft import AutoPeftModelForCausalLM
|
4 |
# from transformers import AutoTokenizer, TextStreamer, BitsAndBytesConfig
|
5 |
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
|
6 |
+
from unsloth.chat_templates import get_chat_template
|
7 |
+
from unsloth import FastLanguageModel
|
8 |
"""
|
9 |
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
|
10 |
"""
|
|
|
23 |
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
|
24 |
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
|
25 |
|
26 |
+
tokenizer = get_chat_template(
|
27 |
+
tokenizer,
|
28 |
+
chat_template = "llama-3.1",
|
29 |
+
)
|
30 |
+
FastLanguageModel.for_inference(model)
|
31 |
+
|
32 |
def respond(
|
33 |
message,
|
34 |
history: list[tuple[str, str]],
|
|
|
95 |
)
|
96 |
|
97 |
|
98 |
+
# if __name__ == "__main__":
|
99 |
+
# demo.launch()
|
requirements.txt
CHANGED
@@ -6,4 +6,5 @@ torch==2.5.1
|
|
6 |
gguf==0.10.0
|
7 |
sentencepiece==0.2.0
|
8 |
numpy<2.0.0
|
9 |
-
accelerate==1.1.1
|
|
|
|
6 |
gguf==0.10.0
|
7 |
sentencepiece==0.2.0
|
8 |
numpy<2.0.0
|
9 |
+
accelerate==1.1.1
|
10 |
+
unsloth==2024.11.9
|