EITD committed on
Commit e823295 · 1 Parent(s): 43f9110
Files changed (1)
  1. app.py +15 -11
app.py CHANGED
@@ -1,22 +1,26 @@
  import gradio as gr
  # from huggingface_hub import InferenceClient
- from peft import AutoPeftModelForCausalLM
- from transformers import AutoTokenizer, TextStreamer, BitsAndBytesConfig
+ # from peft import AutoPeftModelForCausalLM
+ # from transformers import AutoTokenizer, TextStreamer, BitsAndBytesConfig
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

  """
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
  """
  # client = InferenceClient("EITD/lora_model", token=os.getenv("HF_TOKEN"))

- model_name = "EITD/lora_model"
- bnb_config = BitsAndBytesConfig(
-     load_in_4bit=True
- )
- model = AutoPeftModelForCausalLM.from_pretrained(
-     model_name,
-     quantization_config=bnb_config
- )
- tokenizer = AutoTokenizer.from_pretrained(model_name)
+ # model_name = "lora_model"
+ # model = AutoPeftModelForCausalLM.from_pretrained(
+ #     model_name,
+ #     load_in_4bit = True,
+ # )
+ # tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ model_id = "EITD/model"
+ filename = "unsloth.Q4_K_M.gguf"
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
+ model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)

  def respond(
      message,
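
The commit swaps the PEFT 4-bit load for transformers' native GGUF loading (the `gguf_file=` argument), which dequantizes the checkpoint on load, so no BitsAndBytesConfig is needed on the new path. A minimal usage sketch follows; the `respond` body is truncated above, so the message format, `TextStreamer` wiring, and generation settings below are illustrative assumptions, not the committed code.

# Hypothetical usage sketch: not part of the commit. Assumes the chat-style
# respond() handler builds an OpenAI-style message list for the model.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model_id = "EITD/model"
filename = "unsloth.Q4_K_M.gguf"

# transformers dequantizes the GGUF weights at load time.
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)

messages = [{"role": "user", "content": "Hello!"}]

# apply_chat_template formats the turn list with the model's chat template
# and returns input ids ready for generate().
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)

# TextStreamer (kept in the new import line) prints tokens to stdout as they
# are produced instead of waiting for the full completion.
streamer = TextStreamer(tokenizer, skip_prompt=True)
model.generate(inputs, streamer=streamer, max_new_tokens=128)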