Spaces:

ID2223Lab2
/

Lab2

Runtime error

App Files Community

EITD committed on Nov 27, 2024

Commit

1975e5f

1 Parent(s): 7b75d63

try

Browse files

Files changed (2) hide show

app.py +42 -49
requirements.txt +8 -8

app.py CHANGED Viewed

@@ -1,25 +1,29 @@
-import gradio as gr
-# from huggingface_hub import InferenceClient
-# from peft import AutoPeftModelForCausalLM
-# from transformers import AutoTokenizer, TextStreamer, BitsAndBytesConfig
-from transformers import AutoTokenizer, AutoModelForCausalLM
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 # client = InferenceClient("EITD/lora_model", token=os.getenv("HF_TOKEN"))
-# model_name = "lora_model"
-# model = AutoPeftModelForCausalLM.from_pretrained(
-#     model_name,
-#     load_in_4bit = True,
 # )
-# tokenizer = AutoTokenizer.from_pretrained(model_name)
-model_id = "EITD/model"
-filename = "unsloth.Q4_K_M.gguf"
-tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
-model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
 def respond(
     message,
@@ -53,24 +57,13 @@ def respond(
     #     response += token
     #     yield response
-    # inputs = tokenizer.apply_chat_template(
-    #     messages,
-    #     tokenize = True,
-    #     add_generation_prompt = True, # Must add for generation
-    #     return_tensors = "pt",
-    # )
-    conversation = ''
-    for msg in messages:
-        if msg['role'] == 'user':
-            conversation += f"User: {msg['content']}\n"
-        elif msg['role'] == 'assistant':
-            conversation += f"Assistant: {msg['content']}\n"
-    conversation += "Assistant: "
-    inputs = tokenizer.encode(conversation, return_tensors='pt')
-    # text_streamer = TextStreamer(tokenizer, skip_prompt = True)
-    # for response in model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = max_tokens, use_cache = True,
-    #                         temperature = temperature, min_p = top_p):
-    #     yield response
     outputs = model.generate(input_ids = inputs, max_new_tokens = max_tokens, use_cache = True,
                          temperature = temperature, min_p = top_p)
     return tokenizer.batch_decode(outputs)
@@ -79,22 +72,22 @@ def respond(
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-if __name__ == "__main__":
-    demo.launch()

+from peft import AutoPeftModelForCausalLM
+from transformers import AutoTokenizer
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 # client = InferenceClient("EITD/lora_model", token=os.getenv("HF_TOKEN"))
+model = AutoPeftModelForCausalLM.from_pretrained(
+        "EITD/lora_model_1", # YOUR MODEL YOU USED FOR TRAINING
+        load_in_4bit = False,
+    )
+tokenizer = AutoTokenizer.from_pretrained("EITD/lora_model_1")
+# messages = [{"role": "user", "content": "Continue the Fibonacci sequence: 1, 1, 2, 3, 5, 8,"},]
+# inputs = tokenizer.apply_chat_template(
+#     messages,
+#     tokenize = True,
+#     add_generation_prompt = True, # Must add for generation
+#     return_tensors = "pt",
 # )
+# outputs = model.generate(input_ids = inputs, max_new_tokens = 64, use_cache = True,
+#                         temperature = 1.5, min_p = 0.1)
+# print(tokenizer.batch_decode(outputs))
 def respond(
     message,
     #     response += token
     #     yield response
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        tokenize = True,
+        add_generation_prompt = True, # Must add for generation
+        return_tensors = "pt",
+    )
     outputs = model.generate(input_ids = inputs, max_new_tokens = max_tokens, use_cache = True,
                          temperature = temperature, min_p = top_p)
     return tokenizer.batch_decode(outputs)
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
+# demo = gr.ChatInterface(
+#     respond,
+#     additional_inputs=[
+#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+#         gr.Slider(
+#             minimum=0.1,
+#             maximum=1.0,
+#             value=0.95,
+#             step=0.05,
+#             label="Top-p (nucleus sampling)",
+#         ),
+#     ],
+# )
+# if __name__ == "__main__":
+#     demo.launch()

requirements.txt CHANGED Viewed

@@ -1,10 +1,10 @@
-huggingface_hub==0.25.2
-# peft==0.13.2
 transformers==4.46.3
 # bitsandbytes==0.42.0
-torch==2.5.1
-gguf==0.10.0
-sentencepiece==0.2.0
-numpy<2.0.0
-accelerate==1.1.1
-unsloth==2024.11.9

+# huggingface_hub==0.25.2
+peft==0.13.2
 transformers==4.46.3
 # bitsandbytes==0.42.0
+# torch==2.5.1
+# gguf==0.10.0
+# sentencepiece==0.2.0
+# numpy==1.26.4
+# accelerate==1.1.1
+# unsloth==2024.11.9