macadeliccc committed
Commit b1f30ac · 1 Parent(s): 4911f6e

Files changed (1):
  1. app.py +20 -23

app.py CHANGED
@@ -4,52 +4,49 @@ import torch
 from gradio import State
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+# Select the device (GPU if available, else CPU)
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("berkeley-nest/Starling-LM-7B-alpha").to(device)
 model = AutoModelForCausalLM.from_pretrained("berkeley-nest/Starling-LM-7B-alpha").to(device)
 
-
 @spaces.GPU
 def generate_response(user_input, chat_history):
-
-    prompt = "GPT4 Correct User: " + user_input + "GPT4 Correct Assistant: "
-    if chat_history:
-        prompt = chat_history + prompt
-    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=1024)
-
-    # Ensure all tensors are moved to the model's device
-    inputs = {k: v.to(model.device) for k, v in inputs.items()}
+    try:
+        prompt = "GPT4 Correct User: " + user_input + "<|end_of_turn|>" + "GPT4 Correct Assistant: "
+        if chat_history:
+            prompt = chat_history[-1024:] + prompt  # Keep last 1024 tokens of history
+
+        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=1024)
+        inputs = {k: v.to(device) for k, v in inputs.items()}  # Move input tensors to the same device as the model
 
-    with torch.no_grad():
-        # Generate the model's output
-        output = model.generate(**inputs, max_length=512, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
+        with torch.no_grad():
+            output = model.generate(**inputs, max_length=512, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
+
+        response = tokenizer.decode(output[0], skip_special_tokens=True)
+        new_history = chat_history + prompt + response
+        return response, new_history[-1024:]  # Return last 1024 tokens of history
 
-    # Update chat history
-    new_history = prompt + response
-    return response, new_history
+    except Exception as e:
+        return f"Error occurred: {e}", chat_history
 
 # Gradio Interface
 def clear_chat():
     return "", ""
 
-
 with gr.Blocks(gr.themes.Soft()) as app:
-
     with gr.Row():
         chatbot = gr.Chatbot()
-
+
     with gr.Row():
         user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
         send = gr.Button("Send")
         clear = gr.Button("Clear")
-
-
+
     chat_history = gr.State()  # Holds the chat history
 
     send.click(generate_response, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
     clear.click(clear_chat, outputs=[chatbot, chat_history])
 
-app.launch()
+app.launch()
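
Reviewer note: both sides of this diff call .to(device) on the tokenizer, but transformers tokenizers are plain Python objects rather than torch modules, so that line would raise AttributeError at startup. A minimal sketch of how the loading block would presumably need to look (same checkpoint; only the model is moved to the device):

    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    # Select the device (GPU if available, else CPU)
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Tokenizers run on the CPU and have no .to(); only the model needs device placement.
    tokenizer = AutoTokenizer.from_pretrained("berkeley-nest/Starling-LM-7B-alpha")
    model = AutoModelForCausalLM.from_pretrained("berkeley-nest/Starling-LM-7B-alpha").to(device)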
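A second note on the history trimming: chat_history[-1024:] slices Python string characters, not tokens, despite the "Keep last 1024 tokens" comments. A hypothetical helper (truncate_to_last_tokens is not part of this commit) that would trim by token count instead might look like:

    def truncate_to_last_tokens(text: str, tokenizer, max_tokens: int = 1024) -> str:
        # Encode without special tokens, keep the trailing max_tokens ids, decode back to text.
        ids = tokenizer.encode(text, add_special_tokens=False)
        return tokenizer.decode(ids[-max_tokens:])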
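Finally, on generation length: in transformers, max_length=512 in generate() bounds the prompt plus the continuation, while the tokenizer call above admits prompts of up to 1024 tokens, so a long history could leave no room to generate. A sketch of the usual alternative, assuming the same inputs dict as in the diff:

    # max_new_tokens bounds only the generated continuation, independent of prompt length.
    output = model.generate(**inputs, max_new_tokens=512, pad_token_id=tokenizer.eos_token_id)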