# Hugging Face Space status badge: Running on Zero
import spaces | |
import gradio as gr | |
import torch | |
from gradio import State | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Checkpoint shared by the tokenizer and the causal language model.
_MODEL_ID = "berkeley-nest/Starling-LM-7B-alpha"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)

# Load the weights and switch straight to evaluation mode (`eval()` returns
# the module itself, so the call can be chained).
model = AutoModelForCausalLM.from_pretrained(_MODEL_ID).eval()

# Place the model on the GPU when CUDA is reported as available; otherwise
# leave it where `from_pretrained` put it.
model = model.to("cuda") if torch.cuda.is_available() else model
# Turn separator used by the chat template this prompt is built with.
_END_OF_TURN = "<|end_of_turn|>"
# Token budgets: how much of the (most recent) prompt is kept, and how many
# tokens the model may add on top of it.
_MAX_PROMPT_TOKENS = 1024
_MAX_NEW_TOKENS = 512


def generate_response(user_input, chat_history):
    """Generate the assistant's reply to ``user_input``.

    Args:
        user_input: The new user message.
        chat_history: The prompt text accumulated over earlier turns, or a
            falsy value (``None`` / ``""``) for a fresh conversation.

    Returns:
        A ``(response, new_history)`` tuple of strings: the text of the newly
        generated reply only, and the prompt text to pass back in as
        ``chat_history`` on the next turn. A blank ``user_input`` yields
        ``("", chat_history or "")`` without calling the model.

    NOTE(review): the file imports ``spaces`` but nothing is decorated with
    ``@spaces.GPU``; if this runs on a ZeroGPU Space this function probably
    needs that decorator -- confirm against the deployment.
    """
    history = chat_history or ""
    if not user_input or not user_input.strip():
        # Nothing to answer; leave the conversation untouched.
        return "", history

    # Each finished turn is closed with the end-of-turn marker, and the prompt
    # ends on the assistant tag so the model continues from there.
    prompt = (
        f"{history}GPT4 Correct User: {user_input}{_END_OF_TURN}"
        "GPT4 Correct Assistant:"
    )

    # A single sequence needs no padding. Keep only the most recent tokens so
    # the newest user turn and the assistant tag are never the part cut off.
    encoded = tokenizer(prompt, return_tensors="pt")
    inputs = {
        name: tensor[:, -_MAX_PROMPT_TOKENS:].to(model.device)
        for name, tensor in encoded.items()
    }
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            # Budget for the reply itself, independent of the prompt length.
            max_new_tokens=_MAX_NEW_TOKENS,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # `generate` returns prompt + continuation; decode the continuation only
    # so the reply does not echo the whole conversation.
    response = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Special tokens were stripped while decoding, so close the assistant turn
    # explicitly before storing it.
    new_history = f"{prompt} {response}{_END_OF_TURN}"
    return response, new_history
# Gradio Interface | |
def clear_chat():
    """Return the pair of empty strings used to reset the chat outputs."""
    cleared_display = ""
    cleared_history = ""
    return cleared_display, cleared_history
def _respond(message, history_text, transcript):
    """Adapt ``generate_response`` to the chat display.

    ``generate_response`` returns a plain string, while the chat display is
    fed a list of exchanges, so the new ``(user, assistant)`` pair is appended
    to the transcript already on screen.

    Args:
        message: Text from the message box.
        history_text: Value of the ``chat_history`` state.
        transcript: Current value of the chat display (may be ``None``).

    Returns:
        ``(updated_transcript, updated_history_text)``.
    """
    exchanges = list(transcript or [])
    if not message or not message.strip():
        # Ignore empty submissions instead of running the model on them.
        return exchanges, history_text
    reply, updated_history = generate_response(message, history_text)
    # NOTE(review): pair format is used here; newer Gradio releases prefer
    # `type="messages"` -- confirm against the pinned Gradio version.
    exchanges.append((message, reply))
    return exchanges, updated_history


# Layout: message box and buttons on the left, conversation on the right.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here...",
            )
            send = gr.Button("Send")
            clear = gr.Button("Clear")
        with gr.Column():
            chatbot = gr.Chatbot()

    # Holds the chat history that is passed back to the model each turn.
    chat_history = gr.State()

    send.click(
        _respond,
        inputs=[user_input, chat_history, chatbot],
        outputs=[chatbot, chat_history],
    )
    clear.click(clear_chat, outputs=[chatbot, chat_history])

app.launch()