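# Scraped Hugging Face Space file view (page chrome, not part of the program):
# author avatar: macadeliccc's picture
# commit message: "test" | commit: a9a8422
# page links: raw, history, blame
# file size: 2.01 kB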
import spaces
import gradio as gr
import torch
from gradio import State
from transformers import AutoTokenizer, AutoModelForCausalLM
# Hub identifier of the checkpoint shared by the tokenizer and the model.
MODEL_ID = "berkeley-nest/Starling-LM-7B-alpha"

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Fetch the tokenizer and the weights, then place the model on the chosen device.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
model = model.to(device)

# Inference only: switch the model to evaluation mode.
model.eval()
# Turn markers of the chat template used for the prompt. Every user and
# assistant message is closed by the end-of-turn marker, and the generation cue
# carries no trailing space.
_USER_PREFIX = "GPT4 Correct User: "
_ASSISTANT_PREFIX = "GPT4 Correct Assistant:"
_END_OF_TURN = "<|end_of_turn|>"

_MAX_INPUT_TOKENS = 1024   # prompt budget, in tokens
_MAX_NEW_TOKENS = 512      # reply budget, in tokens
_MAX_HISTORY_CHARS = 1024  # cap on the stored transcript, in characters


def _trim_history(history):
    """Cap *history* at ``_MAX_HISTORY_CHARS`` characters.

    When the cut lands inside a turn, the partial leading turn is dropped so
    the kept text starts at a user-turn marker (if one is present).
    """
    if len(history) <= _MAX_HISTORY_CHARS:
        return history
    tail = history[-_MAX_HISTORY_CHARS:]
    turn_start = tail.find(_USER_PREFIX)
    return tail[turn_start:] if turn_start > 0 else tail


@spaces.GPU
def generate_response(user_input, chat_history):
    """Generate the assistant's reply to *user_input*.

    Args:
        user_input: The new user message (plain text).
        chat_history: Transcript of earlier turns as a single string in the
            prompt's turn format, or ``None``/``""`` when the chat is new.

    Returns:
        A ``(response, new_history)`` tuple of strings. ``response`` contains
        only the newly generated text; ``new_history`` is the transcript
        extended with this exchange, capped at ``_MAX_HISTORY_CHARS``
        characters. If anything fails, ``response`` is an error message and
        the history is returned unchanged.
    """
    # A fresh session passes None; treat it as an empty transcript so the
    # string concatenation below cannot raise TypeError.
    history = chat_history or ""
    try:
        prompt = (
            f"{history}{_USER_PREFIX}{user_input}"
            f"{_END_OF_TURN}{_ASSISTANT_PREFIX}"
        )

        # A single sequence needs no padding. Truncate by hand from the left
        # so that the newest message and the assistant cue always survive.
        encoded = tokenizer(prompt, return_tensors="pt")
        encoded = {
            name: tensor[:, -_MAX_INPUT_TOKENS:].to(device)
            for name, tensor in encoded.items()
        }
        prompt_length = encoded["input_ids"].shape[1]

        with torch.no_grad():
            # max_new_tokens bounds the reply alone; max_length would also
            # count the prompt and leave no room after a long one.
            output = model.generate(
                **encoded,
                max_new_tokens=_MAX_NEW_TOKENS,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,
            )

        # The generated sequence starts with the prompt tokens; decode only
        # what follows them so the reply does not echo the prompt.
        response = tokenizer.decode(
            output[0][prompt_length:], skip_special_tokens=True
        ).strip()

        # The prompt already contains the earlier history, so append just the
        # reply and close the assistant turn.
        new_history = f"{prompt} {response}{_END_OF_TURN}"
        return response, _trim_history(new_history)
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing the
        # event handler, and leave the stored history as it was.
        return f"Error occurred: {e}", chat_history
# Gradio Interface
def clear_chat():
    """Return the blank values that reset the chat display and the history."""
    blank_display = ""
    blank_history = ""
    return blank_display, blank_history
def _respond(message, displayed_turns, history):
    """Adapt ``generate_response`` to the Chatbot component.

    ``generate_response`` returns a plain reply string, whereas the Chatbot is
    fed a list of (user, bot) pairs here, so the new exchange is appended to
    the turns already on screen.
    NOTE(review): assumes the Chatbot uses the pair-list value format; confirm
    against the installed Gradio version.

    Returns:
        ``(turns, new_history)``: the updated list of displayed pairs and the
        updated history string.
    """
    reply, new_history = generate_response(message, history)
    turns = list(displayed_turns or [])
    turns.append((message, reply))
    return turns, new_history


with gr.Blocks(theme=gr.themes.Soft()) as app:
    with gr.Row():
        chatbot = gr.Chatbot()
    with gr.Row():
        user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
        send = gr.Button("Send")
        clear = gr.Button("Clear")
    # Holds the chat history; starts as an empty string so the first turn can
    # be concatenated onto it.
    chat_history = gr.State("")

    send.click(
        _respond,
        inputs=[user_input, chatbot, chat_history],
        outputs=[chatbot, chat_history],
    )
    clear.click(clear_chat, outputs=[chatbot, chat_history])

app.launch()