Spaces:

macadeliccc
/

laser-dolphin-mixtral-chat

Running on Zero

App Files Files Community

laser-dolphin-mixtral-chat / app.py

macadeliccc

test

c291324 about 1 year ago

raw

history blame

2.33 kB

	import spaces
	import gradio as gr
	import torch
	from gradio import State
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Checkpoint shared by the tokenizer and the model.
	MODEL_NAME = "berkeley-nest/Starling-LM-7B-alpha"

	# Run on the GPU when torch reports one, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Load the tokenizer and the causal-LM weights, then move the model to the
	# selected device.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
	model = model.to(device)

	# Inference only: switch the module to evaluation mode.
	model.eval()

	@spaces.GPU
	def generate_response(user_input, chat_history):
	    """Generate the assistant's reply to ``user_input``.

	    Args:
	        user_input: The text of the new user message.
	        chat_history: The running conversation as one prompt-formatted
	            string, or ``None``/``""`` when the conversation has not started.

	    Returns:
	        A ``(response, new_history)`` tuple. ``response`` is only the newly
	        generated text (the prompt is not echoed back). ``new_history`` is
	        the updated conversation string, trimmed to its last 1024
	        characters. If anything raises, ``response`` is an
	        ``"Error occurred: ..."`` message and the history is returned
	        unchanged (normalised to ``""`` when it was ``None``).
	    """
	    # gr.State() starts out as None; treat that as an empty conversation so
	    # the string concatenations below cannot raise TypeError.
	    history = chat_history or ""
	    try:
	        # Each turn is closed with <|end_of_turn|>; the assistant tag is left
	        # open so the model continues from it.
	        turn = "GPT4 Correct User: " + user_input + "<|end_of_turn|>GPT4 Correct Assistant:"
	        # NOTE: this slice keeps the last 1024 *characters*, not tokens, so it
	        # may cut through the oldest retained turn.
	        prompt = history[-1024:] + turn

	        # A single string needs no padding; truncation caps the prompt at
	        # 1024 tokens.
	        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
	        # Move input tensors to the same device as the model.
	        inputs = {k: v.to(device) for k, v in inputs.items()}
	        prompt_len = inputs["input_ids"].shape[-1]

	        with torch.no_grad():
	            # max_new_tokens bounds only the reply; max_length would also
	            # count the prompt tokens and leave no room for long prompts.
	            output = model.generate(
	                **inputs,
	                max_new_tokens=512,
	                num_return_sequences=1,
	                pad_token_id=tokenizer.eos_token_id,
	            )

	        # The generated sequence starts with the prompt tokens; decode only
	        # what comes after them.
	        response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()

	        # `prompt` already contains the retained history, so append just the
	        # reply (closed with the end-of-turn marker) instead of re-adding it.
	        new_history = prompt + " " + response + "<|end_of_turn|>"
	        return response, new_history[-1024:]

	    except Exception as e:
	        # UI boundary: report the failure in the chat instead of crashing the
	        # event handler.
	        return f"Error occurred: {e}", history

	# Gradio Interface
	def clear_chat():
	    """Reset the conversation.

	    Returns:
	        A ``(chatbot_value, history)`` tuple: an empty list of turns for the
	        chat display and an empty string for the stored prompt history.
	    """
	    # The chat display holds a list of turns, so clear it with an empty list
	    # rather than an empty string.
	    return [], ""

	# Build the chat UI: header text, the conversation display, an input row, and
	# the event wiring between them.
	with gr.Blocks(gr.themes.Soft()) as app:
	    with gr.Row():
	        gr.Markdown("## Starling Chatbot")
	        gr.Markdown("Run with your own hardware. This application exceeds 24GB VRAM")
	        # The backslash continues the string literal onto the next line, so
	        # that line's leading whitespace is part of the rendered text; it is
	        # kept exactly as in the original.
	        gr.Markdown("```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
	registry.hf.space/macadeliccc-starling-lm-7b-alpha-chat:latest python app.py```")
	    with gr.Row():
	        chatbot = gr.Chatbot()

	    with gr.Row():
	        user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
	        send = gr.Button("Send")
	        clear = gr.Button("Clear")

	    # Holds the prompt-formatted chat history; start from an empty string so
	    # the first request does not receive None.
	    chat_history = gr.State("")

	    def respond(message, turns, history):
	        """Run one exchange and append it to the displayed transcript.

	        generate_response returns the reply as a plain string, while the
	        Chatbot component displays a list of [user, assistant] pairs, so the
	        reply is wrapped here before being sent to the component.
	        """
	        reply, history = generate_response(message, history)
	        # `turns` may be None or empty before the first message or after a clear.
	        return (turns or []) + [[message, reply]], history

	    send.click(respond, inputs=[user_input, chatbot, chat_history], outputs=[chatbot, chat_history])
	    clear.click(clear_chat, outputs=[chatbot, chat_history])

	app.launch()