# Scraped from a Hugging Face file view: author macadeliccc, commit ef2a575 ("test"), file size 2.89 kB.
import spaces
import gradio as gr
import torch
import subprocess
import numpy as np
import requests
# Function to start the ochat server
@spaces.GPU
def start_ochat_server():
    """Spawn the ochat OpenAI-compatible API server as a background process.

    Prints CUDA diagnostics, then starts
    ``python -m ochat.serving.openai_api_server --model openchat/openchat_3.5``
    with ``subprocess.Popen`` and returns immediately; it does not wait for
    the server to finish starting or check that it is accepting requests.

    Returns:
        str: ``"ochat server started successfully"`` once the process has
        been spawned, or ``"Failed to start ochat server: ..."`` carrying
        the exception text if spawning raised.
    """
    cuda_available = torch.cuda.is_available()
    print(f"Is CUDA available: {cuda_available}")
    # Asking for the current device raises when no CUDA device is usable,
    # which would abort this function before the server is even launched.
    if cuda_available:
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    else:
        print("CUDA device: unavailable")

    command = [
        "python", "-m", "ochat.serving.openai_api_server",
        "--model", "openchat/openchat_3.5",
    ]

    # Start the server in a separate process. The Popen handle is not kept:
    # the child is left running for the lifetime of the app.
    try:
        subprocess.Popen(command)
    except Exception as e:
        # Deliberately broad: callers get a status string, never an exception.
        return f"Failed to start ochat server: {e}"
    return "ochat server started successfully"
# Launch the server as a module-level side effect, before the request helpers
# and the UI are defined. The status string it returns is discarded here.
start_ochat_server()
# Function to send a message to the ochat server and get a response
def chat_with_ochat(message, timeout=120):
    """Send one user message to the local ochat server and return its reply.

    Args:
        message: Text sent as the single ``user`` message of the request.
        timeout: Seconds ``requests`` may wait on the server before giving
            up. Without a timeout a stalled server would block the caller
            indefinitely.

    Returns:
        str: The ``content`` of the first choice's message when the server
        answers with HTTP 200 and a well-formed body. In every failure case
        (transport error, timeout, non-200 status, malformed body) a string
        starting with ``"Error: "`` is returned instead of raising.
    """
    # Connect via localhost, as code_chat does. 0.0.0.0 is a bind-all
    # address, not a portable destination for a client connection.
    url = "http://localhost:18888/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}],
    }

    try:
        response = requests.post(url, json=data, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        return f"Error: {e}"

    if response.status_code != 200:
        return f"Error: Server responded with status code {response.status_code}"

    # A 200 response can still carry a body that is not JSON or lacks the
    # expected fields; report that as an error string like every other failure.
    try:
        return response.json()['choices'][0]['message']['content']
    except (ValueError, KeyError, IndexError, TypeError) as e:
        return f"Error: Unexpected response format from server ({e!r})"
def code_chat(message, condition="Code", timeout=120):
    """Send one user message with a ``condition`` field to the local ochat server.

    Args:
        message: Text sent as the single ``user`` message of the request.
        condition: Value placed in the request's ``"condition"`` field.
        timeout: Seconds ``requests`` may wait on the server before giving
            up. Without a timeout a stalled server would block the caller
            indefinitely.

    Returns:
        The decoded JSON body of the response when the server answers with
        HTTP 200. In every failure case (transport error, timeout, non-200
        status, body that is not valid JSON) a string starting with
        ``"Error: "`` is returned instead of raising.
    """
    url = "http://localhost:18888/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "condition": condition,
        "messages": [{"role": "user", "content": message}],
    }

    try:
        response = requests.post(url, json=data, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return f"Error: Server responded with status code {response.status_code}"
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions where
        # the decode error is not a RequestException subclass.
        return f"Error: {e}"
# Chat history variable
# Module-level transcript that update_output extends with one "You: ..." line
# and one "ochat: ..." line per submission. Because it lives at module scope,
# every call in this process appends to the same list; nothing visible in this
# file clears it.
chat_history = []
# Create a Gradio Blocks interface
# NOTE(review): the original indentation was lost; the nesting below (textbox
# and button inside the Row, response box beneath it) is reconstructed and
# should be confirmed against the intended layout.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## vLLM OpenChat-3.5 Interface")
    gr.Markdown("Type your message and get a response from the ochat server.")

    with gr.Row():
        input_text = gr.Textbox(label="Your Message", placeholder="Type your message here")
        submit_button = gr.Button("Send")

    output_text = gr.Textbox(label="ochat Response", interactive=True, scale=4)

    def update_output(input_message):
        """Record the message and the server's reply; return the full transcript.

        The module-level ``chat_history`` list is mutated in place, so the
        returned text contains every exchange made so far in this process.
        """
        reply = chat_with_ochat(input_message)
        # Append only after the server call has returned, user line first.
        chat_history.append(f"You: {input_message}")
        chat_history.append(f"ochat: {reply}")
        return "\n".join(chat_history)

    # Clicking "Send" feeds the textbox content to update_output and shows
    # the resulting transcript in the response box.
    submit_button.click(fn=update_output, inputs=[input_text], outputs=[output_text])

app.launch()