# Hugging Face Spaces status banner: Running on Zero
import spaces | |
import gradio as gr | |
import torch | |
import subprocess | |
import numpy as np | |
import requests | |
# Function to start the ochat server
def start_ochat_server():
    """Launch the ochat OpenAI-compatible API server as a background process.

    Prints CUDA diagnostics, then spawns
    ``python -m ochat.serving.openai_api_server --model openchat/openchat_3.5``
    without waiting for it to exit.

    Returns:
        A status string. The success message only means the process was
        spawned, not that the server is ready to accept requests. On failure
        the message contains the text of the exception raised while spawning.
    """
    cuda_available = torch.cuda.is_available()
    print(f"Is CUDA available: {cuda_available}")
    # Querying the current device raises when no CUDA device is present, so
    # only ask for its name when CUDA is actually available.
    if cuda_available:
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    else:
        print("CUDA device: none")

    command = [
        "python", "-m", "ochat.serving.openai_api_server",
        "--model", "openchat/openchat_3.5",
    ]
    # Start the server in a separate process. Spawning is best-effort: any
    # failure is reported through the returned status string instead of raising.
    try:
        subprocess.Popen(command)
    except Exception as e:
        return f"Failed to start ochat server: {e}"
    return "ochat server started successfully"
# Launch the server when the script starts and print the returned status
# string; it was previously discarded, so a failed launch went unnoticed.
print(start_ochat_server())
# Function to send a message to the ochat server and get a response
def chat_with_ochat(message, timeout=120):
    """Send one user message to the local ochat server and return its reply.

    Args:
        message: Text sent as the single ``user`` message of the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` would block indefinitely on a stuck server.

    Returns:
        The reply text found at ``choices[0].message.content`` on success,
        otherwise a string starting with ``"Error: "`` describing the failure.
    """
    # 0.0.0.0 is a bind address, not a portable destination address; connect
    # through localhost, the same host code_chat uses.
    url = "http://localhost:18888/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}],
    }
    try:
        response = requests.post(url, json=data, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return f"Error: Server responded with status code {response.status_code}"
        return response.json()['choices'][0]['message']['content']
    except requests.RequestException as e:
        return f"Error: {e}"
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # A 200 response whose body is not JSON, or lacks the expected
        # fields, is reported as an error string instead of raising.
        return f"Error: Unexpected response format: {e!r}"
def code_chat(message, condition="Code", timeout=120):
    """Send one user message with a ``condition`` field to the ochat server.

    Args:
        message: Text sent as the single ``user`` message of the request.
        condition: Value placed in the request's ``"condition"`` field.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` would block indefinitely on a stuck server.

    Returns:
        The full decoded JSON response on success, otherwise a string
        starting with ``"Error: "`` describing the failure.
    """
    url = "http://localhost:18888/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "condition": condition,
        "messages": [{"role": "user", "content": message}],
    }
    try:
        response = requests.post(url, json=data, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return f"Error: Server responded with status code {response.status_code}"
        return response.json()
    except requests.RequestException as e:
        return f"Error: {e}"
    except ValueError as e:
        # A 200 response whose body is not valid JSON is reported as an
        # error string instead of raising.
        return f"Error: Unexpected response format: {e!r}"
# Chat history is kept per browser session in a gr.State component. A
# module-level list would be shared by every user of the app, mixing their
# conversations and growing without bound.
# Create a Gradio Blocks interface
# NOTE(review): the original indentation was lost; the nesting below (message
# box and button inside the Row, response box beneath it) is assumed - confirm.
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## vLLM OpenChat-3.5 Interface")
    gr.Markdown("Type your message and get a response from the ochat server.")
    history_state = gr.State([])
    with gr.Row():
        input_text = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here",
        )
        submit_button = gr.Button("Send")
    output_text = gr.Textbox(label="ochat Response", interactive=True, scale=4)

    def update_output(input_message, history):
        """Append one exchange to this session's history and render it.

        Args:
            input_message: Text from the message box.
            history: This session's transcript lines so far.

        Returns:
            A tuple of the newline-joined transcript and the updated history.
        """
        # Skip blank submissions instead of sending an empty prompt.
        if not input_message or not input_message.strip():
            return "\n".join(history), history
        user_message = f"You: {input_message}"
        server_response = f"ochat: {chat_with_ochat(input_message)}"
        # Build a new list rather than mutating the one handed in.
        updated_history = history + [user_message, server_response]
        return "\n".join(updated_history), updated_history

    submit_button.click(
        fn=update_output,
        inputs=[input_text, history_state],
        outputs=[output_text, history_state],
    )

app.launch()