# NOTE: HF Spaces page-scrape artifacts removed here (navigation text,
# commit hashes, and a line-number gutter) so the file parses as Python.
# Spaces: Running on Zero
import spaces
import gradio as gr
import torch
import subprocess
import requests
from gradio import State
# Function to start the ochat server
@spaces.GPU
def start_ochat_server():
    """Launch the ochat OpenAI-compatible API server in a background process.

    Returns:
        A human-readable status string. The server process is not waited on;
        Popen returns immediately and the server keeps running on its own.
    """
    print(f"Is CUDA available: {torch.cuda.is_available()}")
    # Only query the device name when CUDA is actually present —
    # get_device_name()/current_device() raise when no GPU is available,
    # which would defeat the availability check printed above.
    if torch.cuda.is_available():
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    command = [
        "python", "-m", "ochat.serving.openai_api_server",
        "--model", "openchat/openchat_3.5"
    ]
    # Start the server in a separate process
    try:
        subprocess.Popen(command)
        return "ochat server started successfully"
    except Exception as e:
        return f"Failed to start ochat server: {e}"


start_ochat_server()
def user(message, history):
    """Record a new user turn.

    Returns an empty string (clears the input textbox) and a fresh history
    list with the message appended as a [user, pending] pair.
    """
    updated = list(history)
    updated.append([message, None])
    return "", updated
def bot(history):
    """Generate a reply for the most recent user message.

    Returns:
        A (reply, extended_history) pair, where extended_history is the
        input history plus a new [None, reply] turn.

    The server is queried exactly once — the previous implementation called
    chat_with_ochat() twice with the same message, doubling every request.
    """
    reply = chat_with_ochat(history[-1][0])
    return reply, history + [[None, reply]]
# Function to send a message to the ochat server and get a response
def chat_with_ochat(message):
    """Send a single user message to the local ochat server.

    Args:
        message: The user's message text.

    Returns:
        The assistant's reply on success, otherwise an "Error: ..." string
        (callers display the return value either way, so errors are returned
        rather than raised).
    """
    url = "http://0.0.0.0:18888/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}]
    }
    try:
        # A finite timeout keeps the UI from hanging forever when the server
        # process died or never came up; requests has no default timeout.
        response = requests.post(url, json=data, headers=headers, timeout=60)
        if response.status_code == 200:
            return response.json()['choices'][0]['message']['content']
        else:
            return f"Error: Server responded with status code {response.status_code}"
    except requests.RequestException as e:
        return f"Error: {e}"
# Create a Gradio Blocks interface with session state
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## vLLM OpenChat-3.5 Interface")
    gr.Markdown("Run on your own machine using this command: ```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
registry.hf.space/macadeliccc-openchat-3-5-chatbot:latest python app.py```")
    with gr.Row():
        message = gr.Textbox(label="Your Message", placeholder="Type your message here")
        chatbot = gr.Chatbot()
        clear = gr.Button("Clear")
    chat_history = State([])  # Session state for chat history

    def _respond(history):
        # Fill the assistant half of the [user, None] turn appended by
        # user(). The previous wiring passed the whole history list to
        # chat_with_ochat(), which expects a single message string, and
        # then overwrote the Chatbot value with a bare string.
        history[-1][1] = chat_with_ochat(history[-1][0])
        return history

    message.submit(user, [message, chatbot], [message, chatbot], queue=False).then(
        _respond, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

app.launch()