import spaces
import gradio as gr
import torch
import subprocess
import requests

# Function to start the ochat server
@spaces.GPU
def start_ochat_server():
    print(f"Is CUDA available: {torch.cuda.is_available()}")
    # Guard the device query so this doesn't raise when CUDA is unavailable.
    if torch.cuda.is_available():
        print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

    command = [
        "python", "-m", "ochat.serving.openai_api_server", 
        "--model", "openchat/openchat_3.5"
    ]

    # Start the server in a separate process
    try:
        subprocess.Popen(command)
        return "ochat server started successfully"
    except Exception as e:
        return f"Failed to start ochat server: {e}"

# Start the API server once at import time so it is ready before the UI loads.
start_ochat_server()

def user(message, history):
    # Append the user's message to the history and clear the textbox.
    return "", history + [[message, None]]


def bot(history):
    # Send the latest user message to the ochat server and fill in the
    # assistant slot of the last turn.
    history[-1][1] = chat_with_ochat(history[-1][0])
    return history

# Function to send a message to the ochat server and get a response
def chat_with_ochat(message):
    url = "http://0.0.0.0:18888/v1/chat/completions"
    headers = {"Content-Type": "application/json"}
    data = {
        "model": "openchat_3.5",
        "messages": [{"role": "user", "content": message}]
    }

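    # Assumed response shape (OpenAI-style chat completion), e.g.:
    #   {"choices": [{"message": {"role": "assistant", "content": "..."}}], ...}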
    try:
        response = requests.post(url, json=data, headers=headers, timeout=120)
        if response.status_code == 200:
            return response.json()['choices'][0]['message']['content']
        else:
            return f"Error: Server responded with status code {response.status_code}"
    except requests.RequestException as e:
        return f"Error: {e}"

# Create a Gradio Blocks interface with session state
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("## vLLM OpenChat-3.5 Interface")
    gr.Markdown("Run on your own machine using this command: ```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
    registry.hf.space/macadeliccc-openchat-3-5-chatbot:latest python app.py```")

    with gr.Row():
        message = gr.Textbox(label="Your Message", placeholder="Type your message here")
        chatbot = gr.Chatbot()
        clear = gr.Button("Clear")

    chat_history = gr.State([])  # Session state for chat history (currently unused)

    message.submit(user, [message, chatbot], [message, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
app.launch()