File size: 1,822 Bytes
5956319
74995d7
5956319
a505b42
f316cfc
9c1d271
ebcc5ea
f316cfc
 
 
a1908d6
f316cfc
 
9c1d271
f316cfc
 
 
395f92e
f316cfc
 
 
 
 
 
c375c59
f316cfc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1e1f85a
f316cfc
 
 
 
0e4adfe
a505b42
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import spaces
import gradio as gr
import torch
from gradio import State
from transformers import AutoTokenizer, AutoModelForCausalLM


# Checkpoint identifier shared by the tokenizer and the model
_MODEL_ID = "berkeley-nest/Starling-LM-7B-alpha"

# Load the tokenizer, then the model (switched to evaluation mode right away)
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(_MODEL_ID).eval()

# Run on the GPU whenever CUDA is available; otherwise the model stays put
if torch.cuda.is_available():
    model = model.to("cuda")

@spaces.GPU
def generate_response(user_input, chat_history):
    """Generate the assistant's reply to one user message.

    Args:
        user_input: Text of the new user message.
        chat_history: Prompt text of the conversation so far, as returned by
            a previous call, or a falsy value (``None`` / ``""``) when the
            conversation is new.

    Returns:
        A ``(response, new_history)`` tuple: the newly generated reply text
        (without the prompt) and the conversation text to pass back in as
        ``chat_history`` on the next call.
    """
    # Turn separator of the "GPT4 Correct" chat template; every user and
    # assistant turn must be closed with it.
    end_of_turn = "<|end_of_turn|>"
    max_prompt_tokens = 1024
    max_new_tokens = 512

    history = chat_history or ""
    prompt = (
        f"{history}GPT4 Correct User: {user_input}{end_of_turn}"
        "GPT4 Correct Assistant:"
    )

    # A single sequence needs no padding. Over-long prompts are trimmed from
    # the left so the newest message and the assistant cue always survive.
    encoded = tokenizer(prompt, return_tensors="pt")
    inputs = {
        name: tensor[:, -max_prompt_tokens:].to(model.device)
        for name, tensor in encoded.items()
    }
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        # max_new_tokens budgets the reply only; max_length would also count
        # the prompt and could leave no room for any generated token.
        output = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens; decode only what
    # follows them so the reply does not echo the conversation.
    response = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    ).strip()

    # Close the assistant turn so the next prompt continues the template.
    new_history = f"{prompt} {response}{end_of_turn}"
    return response, new_history

# Gradio Interface
def clear_chat():
    """Return reset values for the chat display and the stored history.

    Returns:
        A ``(turns, history)`` tuple: an empty list of conversation turns for
        the chatbot component and an empty history string.
    """
    # The chatbot value is a list of turns, so reset it with an empty list
    # rather than an empty string.
    return [], ""

def _respond(user_message, history_text, turns):
    """Adapt generate_response's string reply to the chatbot's list of turns.

    Args:
        user_message: Text typed by the user.
        history_text: Conversation text kept in the ``chat_history`` state.
        turns: Turns currently shown in the chatbot (may be ``None``).

    Returns:
        A ``(turns, history_text)`` tuple for the chatbot and the state.
    """
    # NOTE(review): assumes the Chatbot value is a list of [user, assistant]
    # pairs - confirm against the installed Gradio version.
    turns = list(turns or [])
    if not user_message or not user_message.strip():
        # Nothing to send: leave the display and the history untouched.
        return turns, history_text
    reply, history_text = generate_response(user_message, history_text)
    turns.append([user_message, reply])
    return turns, history_text


with gr.Blocks(theme=gr.themes.Soft()) as app:
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
            send = gr.Button("Send")
            clear = gr.Button("Clear")
        with gr.Column():
            chatbot = gr.Chatbot()

    chat_history = gr.State()  # Holds the chat history

    # The chatbot is both an input and an output so each reply is appended to
    # the turns already on screen instead of replacing them with a string.
    send.click(_respond, inputs=[user_input, chat_history, chatbot], outputs=[chatbot, chat_history])
    clear.click(clear_chat, outputs=[chatbot, chat_history])

app.launch()