Spaces:
Running
on
Zero
Running
on
Zero
macadeliccc
committed on
Commit
·
a9a8422
1
Parent(s):
b1f30ac
test
Browse files
app.py
CHANGED
@@ -8,13 +8,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
8 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
9 |
|
10 |
# Load the tokenizer and model
|
11 |
-
tokenizer = AutoTokenizer.from_pretrained("berkeley-nest/Starling-LM-7B-alpha")
|
12 |
model = AutoModelForCausalLM.from_pretrained("berkeley-nest/Starling-LM-7B-alpha").to(device)
|
|
|
13 |
|
14 |
@spaces.GPU
|
15 |
def generate_response(user_input, chat_history):
|
16 |
try:
|
17 |
-
prompt = "GPT4 Correct User: " + user_input + "
|
18 |
if chat_history:
|
19 |
prompt = chat_history[-1024:] + prompt # Keep last 1024 tokens of history
|
20 |
|
@@ -49,4 +50,4 @@ with gr.Blocks(gr.themes.Soft()) as app:
|
|
49 |
send.click(generate_response, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
|
50 |
clear.click(clear_chat, outputs=[chatbot, chat_history])
|
51 |
|
52 |
-
app.launch()
|
|
|
8 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
9 |
|
10 |
# Load the tokenizer and model
|
11 |
+
tokenizer = AutoTokenizer.from_pretrained("berkeley-nest/Starling-LM-7B-alpha")
|
12 |
model = AutoModelForCausalLM.from_pretrained("berkeley-nest/Starling-LM-7B-alpha").to(device)
|
13 |
+
model.eval() # Set the model to evaluation mode
|
14 |
|
15 |
@spaces.GPU
|
16 |
def generate_response(user_input, chat_history):
|
17 |
try:
|
18 |
+
prompt = "GPT4 Correct User: " + user_input + "GPT4 Correct Assistant: "
|
19 |
if chat_history:
|
20 |
prompt = chat_history[-1024:] + prompt # Keep last 1024 tokens of history
|
21 |
|
|
|
50 |
send.click(generate_response, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
|
51 |
clear.click(clear_chat, outputs=[chatbot, chat_history])
|
52 |
|
53 |
+
app.launch()
|