Update app.py
app.py

@@ -4,6 +4,7 @@ import gradio as gr
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 
+MODELS = ["Qwen/Qwen2-1.5B-Instruct", "Qwen/Qwen2-1.5B-Instruct-GPTQ-Int8"]
 model = os.environ.get("MODEL_ID")
 model_name = model.split("/")[-1]
 
@@ -42,7 +43,6 @@ def generate(message, history, system, max_tokens, temperature, top_p, top_k, pe
     conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
     conversation.append({"role": "user", "content": message})
 
-    print(f"Conversation is -\n{conversation}")
 
     text = tokenizer.apply_chat_template(
         conversation,
@@ -70,7 +70,7 @@ def generate(message, history, system, max_tokens, temperature, top_p, top_k, pe
 
 
 
-chatbot = gr.Chatbot(height=
+chatbot = gr.Chatbot(height=800)
 
 with gr.Blocks(css=css) as demo:
     gr.HTML(DESCRIPTION)
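For orientation, below is a minimal sketch of how the pieces this diff touches typically fit together in a vLLM-backed Gradio chat Space. It is an illustrative reconstruction, not the actual app.py: the sampling defaults, the DESCRIPTION placeholder, the fallback to MODELS[0], and the additional-input components are assumptions made for the sketch.

import os

import gradio as gr
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Illustrative sketch of the surrounding app.py; names and defaults below are
# assumptions, not taken from the original file.
MODELS = ["Qwen/Qwen2-1.5B-Instruct", "Qwen/Qwen2-1.5B-Instruct-GPTQ-Int8"]
model = os.environ.get("MODEL_ID", MODELS[0])  # falling back to MODELS[0] is an assumption
model_name = model.split("/")[-1]

DESCRIPTION = f"<h1>Chat with {model_name}</h1>"  # placeholder for the Space's HTML header

tokenizer = AutoTokenizer.from_pretrained(model)
llm = LLM(model=model)


def generate(message, history, system, max_tokens, temperature, top_p, top_k, penalty):
    # Rebuild the conversation from Gradio's (user, assistant) history pairs.
    conversation = [{"role": "system", "content": system}] if system else []
    for prompt, answer in history:
        conversation.extend([{"role": "user", "content": prompt},
                             {"role": "assistant", "content": answer}])
    conversation.append({"role": "user", "content": message})

    # Render the chat template to a single prompt string for vLLM.
    text = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    sampling_params = SamplingParams(
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=penalty,
    )
    outputs = llm.generate([text], sampling_params)
    return outputs[0].outputs[0].text


chatbot = gr.Chatbot(height=800)

with gr.Blocks() as demo:
    gr.HTML(DESCRIPTION)
    gr.ChatInterface(
        fn=generate,
        chatbot=chatbot,
        additional_inputs=[
            gr.Textbox(value="You are a helpful assistant.", label="System prompt"),
            gr.Slider(1, 2048, value=512, step=1, label="Max tokens"),
            gr.Slider(0.0, 2.0, value=0.7, label="Temperature"),
            gr.Slider(0.0, 1.0, value=0.9, label="Top-p"),
            gr.Slider(1, 100, value=50, step=1, label="Top-k"),
            gr.Slider(1.0, 2.0, value=1.1, label="Repetition penalty"),
        ],
    )

if __name__ == "__main__":
    demo.launch()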