Update app.py
app.py

@@ -4,6 +4,7 @@ import gradio as gr
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 
+MODELS = ["Qwen/Qwen2-1.5B-Instruct", "Qwen/Qwen2-1.5B-Instruct-GPTQ-Int8"]
 model = os.environ.get("MODEL_ID")
 model_name = model.split("/")[-1]
 
@@ -42,7 +43,6 @@ def generate(message, history, system, max_tokens, temperature, top_p, top_k, pe
     conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
     conversation.append({"role": "user", "content": message})
 
-    print(f"Conversation is -\n{conversation}")
 
     text = tokenizer.apply_chat_template(
         conversation,
@@ -70,7 +70,7 @@ def generate(message, history, system, max_tokens, temperature, top_p, top_k, pe
 
 
 
-chatbot = gr.Chatbot(height=
+chatbot = gr.Chatbot(height=800)
 
 with gr.Blocks(css=css) as demo:
     gr.HTML(DESCRIPTION)
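For orientation, below is a minimal sketch of how the pieces this diff touches typically fit together in a vLLM-backed Gradio chat Space. It is an illustrative reconstruction, not the actual app.py: the sampling defaults, the DESCRIPTION placeholder, the fallback to MODELS[0], and the additional-input components are assumptions made for the sketch.

import os

import gradio as gr
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Illustrative sketch of the surrounding app.py; names and defaults below are
# assumptions, not taken from the original file.
MODELS = ["Qwen/Qwen2-1.5B-Instruct", "Qwen/Qwen2-1.5B-Instruct-GPTQ-Int8"]
model = os.environ.get("MODEL_ID", MODELS[0])  # falling back to MODELS[0] is an assumption
model_name = model.split("/")[-1]

DESCRIPTION = f"<h1>Chat with {model_name}</h1>"  # placeholder for the Space's HTML header

tokenizer = AutoTokenizer.from_pretrained(model)
llm = LLM(model=model)


def generate(message, history, system, max_tokens, temperature, top_p, top_k, penalty):
    # Rebuild the conversation from Gradio's (user, assistant) history pairs.
    conversation = [{"role": "system", "content": system}] if system else []
    for prompt, answer in history:
        conversation.extend([{"role": "user", "content": prompt},
                             {"role": "assistant", "content": answer}])
    conversation.append({"role": "user", "content": message})

    # Render the chat template to a single prompt string for vLLM.
    text = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

    sampling_params = SamplingParams(
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=penalty,
    )
    outputs = llm.generate([text], sampling_params)
    return outputs[0].outputs[0].text


chatbot = gr.Chatbot(height=800)

with gr.Blocks() as demo:
    gr.HTML(DESCRIPTION)
    gr.ChatInterface(
        fn=generate,
        chatbot=chatbot,
        additional_inputs=[
            gr.Textbox(value="You are a helpful assistant.", label="System prompt"),
            gr.Slider(1, 2048, value=512, step=1, label="Max tokens"),
            gr.Slider(0.0, 2.0, value=0.7, label="Temperature"),
            gr.Slider(0.0, 1.0, value=0.9, label="Top-p"),
            gr.Slider(1, 100, value=50, step=1, label="Top-k"),
            gr.Slider(1.0, 2.0, value=1.1, label="Repetition penalty"),
        ],
    )

if __name__ == "__main__":
    demo.launch()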