Spaces:
Running
on
A10G
Running
on
A10G
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
|
|
4 |
import gradio as gr
|
5 |
from threading import Thread
|
6 |
|
7 |
-
MODEL = "tiiuae/falcon3-7b-1.58bit"
|
8 |
|
9 |
TITLE = "<h1><center>Falcon3-1.58 bit playground</center></h1>"
|
10 |
SUB_TITLE = """<center>This interface has been created for quick validation purposes, do not use it for production. Bear also in mind the model is a pretrained model.</center>"""
|
@@ -46,7 +46,20 @@ def stream_chat(
|
|
46 |
penalty: float = 1.2,
|
47 |
):
|
48 |
print(f'message: {message}')
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
|
51 |
|
52 |
generate_kwargs = dict(
|
|
|
4 |
import gradio as gr
|
5 |
from threading import Thread
|
6 |
|
7 |
+
MODEL = "tiiuae/falcon3-7b-instruct-1.58bit"
|
8 |
|
9 |
TITLE = "<h1><center>Falcon3-1.58 bit playground</center></h1>"
|
10 |
SUB_TITLE = """<center>This interface has been created for quick validation purposes, do not use it for production. Bear also in mind the model is a pretrained model.</center>"""
|
|
|
46 |
penalty: float = 1.2,
|
47 |
):
|
48 |
print(f'message: {message}')
|
49 |
+
print(f'history: {history}')
|
50 |
+
|
51 |
+
conversation = []
|
52 |
+
for prompt, answer in history:
|
53 |
+
conversation.extend([
|
54 |
+
{"role": "user", "content": prompt},
|
55 |
+
{"role": "assistant", "content": answer},
|
56 |
+
])
|
57 |
+
|
58 |
+
|
59 |
+
conversation.append({"role": "user", "content": message})
|
60 |
+
input_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt = True)
|
61 |
+
|
62 |
+
inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
63 |
streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
|
64 |
|
65 |
generate_kwargs = dict(
|