Spaces:
Sleeping
Sleeping
artificialguybr
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -36,7 +36,7 @@ model = model.eval()
|
|
36 |
|
37 |
@spaces.GPU()
|
38 |
def stream_chat(
|
39 |
-
message: str,
|
40 |
history: list,
|
41 |
system_prompt: str,
|
42 |
temperature: float = 0.5,
|
@@ -44,15 +44,19 @@ def stream_chat(
|
|
44 |
top_p: float = 1.0,
|
45 |
top_k: int = 50,
|
46 |
):
|
|
|
|
|
|
|
47 |
full_prompt = f"<<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
|
48 |
for prompt, answer in history:
|
49 |
full_prompt += f"[INST]{prompt}[/INST]{answer}"
|
50 |
full_prompt += f"[INST]{message}[/INST]"
|
51 |
|
52 |
inputs = tokenizer(full_prompt, truncation=False, return_tensors="pt").to(device)
|
53 |
-
|
|
|
54 |
streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
|
55 |
-
|
56 |
generate_kwargs = dict(
|
57 |
inputs=inputs.input_ids,
|
58 |
max_new_tokens=max_new_tokens,
|
@@ -81,10 +85,12 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
|
|
81 |
fn=stream_chat,
|
82 |
chatbot=chatbot,
|
83 |
fill_height=True,
|
|
|
84 |
additional_inputs=[
|
85 |
gr.Textbox(
|
86 |
value="You are a helpful assistant capable of generating long-form content.",
|
87 |
label="System Prompt",
|
|
|
88 |
),
|
89 |
gr.Slider(
|
90 |
minimum=0,
|
@@ -92,6 +98,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
|
|
92 |
step=0.1,
|
93 |
value=0.5,
|
94 |
label="Temperature",
|
|
|
95 |
),
|
96 |
gr.Slider(
|
97 |
minimum=1024,
|
@@ -99,6 +106,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
|
|
99 |
step=1024,
|
100 |
value=32768,
|
101 |
label="Max new tokens",
|
|
|
102 |
),
|
103 |
gr.Slider(
|
104 |
minimum=0.0,
|
@@ -106,6 +114,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
|
|
106 |
step=0.1,
|
107 |
value=1.0,
|
108 |
label="Top p",
|
|
|
109 |
),
|
110 |
gr.Slider(
|
111 |
minimum=1,
|
@@ -113,6 +122,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
|
|
113 |
step=1,
|
114 |
value=50,
|
115 |
label="Top k",
|
|
|
116 |
),
|
117 |
],
|
118 |
examples=[
|
|
|
36 |
|
37 |
@spaces.GPU()
|
38 |
def stream_chat(
|
39 |
+
message: str,
|
40 |
history: list,
|
41 |
system_prompt: str,
|
42 |
temperature: float = 0.5,
|
|
|
44 |
top_p: float = 1.0,
|
45 |
top_k: int = 50,
|
46 |
):
|
47 |
+
print(f'message: {message}')
|
48 |
+
print(f'history: {history}')
|
49 |
+
|
50 |
full_prompt = f"<<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
|
51 |
for prompt, answer in history:
|
52 |
full_prompt += f"[INST]{prompt}[/INST]{answer}"
|
53 |
full_prompt += f"[INST]{message}[/INST]"
|
54 |
|
55 |
inputs = tokenizer(full_prompt, truncation=False, return_tensors="pt").to(device)
|
56 |
+
context_length = inputs.input_ids.shape[-1]
|
57 |
+
|
58 |
streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
|
59 |
+
|
60 |
generate_kwargs = dict(
|
61 |
inputs=inputs.input_ids,
|
62 |
max_new_tokens=max_new_tokens,
|
|
|
85 |
fn=stream_chat,
|
86 |
chatbot=chatbot,
|
87 |
fill_height=True,
|
88 |
+
additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
|
89 |
additional_inputs=[
|
90 |
gr.Textbox(
|
91 |
value="You are a helpful assistant capable of generating long-form content.",
|
92 |
label="System Prompt",
|
93 |
+
render=False,
|
94 |
),
|
95 |
gr.Slider(
|
96 |
minimum=0,
|
|
|
98 |
step=0.1,
|
99 |
value=0.5,
|
100 |
label="Temperature",
|
101 |
+
render=False,
|
102 |
),
|
103 |
gr.Slider(
|
104 |
minimum=1024,
|
|
|
106 |
step=1024,
|
107 |
value=32768,
|
108 |
label="Max new tokens",
|
109 |
+
render=False,
|
110 |
),
|
111 |
gr.Slider(
|
112 |
minimum=0.0,
|
|
|
114 |
step=0.1,
|
115 |
value=1.0,
|
116 |
label="Top p",
|
117 |
+
render=False,
|
118 |
),
|
119 |
gr.Slider(
|
120 |
minimum=1,
|
|
|
122 |
step=1,
|
123 |
value=50,
|
124 |
label="Top k",
|
125 |
+
render=False,
|
126 |
),
|
127 |
],
|
128 |
examples=[
|