Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ try:
|
|
20 |
model = OlmoeForCausalLM.from_pretrained(
|
21 |
model_name,
|
22 |
trust_remote_code=True,
|
23 |
-
torch_dtype=torch.
|
24 |
low_cpu_mem_usage=True,
|
25 |
device_map="auto",
|
26 |
_attn_implementation="flash_attention_2" # Enable Flash Attention 2
|
@@ -88,7 +88,7 @@ with gr.Blocks(css=css) as demo:
|
|
88 |
msg = gr.Textbox(label="Meow")
|
89 |
with gr.Row():
|
90 |
temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
|
91 |
-
max_new_tokens = gr.Slider(minimum=50, maximum=
|
92 |
clear = gr.Button("Clear")
|
93 |
|
94 |
def user(user_message, history):
|
|
|
20 |
model = OlmoeForCausalLM.from_pretrained(
|
21 |
model_name,
|
22 |
trust_remote_code=True,
|
23 |
+
torch_dtype=torch.bfloat16, # Using bfloat16 for lower precision
|
24 |
low_cpu_mem_usage=True,
|
25 |
device_map="auto",
|
26 |
_attn_implementation="flash_attention_2" # Enable Flash Attention 2
|
|
|
88 |
msg = gr.Textbox(label="Meow")
|
89 |
with gr.Row():
|
90 |
temperature = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
|
91 |
+
max_new_tokens = gr.Slider(minimum=50, maximum=8000, value=2000, step=50, label="Max New Tokens")
|
92 |
clear = gr.Button("Clear")
|
93 |
|
94 |
def user(user_message, history):
|