Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ import torch
|
|
11 |
import gradio as gr
|
12 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
|
13 |
|
14 |
-
model_id = "BEE-spoke-data/tFINE-900m-e16-d32-
|
15 |
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
|
16 |
logging.info(f"Running on device:\t {torch_device}")
|
17 |
logging.info(f"CPU threads:\t {torch.get_num_threads()}")
|
@@ -22,7 +22,7 @@ if torch_device == "cuda":
|
|
22 |
model_id, load_in_8bit=True, device_map="auto"
|
23 |
)
|
24 |
else:
|
25 |
-
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
|
26 |
try:
|
27 |
model = torch.compile(model)
|
28 |
except Exception as e:
|
@@ -165,4 +165,4 @@ with gr.Blocks() as demo:
|
|
165 |
model_output,
|
166 |
)
|
167 |
|
168 |
-
demo.queue(max_size=
|
|
|
11 |
import gradio as gr
|
12 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
|
13 |
|
14 |
+
model_id = "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e"
|
15 |
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
|
16 |
logging.info(f"Running on device:\t {torch_device}")
|
17 |
logging.info(f"CPU threads:\t {torch.get_num_threads()}")
|
|
|
22 |
model_id, load_in_8bit=True, device_map="auto"
|
23 |
)
|
24 |
else:
|
25 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
26 |
try:
|
27 |
model = torch.compile(model)
|
28 |
except Exception as e:
|
|
|
165 |
model_output,
|
166 |
)
|
167 |
|
168 |
+
demo.queue(max_size=10).launch()
|