pszemraj committed on
Commit
58acd65
·
verified ·
1 Parent(s): 8120b87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -11,7 +11,7 @@ import torch
11
  import gradio as gr
12
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
13
 
14
- model_id = "BEE-spoke-data/tFINE-900m-e16-d32-instruct"
15
  torch_device = "cuda" if torch.cuda.is_available() else "cpu"
16
  logging.info(f"Running on device:\t {torch_device}")
17
  logging.info(f"CPU threads:\t {torch.get_num_threads()}")
@@ -22,7 +22,7 @@ if torch_device == "cuda":
22
  model_id, load_in_8bit=True, device_map="auto"
23
  )
24
  else:
25
- model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
26
  try:
27
  model = torch.compile(model)
28
  except Exception as e:
@@ -165,4 +165,4 @@ with gr.Blocks() as demo:
165
  model_output,
166
  )
167
 
168
- demo.queue(max_size=32).launch()
 
11
  import gradio as gr
12
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
13
 
14
+ model_id = "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e"
15
  torch_device = "cuda" if torch.cuda.is_available() else "cpu"
16
  logging.info(f"Running on device:\t {torch_device}")
17
  logging.info(f"CPU threads:\t {torch.get_num_threads()}")
 
22
  model_id, load_in_8bit=True, device_map="auto"
23
  )
24
  else:
25
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
26
  try:
27
  model = torch.compile(model)
28
  except Exception as e:
 
165
  model_output,
166
  )
167
 
168
+ demo.queue(max_size=10).launch()