Spaces:
Paused
Paused
Danielrahmai1991
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -17,17 +17,27 @@ model, tokenizer = FastLanguageModel.from_pretrained(
|
|
17 |
trust_remote_code=True,
|
18 |
# token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
|
19 |
)
|
20 |
-
|
21 |
print("model loaded")
|
22 |
|
23 |
|
24 |
streamer = TextStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens = True)
|
25 |
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
generate_kwargs = dict(
|
30 |
-
|
31 |
max_length=int(max_length),top_p=float(top_p), do_sample=True, top_k=int(top_k), streamer=streamer
|
32 |
)
|
33 |
|
@@ -39,7 +49,8 @@ def generate_text(prompt, max_length, top_p, top_k):
|
|
39 |
for text in streamer:
|
40 |
generated_text.append(text)
|
41 |
yield "".join(generated_text)
|
42 |
-
|
|
|
43 |
|
44 |
description = """
|
45 |
# Deploy our LLM
|
@@ -49,6 +60,7 @@ inputs = [
|
|
49 |
gr.Textbox(label="max-lenth generation", value=100),
|
50 |
gr.Slider(0.0, 1.0, label="top-p value", value=0.95),
|
51 |
gr.Textbox(label="top-k", value=50,),
|
|
|
52 |
]
|
53 |
outputs = [gr.Textbox(label="Generated Text")]
|
54 |
|
|
|
17 |
trust_remote_code=True,
|
18 |
# token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
|
19 |
)
|
20 |
+
FastLanguageModel.for_inference(model)
|
21 |
print("model loaded")
|
22 |
|
23 |
|
24 |
streamer = TextStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens = True)
|
25 |
|
26 |
+
stored_message = gr.State([])
|
27 |
+
|
28 |
+
def generate_text(prompt, max_length, top_p, top_k, messages):
|
29 |
+
|
30 |
+
messages.append({"role": "user", "content": prompt})
|
31 |
+
|
32 |
+
input_ids = tokenizer.apply_chat_template(
|
33 |
+
messages,
|
34 |
+
add_generation_prompt = True,
|
35 |
+
return_tensors = "pt",
|
36 |
+
)
|
37 |
+
|
38 |
|
39 |
generate_kwargs = dict(
|
40 |
+
input_ids,
|
41 |
max_length=int(max_length),top_p=float(top_p), do_sample=True, top_k=int(top_k), streamer=streamer
|
42 |
)
|
43 |
|
|
|
49 |
for text in streamer:
|
50 |
generated_text.append(text)
|
51 |
yield "".join(generated_text)
|
52 |
+
|
53 |
+
messages.append({"role": "assistant", "content": "".join(generated_text)})
|
54 |
|
55 |
description = """
|
56 |
# Deploy our LLM
|
|
|
60 |
gr.Textbox(label="max-lenth generation", value=100),
|
61 |
gr.Slider(0.0, 1.0, label="top-p value", value=0.95),
|
62 |
gr.Textbox(label="top-k", value=50,),
|
63 |
+
stored_message
|
64 |
]
|
65 |
outputs = [gr.Textbox(label="Generated Text")]
|
66 |
|