Danielrahmai1991 committed
Commit d95871f · verified · 1 Parent(s): d1d4918

Update app.py

Files changed (1): app.py +17 -5
app.py CHANGED
```diff
@@ -17,17 +17,27 @@ model, tokenizer = FastLanguageModel.from_pretrained(
     trust_remote_code=True,
     # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
 )
-
+FastLanguageModel.for_inference(model)
 print("model loaded")
 
 
 streamer = TextStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens = True)
 
-def generate_text(prompt, max_length, top_p, top_k):
-    inputs = tokenizer([prompt], return_tensors="pt")
+stored_message = gr.State([])
+
+def generate_text(prompt, max_length, top_p, top_k, messages):
+
+    messages.append({"role": "user", "content": prompt})
+
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt = True,
+        return_tensors = "pt",
+    )
+
 
     generate_kwargs = dict(
-        inputs,
+        input_ids,
         max_length=int(max_length),top_p=float(top_p), do_sample=True, top_k=int(top_k), streamer=streamer
     )
 
@@ -39,7 +49,8 @@ def generate_text(prompt, max_length, top_p, top_k):
     for text in streamer:
         generated_text.append(text)
         yield "".join(generated_text)
-
+
+    messages.append({"role": "assistant", "content": "".join(generated_text)})
 
 description = """
 # Deploy our LLM
@@ -49,6 +60,7 @@ inputs = [
     gr.Textbox(label="max-lenth generation", value=100),
     gr.Slider(0.0, 1.0, label="top-p value", value=0.95),
     gr.Textbox(label="top-k", value=50,),
+    stored_message
 ]
 outputs = [gr.Textbox(label="Generated Text")]
 
```
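
Read as a whole, the commit turns the single-prompt app into a stateful chat: the history list is rendered through the model's chat template on every call, and both the user and assistant turns are appended back into it. Below is a minimal sketch of the resulting flow, with some stated assumptions: the model id, the `Thread`-wrapped `generate` call, and `TextIteratorStreamer` do not appear in the hunks (the file names a `TextStreamer`, but `timeout=` plus `for text in streamer` is the `TextIteratorStreamer` pattern in transformers), and `input_ids` is passed as a keyword argument, since `dict(input_ids, ...)` would reject a tensor as a positional argument.

```python
from threading import Thread

import gradio as gr
from transformers import TextIteratorStreamer
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    "MODEL_ID",  # placeholder: the actual model id sits above the shown hunk
    trust_remote_code=True,
)
FastLanguageModel.for_inference(model)  # enable unsloth's fast inference path
print("model loaded")

# Iterator-style streamer so generated tokens can be consumed in a loop.
streamer = TextIteratorStreamer(
    tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
)

def generate_text(prompt, max_length, top_p, top_k, messages):
    # Record the user turn in the per-session history (a gr.State list).
    messages.append({"role": "user", "content": prompt})

    # Render the whole history through the model's chat template.
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    generate_kwargs = dict(
        input_ids=input_ids,
        max_length=int(max_length),
        top_p=float(top_p),
        do_sample=True,
        top_k=int(top_k),
        streamer=streamer,
    )
    # Generate in a background thread so this generator can drain the streamer.
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    generated_text = []
    for text in streamer:
        generated_text.append(text)
        yield "".join(generated_text)

    # Record the assistant turn so the next call sees the full conversation.
    messages.append({"role": "assistant", "content": "".join(generated_text)})
```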
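The `stored_message` state reaches the new `messages` parameter by being listed as one more input component. A sketch of that wiring, under the assumption that the app builds a plain `gr.Interface` (the hunks show only the `inputs` list); the prompt textbox and the `launch()` call are hypothetical. Because `generate_text` mutates the list in place, each session's history should accumulate across calls, assuming Gradio keeps the mutated `gr.State` value for the session without it being returned as an output.

```python
# Hypothetical wiring, mirroring the diff's inputs list; the prompt box
# and launch() are assumptions made to keep the sketch runnable.
import gradio as gr

stored_message = gr.State([])  # one history list per browser session

inputs = [
    gr.Textbox(label="prompt"),  # assumed prompt field, not shown in the hunks
    gr.Textbox(label="max-lenth generation", value=100),
    gr.Slider(0.0, 1.0, label="top-p value", value=0.95),
    gr.Textbox(label="top-k", value=50),
    stored_message,  # fills the messages parameter of generate_text
]
outputs = [gr.Textbox(label="Generated Text")]

demo = gr.Interface(fn=generate_text, inputs=inputs, outputs=outputs)
demo.launch()
```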