Danielrahmai1991 committed on
Commit
3066997
·
verified ·
1 Parent(s): aec652c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -15
app.py CHANGED
@@ -1,5 +1,5 @@
1
  from threading import Thread
2
- from transformers import TextStreamer
3
  from unsloth import FastLanguageModel
4
  import torch
5
  import gradio as gr
@@ -21,7 +21,10 @@ FastLanguageModel.for_inference(model)
21
  print("model loaded")
22
 
23
 
24
- streamer = TextStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens = True)
 
 
 
25
 
26
  messages = []
27
 
@@ -36,25 +39,26 @@ def generate_text(prompt, max_length, top_p, top_k):
36
  )
37
 
38
 
39
- # generate_kwargs = dict(
40
- # input_ids,
41
- # max_length=int(max_length),top_p=float(top_p), do_sample=True, top_k=int(top_k), streamer=streamer
42
- # )
 
43
 
44
- _ = model.generate(input_ids, streamer = streamer, max_new_tokens = int(max_length), pad_token_id = tokenizer.eos_token_id,
45
- temperature=0.6, # Adjust this value
46
- top_k=int(top_k), # Adjust this value
47
- top_p=float(top_p), # Adjust this value
48
- repetition_penalty=1.2
49
- )
50
- # t = Thread(target=model.generate, kwargs=generate_kwargs)
51
- # t.start()
52
 
53
  generated_text=[]
54
 
55
  for text in streamer:
56
  generated_text.append(text)
57
- print(generated_text)
58
  yield "".join(generated_text)
59
 
60
  messages.append({"role": "assistant", "content": "".join(generated_text)})
 
1
  from threading import Thread
2
+ from transformers import TextStreamer, TextIteratorStreamer
3
  from unsloth import FastLanguageModel
4
  import torch
5
  import gradio as gr
 
21
  print("model loaded")
22
 
23
 
24
+ streamer = TextIteratorStreamer(
25
+ tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
26
+ )
27
+ # streamer = TextStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens = True)
28
 
29
  messages = []
30
 
 
39
  )
40
 
41
 
42
+ generate_kwargs = dict(
43
+ max_length=int(max_length),top_p=float(top_p), do_sample=True,
44
+ top_k=int(top_k), streamer=streamer, temperature=0.6, repetition_penalty=1.2
45
+
46
+ )
47
 
48
+ # _ = model.generate(input_ids, streamer = streamer, max_new_tokens = int(max_length), pad_token_id = tokenizer.eos_token_id,
49
+ # temperature=0.6, # Adjust this value
50
+ # top_k=int(top_k), # Adjust this value
51
+ # top_p=float(top_p), # Adjust this value
52
+ # repetition_penalty=1.2
53
+ # )
54
+ t = Thread(target=model.generate, args=(input_ids,), kwargs=generate_kwargs)
55
+ t.start()
56
 
57
  generated_text=[]
58
 
59
  for text in streamer:
60
  generated_text.append(text)
61
+ # print(generated_text)
62
  yield "".join(generated_text)
63
 
64
  messages.append({"role": "assistant", "content": "".join(generated_text)})