han1997 committed
Commit 1ff6ff4 · verified · 1 Parent(s): c2cb867

Update app.py

Files changed (1)
  1. app.py +31 -15
app.py CHANGED
@@ -40,8 +40,10 @@ vlm.to(DEVICE, dtype=DTYPE)
 
 prompt_builder = vlm.get_prompt_builder()
 
-@spaces.GPU
+@spaces.GPU(duration=20)
 def bot_streaming(message, history, temperature, top_k, max_new_tokens):
+    streamer = TextIteratorStreamer(processor.tokenizer, skip_special_tokens=True)
+
     if len(history) == 0:
         prompt_builder.prompt, prompt_builder.turn_count = "", 0
 
@@ -61,22 +63,36 @@ def bot_streaming(message, history, temperature, top_k, max_new_tokens):
 
     prompt_builder.add_turn(role="human", message=message['text'])
     prompt_text = prompt_builder.get_prompt()
+
+    generation_kwargs = {
+        "pixel_values": image,
+        "input_ids": prompt_text,
+        "streamer": streamer,
+        "max_new_tokens": max_new_tokens,
+        "use_cache": True,
+        "temperature": temperature,
+        "do_sample": True,
+        "top_k": top_k,
+    }
 
-    # Generate from the VLM
-    with torch.no_grad():
-        generated_text = vlm.generate(
-            image,
-            prompt_text,
-            use_cache=True,
-            do_sample=True,
-            temperature=temperature,
-            top_k=top_k,
-            max_new_tokens=max_new_tokens,
-        )
-    prompt_builder.add_turn(role="gpt", message=generated_text)
+    # Generate from the VLM
+    thread = Thread(target=vlm.generate, kwargs=generation_kwargs)
+    thread.start()
 
-    time.sleep(0.04)
-    yield generated_text
+    buffer = ""
+    output_started = False
+    for new_text in streamer:
+        if not output_started:
+            if "<|assistant|>\n" in new_text:
+                output_started = True
+            continue
+        buffer += new_text
+        if len(buffer) > 1:
+            yield buffer
+
+    prompt_builder.add_turn(role="gpt", message=buffer)
+
+    return buffer
 
 
 demo = gr.ChatInterface(fn=bot_streaming,
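
For readers following the change: the commit swaps a blocking vlm.generate() call for transformers' standard token-streaming recipe. A TextIteratorStreamer is handed to generate(), generate() runs in a background Thread, and the Gradio callback yields a growing buffer as tokens arrive. Below is a minimal, self-contained sketch of that pattern; gpt2 and the stream_reply name are illustrative stand-ins, not the Space's actual VLM or API.

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def stream_reply(prompt, max_new_tokens=64, temperature=0.8, top_k=50):
    # skip_prompt=True drops the echoed input so only newly generated text is
    # streamed (the commit instead skips everything before "<|assistant|>\n").
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt")
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
    )
    # generate() blocks until done, so it runs in a worker thread while this
    # thread consumes tokens from the streamer as they are produced.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    buffer = ""
    for new_text in streamer:  # iteration ends when generation finishes
        buffer += new_text
        yield buffer           # each yield is the full response so far

for partial in stream_reply("The quick brown fox"):
    print(partial)

Yielding the accumulated buffer rather than individual tokens matches gr.ChatInterface's contract: each yielded value replaces the displayed response, so the message appears to grow in place.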