Spaces:

YuukiAsuna
/

Vintern-1B-v2-ViTable-docvqa

Running

MiyamizuMitsuha committed on Nov 18, 2024

Commit

479d45f

1 Parent(s): 33db9ac

Update app

Files changed (1) hide show

app.py CHANGED Viewed

@@ -118,31 +118,31 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, us
 def Vintern_1B_v2_ViTable_docvqa(image, question, chat_history=[]):
-  pixel_values = load_image(image, max_num=12).to(torch.bfloat16).cuda()
-  generation_config = dict(max_new_tokens= 1024, do_sample=False, num_beams = 3, repetition_penalty=2.0)
-  # question = input("Question: ")
-  question = '<image>\n' + question
-  response = model.chat(tokenizer, pixel_values, question, generation_config)
-  print(f'User: {question}\nAssistant: {response}')
-  print("="*30)
-  # Update the chat history
-  chat_history.append((image, None))
-  chat_history.append((question, None))
-  chat_history.append((None, response))
-  return chat_history
 interface = gr.Interface(
     fn=Vintern_1B_v2_ViTable_docvqa,
     inputs=[
-        gr.Image(label="Upload Image", type="filepath", optional=True),  # Image input
-        gr.Textbox(label="Enter your question", optional=True),          # Text input
     ],
     outputs=gr.Chatbot(label="Chat History"),                           # Chatbot-style output
     title="Vintern-1B-v2-ViTable-docvqa,",
@@ -150,6 +150,7 @@ interface = gr.Interface(
     allow_flagging="never",
 )
 # Launch the chatbot
 interface.launch()

 def Vintern_1B_v2_ViTable_docvqa(image, question, chat_history=[]):
+    pixel_values = load_image(image, max_num=12).to(torch.bfloat16).cuda()
+    generation_config = dict(max_new_tokens= 1024, do_sample=False, num_beams = 3, repetition_penalty=2.0)
+    # question = input("Question: ")
+    question = '<image>\n' + question
+    response = model.chat(tokenizer, pixel_values, question, generation_config)
+    print(f'User: {question}\nAssistant: {response}')
+    print("="*30)
+    # Update the chat history
+    chat_history.append((image, None))
+    chat_history.append((question, None))
+    chat_history.append((None, response))
+    return chat_history
 interface = gr.Interface(
     fn=Vintern_1B_v2_ViTable_docvqa,
     inputs=[
+        gr.Image(label="Upload Image", type="filepath"),  # Image input
+        gr.Textbox(label="Enter your question"),          # Text input
     ],
     outputs=gr.Chatbot(label="Chat History"),                           # Chatbot-style output
     title="Vintern-1B-v2-ViTable-docvqa,",
     allow_flagging="never",
 )
 # Launch the chatbot
 interface.launch()