"""Gradio demo for T-MoENet (Temporal-guided Mixture-of-Experts for Zero-Shot Video QA)."""
import shutil
import gradio as gr
import torch
import os
import tempfile
from Infer import Infer
# HTML banner shown through gr.Markdown at the top of the demo page:
# paper title, review status and a badge linking to the code repository.
title_markdown = """
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
<div>
<h1 >Temporal-guided Mixture-of-Experts for Zero-Shot Video Question Answering</h1>
<h5 style="margin: 0;">Under review.</h5>
</div>
</div>
<div align="center">
<div style="display:flex; gap: 0.25rem;" align="center">
<a href='https://github.com/qyx1121/T-MoENet'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
</div>
</div>
"""
# Custom CSS handed to gr.Blocks(css=...): gives every <button> inside the
# element with id "buttons" a minimum width of 120px, capped at 100%.
block_css = """
#buttons button {
min-width: min(120px,100%);
}
"""
def save_video_to_local(video_path):
    """Copy a video into the local ``temp`` directory under a fresh unique name.

    Args:
        video_path: Path of the existing video file to copy.

    Returns:
        Relative path of the copy, of the form ``temp/<random>.mp4``.

    Raises:
        OSError: If the source cannot be read or the copy cannot be written.
    """
    import uuid  # local import keeps this block self-contained

    # Create the directory on demand so the function does not rely on
    # module-level setup having run first.
    os.makedirs('temp', exist_ok=True)
    # uuid4 is a public API for generating a collision-resistant name; the
    # previous tempfile._get_candidate_names() is a private helper.
    filename = os.path.join('temp', uuid.uuid4().hex + '.mp4')
    shutil.copyfile(video_path, filename)
    return filename
def generate(video, textbox_in, candbox_in):
    """Answer a multiple-choice question about a video via the global ``handler``.

    Args:
        video: Path of the input video; a falsy value is replaced by the
            string ``"none"`` before being handed to the handler.
        textbox_in: The question text.
        candbox_in: The answer options written as a Python literal,
            e.g. ``"['sitting', 'clap']"``.

    Returns:
        Whatever ``handler.generate`` returns; the value is also printed.

    Raises:
        ValueError: If ``candbox_in`` is not a valid Python literal.
    """
    import ast  # local import keeps this block self-contained

    # The options string comes straight from a web text box, so it is parsed
    # as a literal only: eval() here would run arbitrary user-supplied code.
    # Parsing happens first so bad input is rejected before any inference.
    try:
        candidates = ast.literal_eval(candbox_in)
    except (ValueError, SyntaxError, TypeError) as err:
        raise ValueError(
            "Options must be a Python list literal, e.g. ['option a', 'option b']"
        ) from err

    video = video if video else "none"
    textbox_out = handler.generate(textbox_in, candidates, video)
    print(textbox_out)
    return textbox_out
# Build the inference handler once at start-up; it is shared by every request.
device = "cpu"
handler = Infer(device)

# Scratch directory used for local video copies. exist_ok avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs("temp", exist_ok=True)
# Input widgets are created up front so the interface and the example gallery
# below can both refer to the same component instances.
_textbox_style = dict(show_label=False, container=False)

video = gr.Video(label="Input Video")
question_box = gr.Textbox(placeholder="Enter question", **_textbox_style)
candidates_box = gr.Textbox(placeholder="Enter a list of options", **_textbox_style)
# Page layout: banner, then the question-answering interface, then a gallery
# of bundled examples that fill in the same three input components.
with gr.Blocks(title='T-MoENet', theme=gr.themes.Default(), css=block_css) as demo:
    gr.Markdown(title_markdown)

    # Example clips are looked up in a "videos" folder next to this file.
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    videos_dir = os.path.join(cur_dir, "videos")

    with gr.Column():
        with gr.Column(scale=3):
            # Wires generate() to the three inputs and a single text output.
            gr.Interface(
                generate,
                [video, question_box, candidates_box],
                ["text"],
            )
        with gr.Column(scale=3):
            # Each example row is [video path, question, options literal].
            gr.Examples(
                examples=[
                    [
                        os.path.join(videos_dir, "3249402410.mp4"),
                        "What did the lady in black on the left do after she finished spreading the sauce on her pizza?",
                        "['slice the pizza', 'cut the meat', 'point', 'put cheese', 'put on plate']",
                    ],
                    [
                        os.path.join(videos_dir, "4882821564.mp4"),
                        "Why did the boy clap his hands when he ran to the christmas tree?",
                        "['adjust the tree', 'get away the dust', 'dancing', 'pressed a button to activate', 'presents']",
                    ],
                    [
                        os.path.join(videos_dir, "6233408665.mp4"),
                        "What did the people on the sofa do after the lady in pink finished singing?",
                        "['sitting', 'give it to the girl', 'take music sheet', 'clap', 'walk in circles']",
                    ],
                ],
                inputs=[video, question_box, candidates_box],
            )
# with gr.Row():
# with gr.Column(scale=3):
# video = gr.Video(label="Input Video")
# cur_dir = os.path.dirname(os.path.abspath(__file__))
# print(cur_dir)
# gr.Examples(
# examples=[
# [
# cur_dir + "/videos/3249402410.mp4",
# "What did the lady in black on the left do after she finished spreading the sauce on her pizza?",
# "['slice the pizza', 'cut the meat', 'point', 'put cheese', 'put on plate']"
# ],
# [
# cur_dir + "/videos/4882821564.mp4",
# "Why did the boy clap his hands when he ran to the christmas tree?",
# "['adjust the tree', 'get away the dust', 'dancing', 'pressed a button to activate', 'presents']"
# ],
# [
# cur_dir + "/videos/6233408665.mp4",
# "What did the people on the sofa do after the lady in pink finished singing?",
# "['sitting', 'give it to the girl', 'take music sheet', 'clap', 'walk in circles']"
# ],
# ],
# inputs=[video, question_box, candidates_box],
# )
# with gr.Column(scale=3):
# with gr.Row():
# with gr.Column(scale=4):
# question_box.render()
# with gr.Column(scale=4):
# candidates_box.render()
# with gr.Column(scale=1, min_width=50):
# submit_btn = gr.Button(
# value="Send", variant="primary", interactive=True
# )
#submit_btn.click(generate, [video, question_box, candidates_box], [chatbot])
# Start the web server only when this file is executed as a script, so that
# importing the module (e.g. to reuse `demo`) does not launch anything.
if __name__ == "__main__":
    # share=True additionally requests a public Gradio share link.
    demo.launch(share=True)