"""Gradio demo for T-MoENet zero-shot video question answering.

Builds a small web UI with a video input, a question box and a box holding a
list of candidate answers, and forwards them to the ``Infer`` handler.
"""

import ast
import os
import shutil
import tempfile
import uuid

import gradio as gr
import torch
from fastapi import FastAPI

from Infer import Infer

title_markdown = ("""

Temporal-guided Mixture-of-Experts for Zero-Shot Video Question Answering

Under review.
""")

block_css = """ #buttons button { min-width: min(120px,100%); } """

# Directory (relative to the working directory) that receives copied videos.
TEMP_DIR = "temp"


def save_video_to_local(video_path):
    """Copy ``video_path`` into the local temp directory under a random name.

    Args:
        video_path: Path of the video file to copy.

    Returns:
        The path of the copy, of the form ``temp/<random>.mp4``.
    """
    # uuid4 gives a collision-resistant name without relying on the private
    # ``tempfile._get_candidate_names`` helper.
    filename = os.path.join(TEMP_DIR, uuid.uuid4().hex + '.mp4')
    shutil.copyfile(video_path, filename)
    return filename


def _parse_candidates(raw):
    """Parse the candidate-answers textbox into a Python object.

    The textbox is expected to contain a Python literal such as
    ``"['sitting', 'clap']"``.  ``ast.literal_eval`` is used instead of
    ``eval`` because the text comes from an untrusted web client: ``eval``
    would execute arbitrary code on the server.

    Raises:
        gr.Error: If the text is not a valid Python literal.
    """
    try:
        return ast.literal_eval(raw)
    except (ValueError, SyntaxError) as err:
        raise gr.Error(
            "Options must be a Python-style list, e.g. ['option a', 'option b']"
        ) from err


def generate(video, textbox_in, candbox_in):
    """Answer a multiple-choice question about a video.

    Args:
        video: Value supplied by the ``gr.Video`` input; a falsy value is
            replaced by the string ``"none"`` before calling the handler.
        textbox_in: The question text.
        candbox_in: Text of a Python list literal holding the candidate answers.

    Returns:
        Whatever ``handler.generate`` returns for the given inputs.

    Raises:
        gr.Error: If ``candbox_in`` cannot be parsed as a Python literal.
    """
    video = video if video else "none"
    candidates = _parse_candidates(candbox_in)
    text_en_out = handler.generate(textbox_in, candidates, video)
    print(text_en_out)
    return text_en_out


device = "cpu"
handler = Infer(device)

# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(TEMP_DIR, exist_ok=True)

# The textboxes are created up front so they can be referenced by gr.Examples
# and rendered later inside the layout.
question_box = gr.Textbox(
    show_label=False, placeholder="Enter question", container=False
)
candidates_box = gr.Textbox(
    show_label=False, placeholder="Enter a list of options", container=False
)

with gr.Blocks(title='T-MoENet', theme=gr.themes.Default(), css=block_css) as demo:
    gr.Markdown(title_markdown)
    # These state holders are not referenced anywhere else in this script.
    state = gr.State()
    state_ = gr.State()
    first_run = gr.State()
    images_tensor = gr.State()

    with gr.Row():
        with gr.Column(scale=3):
            video = gr.Video(label="Input Video")

            cur_dir = os.path.dirname(os.path.abspath(__file__))
            print(cur_dir)
            videos_dir = os.path.join(cur_dir, "videos")
            gr.Examples(
                examples=[
                    [
                        os.path.join(videos_dir, "3249402410.mp4"),
                        "What did the lady in black on the left do after she finished spreading the sauce on her pizza?",
                        "['slice the pizza', 'cut the meat', 'point', 'put cheese', 'put on plate']",
                    ],
                    [
                        os.path.join(videos_dir, "4882821564.mp4"),
                        "Why did the boy clap his hands when he ran to the christmas tree?",
                        "['adjust the tree', 'get away the dust', 'dancing', 'pressed a button to activate', 'presents']",
                    ],
                    [
                        os.path.join(videos_dir, "6233408665.mp4"),
                        "What did the people on the sofa do after the lady in pink finished singing?",
                        "['sitting', 'give it to the girl', 'take music sheet', 'clap', 'walk in circles']",
                    ],
                ],
                inputs=[video, question_box, candidates_box],
            )

        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="T-MoENet", bubble_full_width=True)
            with gr.Row():
                with gr.Column(scale=4):
                    question_box.render()
                with gr.Column(scale=4):
                    candidates_box.render()
                with gr.Column(scale=1, min_width=50):
                    submit_btn = gr.Button(
                        value="Send", variant="primary", interactive=True
                    )

    # NOTE(review): the return value of `generate` is fed straight into the
    # Chatbot component; confirm that `handler.generate` returns data in the
    # message format Chatbot expects.
    submit_btn.click(generate, [video, question_box, candidates_box], [chatbot])

demo.launch(share=True)