# Spaces: Running on T4
import logging
import re
import uuid

import gradio as gr
import requests
import torch
from bs4 import BeautifulSoup

from tuneavideo.models.unet import UNet3DConditionModel
from tuneavideo.pipelines.pipeline_tuneavideo import TuneAVideoPipeline
from tuneavideo.util import save_videos_grid
def model_url_list(num_pages: int = 5) -> list:
    """Build the Hugging Face Hub listing URLs that are scraped for model ids.

    Each URL addresses one page of the models listing, filtered with
    ``other=stable-diffusion`` and sorted by downloads. Pages are numbered
    from 0.

    Args:
        num_pages: Number of consecutive listing pages to generate, starting
            at page 0. Defaults to 5, the count this app has always used.

    Returns:
        One URL string per page, in page order. The list is empty when
        ``num_pages`` is zero or negative.
    """
    return [
        f"https://huggingface.co/models?other=stable-diffusion&p={page}&sort=downloads"
        for page in range(num_pages)
    ]
def data_scraping(url_list, timeout=10.0):
    """Collect link targets from a set of model-listing pages.

    Every URL is fetched and parsed as HTML. The first ``<div>`` whose class
    attribute matches the listing-grid class string is located, and the
    ``href`` of every ``<a>`` inside it is collected unmodified.

    A page that cannot be fetched, answers with an HTTP error status, or does
    not contain the expected ``<div>`` is logged and skipped, so that one bad
    page does not abort the whole scrape.

    Args:
        url_list: Iterable of page URLs to fetch.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of ``href`` strings in page order, then document order. It is
        empty when ``url_list`` is empty or every page was skipped.
    """
    logger = logging.getLogger(__name__)
    # NOTE(review): presumably the CSS classes of the grid that wraps the model
    # cards on the listing page; this breaks silently if the site markup changes.
    div_class = 'grid gap-5 grid-cols-1 2xl:grid-cols-2'

    model_list = []
    for url in url_list:
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as err:
            logger.warning("Skipping model listing %s: %s", url, err)
            continue

        soup = BeautifulSoup(response.text, "html.parser")
        div = soup.find('div', {'class': div_class})
        if div is None:
            # find() returns None when nothing matches; calling find_all on it
            # would raise AttributeError.
            logger.warning("No model grid found in %s; skipping", url)
            continue

        model_list.extend(a['href'] for a in div.find_all('a', href=True))
    return model_list
# Curated models that always lead the dropdown, whatever the scrape returns.
best_model_list = [
    "runwayml/stable-diffusion-v1-5",
    "CompVis/stable-diffusion-v1-4",
    "prompthero/openjourney",
    "dreamlike-art/dreamlike-photoreal-2.0",
    "dreamlike-art/dreamlike-diffusion-1.0",
    "sd-dreambooth-library/mr-potato-head",
    "sd-dreambooth-library/disco-diffusion-style",
]

# Scraped at import time. Each href has its leading "/" removed; an href that
# does not start with "/" is kept whole rather than losing its first character.
scraped_model_list = [
    href[1:] if href.startswith("/") else href
    for href in data_scraping(model_url_list())
]

# Curated entries first, then scraped ones. dict.fromkeys keeps the first
# occurrence of each id in order, so an id present in both lists is offered
# only once.
model_list = list(dict.fromkeys(best_model_list + scraped_model_list))
def _gif_filename(prompt: str) -> str:
    """Turn a free-text prompt into a filesystem-safe, unique ``.gif`` name."""
    # Collapse everything except ASCII letters, digits, "_" and "-" so that
    # path separators and ".." can never reach the file system.
    slug = re.sub(r"[^A-Za-z0-9_-]+", "_", prompt)[:64].strip("_")
    # The random suffix keeps runs with the same prompt from overwriting
    # each other's output.
    return f"{slug or 'video'}_{uuid.uuid4().hex[:8]}.gif"


def tune_video_predict(
    pipe_id: str,
    prompt: str,
    video_length: int,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
):
    """Generate a video for ``prompt`` and save it as a GIF.

    On every call the function loads the ``unet`` subfolder of
    "Tune-A-Video-library/a-man-is-surfing" as a ``UNet3DConditionModel``
    and builds a ``TuneAVideoPipeline`` from ``pipe_id`` around it. Both are
    loaded in float16 and moved to the ``cuda`` device. The pipeline's
    ``.videos`` result is written under the ``output`` directory through
    ``save_videos_grid``.

    Args:
        pipe_id: Model id handed to ``TuneAVideoPipeline.from_pretrained``.
        prompt: Text prompt passed to the pipeline; also the basis of the
            output file name after sanitizing.
        video_length: Forwarded to the pipeline as ``video_length``.
        height: Forwarded to the pipeline as ``height``.
        width: Forwarded to the pipeline as ``width``.
        num_inference_steps: Forwarded to the pipeline unchanged.
        guidance_scale: Forwarded to the pipeline unchanged.

    Returns:
        Whatever ``save_videos_grid`` returns. NOTE(review): presumably the
        path of the written GIF -- confirm against ``tuneavideo.util``.
    """
    unet = UNet3DConditionModel.from_pretrained(
        "Tune-A-Video-library/a-man-is-surfing",
        subfolder='unet',
        torch_dtype=torch.float16,
    ).to('cuda')
    pipe = TuneAVideoPipeline.from_pretrained(
        pipe_id,
        unet=unet,
        torch_dtype=torch.float16,
    ).to("cuda")
    video = pipe(
        prompt,
        video_length=video_length,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).videos
    # The prompt is user input, so it is never used verbatim as a file name.
    output_path = save_videos_grid(
        video,
        save_path='output',
        path=_gif_filename(prompt),
    )
    return output_path
# Input widgets, listed in the positional order of tune_video_predict's
# parameters. All use the top-level component classes with `value=`, matching
# the Dropdown, instead of the deprecated `gr.inputs.*` wrappers and their
# `default=` argument.
demo_inputs = [
    gr.Dropdown(
        label="Model",
        choices=model_list,
        value="CompVis/stable-diffusion-v1-4",
    ),
    gr.Textbox(
        label="Prompt",
        value='a flower blooming',
    ),
    gr.Slider(
        label="Video Length",
        minimum=1,
        maximum=50,
        value=8,
        step=1,
    ),
    gr.Slider(
        label="Height",
        minimum=128,
        maximum=1280,
        value=416,
        step=32,
    ),
    gr.Slider(
        label="Width",
        minimum=128,
        maximum=1280,
        value=416,
        step=32,
    ),
    gr.Slider(
        label="Num Inference Steps",
        minimum=1,
        maximum=100,
        value=50,
        step=1,
    ),
    gr.Slider(
        label="Guidance Scale",
        minimum=0.0,
        maximum=100,
        value=7.5,
        step=0.5,
    ),
]
# Output widget for the generated GIF. `gr.Video(format=...)` is the top-level
# replacement for the deprecated `gr.outputs.Video(type=...)` wrapper.
demo_outputs = gr.Video(format="gif", label="Output")
# Example rows for the interface. Each row follows the input order:
# model, prompt, video length, height, width, inference steps, guidance scale.
# All examples share the same five generation settings.
_example_settings = [5, 416, 416, 50, 7.5]
_example_prompts = [
    ("CompVis/stable-diffusion-v1-4", "a panda is surfing"),
    ("sd-dreambooth-library/disco-diffusion-style", "ddfusion style on the church"),
    ("sd-dreambooth-library/nasa-space-v2-768", "nasa style galaxy moving"),
    ("sd-dreambooth-library/mr-potato-head", "sks mr potato head, wearing a pink hat, is surfing."),
    ("sd-dreambooth-library/mr-potato-head", "sks mr potato head is surfing in the forest."),
]
examples = [
    [model_id, prompt_text, *_example_settings]
    for model_id, prompt_text in _example_prompts
]
# Heading and introductory text handed to the Gradio interface.
title = "Tune-A-Video: One-Shot Tuning of Image Diffusion Models for Text-to-Video Generation"
# Adjacent literals are concatenated by the parser into one string; each piece
# carries its own trailing space.
description = (
    "This is an application that generates video based on a text prompt. "
    "To get started, simply input text. "
    "The default model in the dropdown is a generic model that you can generate anything. "
    "Alternatively, for more photorealistic generations, you can use other models in the dropdown. "
    "These models are Dreambooth models, and they're trained with a specific object name, "
    "so make sure you know what the object is called. "
    "You can find an example prompt for a dreambooth model in Examples section right below the interface."
)
# Wire the prediction function, widgets, examples and page text into a single
# Gradio interface. `cache_examples=True` asks Gradio to cache example outputs.
_interface_options = {
    "fn": tune_video_predict,
    "inputs": demo_inputs,
    "outputs": demo_outputs,
    "examples": examples,
    "cache_examples": True,
    "title": title,
    "theme": "huggingface",
    "description": description,
}
demo_app = gr.Interface(**_interface_options)

# Start the server as soon as this module runs, with debug output and
# queueing switched on.
_launch_options = {"debug": True, "enable_queue": True}
demo_app.launch(**_launch_options)