import os
import random
from glob import glob
from typing import Optional

import torch
from PIL import Image
from diffusers import StableVideoDiffusionPipeline

from .tdd_svd_scheduler import TDDSVDStochasticIterativeScheduler
from .utils import load_lora_weights, save_video
# Define model paths, LoRA weights, and target device
svd_path = 'stabilityai/stable-video-diffusion-img2vid-xt-1-1'
lora_repo_path = 'RED-AIGC/TDD'
lora_weight_name = 'svd-xt-1-1_tdd_lora_weights.safetensors'
device = "cuda" if torch.cuda.is_available() else "cpu"
# Initialize the TDD noise scheduler and the SVD pipeline, then load the LoRA weights
noise_scheduler = TDDSVDStochasticIterativeScheduler(
    num_train_timesteps=250, sigma_min=0.002, sigma_max=700.0,
    sigma_data=1.0, s_noise=1.0, rho=7, clip_denoised=False
)
pipeline = StableVideoDiffusionPipeline.from_pretrained(
    svd_path, scheduler=noise_scheduler, torch_dtype=torch.float32
).to(device)
load_lora_weights(pipeline.unet, lora_repo_path, weight_name=lora_weight_name)
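# Optional (a sketch, not part of the original script): on memory-constrained
# GPUs, diffusers' built-in offloading trades speed for VRAM. It requires the
# `accelerate` package, and the `.to(device)` call above should be dropped
# when it is enabled.
# pipeline.enable_model_cpu_offload()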
# Video generation function
def Video(
    image: Image.Image,
    seed: Optional[int] = 1,
    randomize_seed: bool = False,
    num_inference_steps: int = 4,
    eta: float = 0.3,
    min_guidance_scale: float = 1.0,
    max_guidance_scale: float = 1.0,
    fps: int = 7,
    width: int = 512,
    height: int = 512,
    num_frames: int = 25,
    motion_bucket_id: int = 127,
    output_folder: str = "outputs_gradio",
):
    # Set the eta value (stochasticity) in the TDD scheduler
    pipeline.scheduler.set_eta(eta)
    # Handle seed randomization and build a seeded generator
    if randomize_seed:
        seed = random.randint(0, 2**64 - 1)
    generator = torch.manual_seed(seed)
    # Accept either a NumPy array (as passed by Gradio) or a PIL image
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    # Number output files sequentially within the output folder
    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
    # Run inference under autocast with a dtype suited to the device:
    # float16 on GPU, bfloat16 on CPU (both supported by torch.autocast)
    autocast_dtype = torch.float16 if device == "cuda" else torch.bfloat16
    with torch.autocast(device_type=device, dtype=autocast_dtype):
        frames = pipeline(
            image, height=height, width=width,
            num_inference_steps=num_inference_steps,
            min_guidance_scale=min_guidance_scale,
            max_guidance_scale=max_guidance_scale,
            num_frames=num_frames, fps=fps, motion_bucket_id=motion_bucket_id,
            generator=generator,
        ).frames[0]
    # Save the generated frames as an MP4 and return its path with the seed used
    save_video(frames, video_path, fps=fps, quality=5.0)
    torch.manual_seed(seed)
    return video_path, seed
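
# Example usage (a sketch, not part of the original script; "input.png" is an
# assumed local image file):
if __name__ == "__main__":
    sample = Image.open("input.png").convert("RGB")
    path, used_seed = Video(sample, seed=42, randomize_seed=False)
    print(f"Saved video to {path} (seed {used_seed})")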