# NOTE(review): removed non-Python extraction artifacts (file-size / commit-hash / line-number dump) that broke parsing.
import glob
import os
import random
import shutil
import tempfile
from concurrent.futures import ThreadPoolExecutor

import tqdm
from moviepy.editor import (
    AudioFileClip,
    CompositeVideoClip,
    ImageClip,
    VideoFileClip,
    concatenate_videoclips,
    vfx,
)
from moviepy.video.tools.subtitles import SubtitlesClip
from sentence_transformers import SentenceTransformer, util
# Initialize the sentence-transformer model used for semantic image matching
# in find_matching_image().
model = SentenceTransformer('all-MiniLM-L6-v2')
# Number of images to use, raised to 30.
# NOTE(review): not referenced anywhere in this file — confirm a caller uses it.
NUM_IMAGES = 30
def add_transitions(clips, transition_duration=1):
    """
    Join clips with a cross-fade transition between consecutive clips.

    Each clip after the first starts ``transition_duration`` seconds before
    the previous clip ends and fades in over that overlap, producing a
    cross-fade.

    Parameters:
        clips: list of moviepy video clips to join, in playback order.
        transition_duration: length (seconds) of each cross-fade overlap.

    Returns:
        CompositeVideoClip covering the whole sequence.

    Fixes vs. the previous version: the fade variable names were swapped
    (``fade_in`` applied ``vfx.fadeout`` and vice versa), and the un-faded
    original clip was composited on top of the faded copies, so each clip was
    appended up to three times and the transitions were never visible.
    """
    positioned = []
    current_start = 0
    for i, clip in enumerate(clips):
        if i > 0:
            # Overlap the tail of the previous clip and fade in over it.
            current_start -= transition_duration
            clip = clip.crossfadein(transition_duration)
        positioned.append(clip.set_start(current_start))
        current_start += clip.duration
    return CompositeVideoClip(positioned)
def create_video(sentences, audio_files, video_files, output_path="output_video.mp4"):
    """
    Build one video by pairing each sentence's narration with a source video.

    Every source video is trimmed to its narration's duration and given that
    narration as its audio track; the resulting segments are concatenated and
    written to ``output_path`` at 24 fps.

    Returns:
        The path of the written video file.
    """
    segments = []
    triples = zip(sentences, audio_files, video_files)
    for _sentence, audio_path, video_path in tqdm.tqdm(triples, desc="Tạo video"):
        narration = AudioFileClip(audio_path)
        segment = VideoFileClip(video_path).set_duration(narration.duration)
        segments.append(segment.set_audio(narration))
    assembled = concatenate_videoclips(segments, method="compose")
    assembled.write_videofile(output_path, fps=24)
    print(f"Đã tạo video: {output_path}")
    return output_path
def process_images_parallel(image_patch, clip_duration):
    """
    Load image clips concurrently.

    ``image_patch`` is an iterable of ``(content, image_path)`` pairs; entries
    with a falsy path are skipped. Every resulting clip gets the same
    ``clip_duration``.

    Returns:
        List of ImageClip objects in input order.
    """
    with ThreadPoolExecutor() as pool:
        # Kick off one ImageClip construction per usable path.
        pending = [pool.submit(ImageClip, path) for _content, path in image_patch if path]
        return [task.result().set_duration(clip_duration) for task in pending]
def process_script_for_video(script, dataset_path, use_dataset):
    """
    Extract the key sentences of ``script`` for video generation.

    ``dataset_path`` and ``use_dataset`` are currently unused; they are kept
    for interface compatibility with existing callers.

    NOTE(review): delegates to ``extract_key_contents``, which is not defined
    in this file — confirm it is provided elsewhere.
    """
    return extract_key_contents(script)
def create_video_func(script, audio_path, dataset_path, use_dataset):
    """
    End-to-end pipeline: script + narration audio -> rendered video.

    Steps: extract key sentences from the script, split the narration audio
    into one segment per sentence, collect candidate ``*.mp4`` files from the
    dataset, then stitch everything together with ``create_video``.

    Returns:
        ``(output_path, output_path)`` — the path duplicated, matching the
        previous interface (e.g. for UIs expecting separate preview/download
        values).

    Fixes vs. the previous version: ``glob`` was used without being imported
    (NameError at runtime), and the temporary directory leaked whenever a
    later step raised — cleanup now happens in a ``finally`` block.
    """
    sentences = process_script_for_video(script, dataset_path, use_dataset)
    # Temporary directory holding the per-sentence audio segments.
    temp_dir = tempfile.mkdtemp()
    try:
        audio_clips = split_audio(audio_path, len(sentences), temp_dir)
        # Candidate source videos from the dataset (empty when disabled).
        video_files = glob.glob(os.path.join(dataset_path, "*.mp4")) if use_dataset else []
        # Truncate all three lists to the shortest so they stay aligned.
        min_length = min(len(sentences), len(audio_clips), len(video_files))
        sentences = sentences[:min_length]
        audio_clips = audio_clips[:min_length]
        video_files = video_files[:min_length]
        output_path = "output_video.mp4"
        create_video(sentences, audio_clips, video_files, output_path)
    finally:
        # Always remove the temp audio segments, even on failure.
        shutil.rmtree(temp_dir)
    return output_path, output_path
def split_audio(audio_path, num_segments, output_dir):
    """
    Split an audio file into ``num_segments`` equal-length MP3 segments.

    Writes ``segment_<i>.mp3`` files into ``output_dir``.

    Returns:
        List of the written segment paths, in order.
    """
    source = AudioFileClip(audio_path)
    step = source.duration / num_segments
    segment_paths = []
    for index in range(num_segments):
        target = os.path.join(output_dir, f"segment_{index}.mp3")
        source.subclip(index * step, (index + 1) * step).write_audiofile(target)
        segment_paths.append(target)
    return segment_paths
def find_matching_image(prompt, dataset_path, threshold=0.5):
    """
    Find the dataset image whose filename best matches ``prompt``.

    Each image filename (extension dropped, underscores read as spaces) is
    embedded with the module-level sentence-transformer ``model`` and scored
    against the prompt by cosine similarity.

    Returns:
        Path of the best-scoring image with similarity >= ``threshold``,
        or ``None`` when no image qualifies.
    """
    prompt_vec = model.encode(prompt, convert_to_tensor=True)
    best_path = None
    best_score = -1
    for entry in os.listdir(dataset_path):
        if not entry.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        label = os.path.splitext(entry)[0].replace('_', ' ')
        label_vec = model.encode(label, convert_to_tensor=True)
        score = util.pytorch_cos_sim(prompt_vec, label_vec).item()
        if score >= threshold and score > best_score:
            best_score = score
            best_path = os.path.join(dataset_path, entry)
    return best_path