File size: 5,320 Bytes
1e06115
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import glob
import os
import random
import shutil
import tempfile
from concurrent.futures import ThreadPoolExecutor

import tqdm
from moviepy.editor import (
    AudioFileClip,
    CompositeVideoClip,
    ImageClip,
    VideoFileClip,
    concatenate_videoclips,
    vfx,
)
from moviepy.video.tools.subtitles import SubtitlesClip
from sentence_transformers import SentenceTransformer, util

# Initialize the sentence-transformer embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Number of images to use (raised to 30); NOTE: not referenced elsewhere in this file
NUM_IMAGES = 30

def add_transitions(clips, transition_duration=1):
    """
    Add cross-fade transitions between consecutive clips.

    Consecutive clips are overlapped by ``transition_duration`` seconds:
    every clip after the first fades in, every clip before the last fades
    out, and the faded clips are layered into one composite.

    Note: the start-time formula assumes all clips share (roughly) the
    same duration — TODO confirm with callers.

    Args:
        clips: List of moviepy clips to join.
        transition_duration: Overlap/fade length in seconds.

    Returns:
        A CompositeVideoClip containing all clips with cross-fades.
    """
    positioned = []
    for i, clip in enumerate(clips):
        # Overlap each clip with its predecessor by transition_duration.
        start_time = i * (clip.duration - transition_duration)

        # Bug fix: the original applied vfx.fadeout for the "fade in" and
        # vfx.fadein for the "fade out", and also appended up to three
        # stacked copies of every clip (the un-faded copy on top hid the
        # fades entirely).  Apply both effects to a single copy instead.
        if i > 0:
            clip = clip.fx(vfx.fadein, duration=transition_duration)
        if i < len(clips) - 1:
            clip = clip.fx(vfx.fadeout, duration=transition_duration)

        positioned.append(clip.set_start(start_time))

    return CompositeVideoClip(positioned)

def create_video(sentences, audio_files, video_files, output_path="output_video.mp4"):
    """
    Build the final video by pairing each narration segment with a video clip.

    Each source video is trimmed/held to exactly its narration's duration,
    given that narration as its audio track, and the results are
    concatenated and rendered at 24 fps.

    Args:
        sentences: Sentences (one per segment; used only for pairing/progress).
        audio_files: Paths to per-sentence audio files.
        video_files: Paths to per-sentence video files.
        output_path: Where to write the rendered video.

    Returns:
        The output path that was written.
    """
    # Materialize the pairing so tqdm knows the total and can show ETA.
    segments = list(zip(sentences, audio_files, video_files))

    clips = []
    for sentence, audio_path, video_path in tqdm.tqdm(segments, desc="Tạo video"):
        audio = AudioFileClip(audio_path)
        # Match the visual's duration to the narration, then attach it.
        video = VideoFileClip(video_path).set_duration(audio.duration)
        clips.append(video.set_audio(audio))

    final_video = concatenate_videoclips(clips, method="compose")
    try:
        final_video.write_videofile(output_path, fps=24)
    finally:
        # Release the ffmpeg readers moviepy keeps open (the original
        # leaked one audio and one video handle per segment).
        final_video.close()
        for clip in clips:
            clip.close()

    print(f"Đã tạo video: {output_path}")
    return output_path

def process_images_parallel(image_patch, clip_duration):
    """
    Load image clips concurrently with a thread pool.

    Args:
        image_patch: Iterable of (content, image_path) pairs; entries whose
            path is falsy are skipped.
        clip_duration: Duration in seconds applied to every resulting clip.

    Returns:
        List of ImageClip objects, each set to ``clip_duration``.
    """
    with ThreadPoolExecutor() as pool:
        # Submit one load task per usable image path.
        pending = [
            pool.submit(ImageClip, path)
            for _content, path in image_patch
            if path
        ]
        # Collect results in submission order and stamp the duration.
        return [task.result().set_duration(clip_duration) for task in pending]

def process_script_for_video(script, dataset_path, use_dataset):
    """
    Extract the key sentences from *script* for video generation.

    Note: ``dataset_path`` and ``use_dataset`` are unused here; they are
    accepted only to mirror the call made from ``create_video_func``.

    Args:
        script: Full script text to condense.
        dataset_path: Unused.
        use_dataset: Unused.

    Returns:
        The result of ``extract_key_contents`` — presumably a list of
        sentences. NOTE(review): ``extract_key_contents`` is not defined or
        imported in this file; confirm it exists at runtime.
    """
    sentences = extract_key_contents(script)
    return sentences

def create_video_func(script, audio_path, dataset_path, use_dataset):
    """
    Main entry point: turn a script plus narration audio into a video.

    Pipeline: extract key sentences, split the narration into one segment
    per sentence, pair each segment with a dataset video, and render.

    Args:
        script: Full script text.
        audio_path: Path to the full narration audio file.
        dataset_path: Directory holding candidate ``*.mp4`` clips.
        use_dataset: When falsy, no dataset videos are used (which truncates
            every list to length 0 — presumably an upstream guard prevents
            this; TODO confirm).

    Returns:
        Tuple ``(output_path, output_path)`` — duplicated, presumably so the
        same path can feed two UI outputs (e.g. preview + download link).
    """
    sentences = process_script_for_video(script, dataset_path, use_dataset)

    # Temp dir for the per-sentence audio segments.
    temp_dir = tempfile.mkdtemp()
    try:
        # Split the narration into one piece per sentence.
        audio_clips = split_audio(audio_path, len(sentences), temp_dir)

        # Candidate videos from the dataset (requires the file-level
        # ``import glob``).
        video_files = glob.glob(os.path.join(dataset_path, "*.mp4")) if use_dataset else []

        # Truncate all three lists to a common length so zip() pairs them 1:1.
        min_length = min(len(sentences), len(audio_clips), len(video_files))
        sentences = sentences[:min_length]
        audio_clips = audio_clips[:min_length]
        video_files = video_files[:min_length]

        output_path = "output_video.mp4"
        create_video(sentences, audio_clips, video_files, output_path)
    finally:
        # Bug fix: always remove the temp dir — the original leaked it
        # whenever any step above raised.
        shutil.rmtree(temp_dir, ignore_errors=True)

    return output_path, output_path

def split_audio(audio_path, num_segments, output_dir):
    """
    Split an audio file into ``num_segments`` equal-length MP3 segments.

    Args:
        audio_path: Path to the source audio file.
        num_segments: Number of equal pieces to produce.
        output_dir: Directory to write ``segment_<i>.mp3`` files into.

    Returns:
        List of paths to the written segment files, in order; empty when
        ``num_segments`` is not positive.
    """
    # Bug fix: the original raised ZeroDivisionError for num_segments == 0
    # (e.g. an empty sentence list).
    if num_segments <= 0:
        return []

    audio = AudioFileClip(audio_path)
    try:
        segment_duration = audio.duration / num_segments

        audio_clips = []
        for i in range(num_segments):
            start = i * segment_duration
            # Clamp so float rounding can't push the final segment's end
            # past the actual end of the audio.
            end = min((i + 1) * segment_duration, audio.duration)
            output_path = os.path.join(output_dir, f"segment_{i}.mp3")
            audio.subclip(start, end).write_audiofile(output_path)
            audio_clips.append(output_path)
    finally:
        # Release the ffmpeg reader (the original leaked it).
        audio.close()

    return audio_clips

def find_matching_image(prompt, dataset_path, threshold=0.5):
    """
    Find the dataset image whose filename best matches *prompt*.

    Each image's base filename (underscores replaced with spaces) is
    embedded with the module-level sentence-transformer model and compared
    to the prompt embedding by cosine similarity.

    Args:
        prompt: Free-text description to match against.
        dataset_path: Directory containing candidate image files.
        threshold: Minimum cosine similarity required for a match.

    Returns:
        Path of the highest-scoring image at or above ``threshold``, or
        None if no image qualifies.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    prompt_vec = model.encode(prompt, convert_to_tensor=True)

    best_path = None
    best_score = -1
    for entry in os.listdir(dataset_path):
        if not entry.lower().endswith(image_extensions):
            continue
        # Derive a comparable text label from the filename.
        label = os.path.splitext(entry)[0].replace('_', ' ')
        label_vec = model.encode(label, convert_to_tensor=True)
        score = util.pytorch_cos_sim(prompt_vec, label_vec).item()
        # Keep the best candidate that also clears the threshold.
        if score > best_score and score >= threshold:
            best_score = score
            best_path = os.path.join(dataset_path, entry)
    return best_path