import json
import os
import random
import numpy as np
from pydub import AudioSegment
from pydub.utils import make_chunks
from pydub.effects import compress_dynamic_range
from PIL import Image
import cv2
from moviepy.editor import VideoClip, AudioFileClip
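
# The script expects a config.json next to it. Example (key names taken from the
# lookups below; the values here are illustrative placeholders, not defaults):
# {
#     "frame_paths": {
#         "closed_mouth": "frames/closed_mouth.png",
#         "open_mouth": "frames/open_mouth.png",
#         "closed_mouth_blinking": "frames/closed_mouth_blinking.png",
#         "open_mouth_blinking": "frames/open_mouth_blinking.png"
#     },
#     "background_color": [0, 255, 0, 255],
#     "minimum_blinking_delay": 2.0,
#     "maximum_blinking_delay": 6.0,
#     "blink_duration": 0.15,
#     "initial_blink_time": 0.0,
#     "frame_rate": 30,
#     "frame_duration_ms": 1000,
#     "audio_path": "audio",
#     "output_path": "output",
#     "dynamic_threshold": 1,
#     "decibel_threshold": -25.0,
#     "codec": "libx264",
#     "audio_codec": "aac"
# }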

# Load configuration
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

# Load the images
closed_mouth_img = Image.open(config['frame_paths']['closed_mouth'])
open_mouth_img = Image.open(config['frame_paths']['open_mouth'])
closed_mouth_blinking_img = Image.open(config['frame_paths']['closed_mouth_blinking'])
open_mouth_blinking_img = Image.open(config['frame_paths']['open_mouth_blinking'])

# Create a background with the color from config
background_color = tuple(config['background_color'])
background = Image.new('RGBA', closed_mouth_img.size, background_color)

# Composite the images with the background
closed_mouth_img = Image.alpha_composite(background, closed_mouth_img)
open_mouth_img = Image.alpha_composite(background, open_mouth_img)
closed_mouth_blinking_img = Image.alpha_composite(background, closed_mouth_blinking_img)
open_mouth_blinking_img = Image.alpha_composite(background, open_mouth_blinking_img)

# Drop the alpha channel: moviepy expects plain H x W x 3 RGB frames
closed_mouth_cv = cv2.cvtColor(np.array(closed_mouth_img), cv2.COLOR_RGBA2RGB)
open_mouth_cv = cv2.cvtColor(np.array(open_mouth_img), cv2.COLOR_RGBA2RGB)
closed_mouth_blinking_cv = cv2.cvtColor(np.array(closed_mouth_blinking_img), cv2.COLOR_RGBA2RGB)
open_mouth_blinking_cv = cv2.cvtColor(np.array(open_mouth_blinking_img), cv2.COLOR_RGBA2RGB)

# Decide whether it's time to blink again
def should_blink(t, last_blink_time):
    # A fresh random delay is drawn on every call; blink once it has elapsed
    delay = random.uniform(config['minimum_blinking_delay'], config['maximum_blinking_delay'])
    return t - last_blink_time > delay

blink_duration = config['blink_duration']
last_blink_time = config['initial_blink_time']

# Set parameters
frame_rate = config['frame_rate']
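# Duration of one animation frame in milliseconds. This assumes
# config['frame_duration_ms'] holds the milliseconds in one second (1000),
# e.g. 1000 // 30 fps = 33 ms of audio per frame.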
frame_duration_ms = config['frame_duration_ms'] // frame_rate

for audio_file in os.listdir(config['audio_path']):
    # Load the audio (pydub infers the format from the file extension)
    audio_path = os.path.join(config['audio_path'], audio_file)
    audio = AudioSegment.from_file(audio_path)

    # Reset the blink state so blink timing doesn't carry over between videos
    last_blink_time = config['initial_blink_time']

    # Apply compression
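    # Squash peaks above -20 dBFS at an 8:1 ratio so quiet and loud speech
    # trigger the mouth more consistently (attack/release are in milliseconds)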
    compressed_audio = compress_dynamic_range(audio, threshold=-20.0, ratio=8.0, attack=1.0, release=10.0)
    
    # Normalize audio
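    # e.g. if the compressed audio measures -16 dBFS, this applies +6 dB of gain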
    target_dBFS = -10.0
    change_in_dBFS = target_dBFS - compressed_audio.dBFS
    normalized_audio = compressed_audio.apply_gain(change_in_dBFS)

    # Split the audio into chunks of the same duration as the frames
    audio_chunks = make_chunks(normalized_audio, frame_duration_ms)

    # Function to calculate decibels of a chunk
    def calculate_decibels(chunk):
        return chunk.dBFS

    # Decide whether to use dynamic threshold or a fixed threshold
    if config["dynamic_threshold"] == 1:
        # Calculate average decibels
        average_dBFS = sum(chunk.dBFS for chunk in audio_chunks) / len(audio_chunks)
        decibel_threshold = average_dBFS + 4  # Set threshold above average
    else:
        decibel_threshold = config['decibel_threshold']
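    # Example: with dynamic thresholding and an average of -30 dBFS, the
    # threshold becomes -26 dBFS; chunks louder than that open the mouth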
    
    # Generate the frame for time t (in seconds)
    def make_frame(t):
        global last_blink_time
        frame_index = int(t * frame_rate)

        if should_blink(t, last_blink_time):
            last_blink_time = t

        # Use the blinking variants while a blink is in progress
        blinking = 0 <= (t - last_blink_time) <= blink_duration
        open_img = open_mouth_blinking_cv if blinking else open_mouth_cv
        closed_img = closed_mouth_blinking_cv if blinking else closed_mouth_cv

        # Open the mouth whenever the matching audio chunk is loud enough
        if frame_index < len(audio_chunks):
            decibels = calculate_decibels(audio_chunks[frame_index])
            return open_img if decibels > decibel_threshold else closed_img
        return closed_img

    # Create a video clip
    video_clip = VideoClip(make_frame, duration=len(audio_chunks) / frame_rate)

    # Load the audio
    audio_clip = AudioFileClip(audio_path)

    # Set the audio of the video to the loaded audio
    video_with_audio = video_clip.set_audio(audio_clip)

    # Write the final video with audio
    base_name = os.path.splitext(audio_file)[0]  # Keeps dots inside the name intact
    output_video_path = os.path.join(config['output_path'], f"{base_name}.mp4")
    video_with_audio.write_videofile(output_video_path, fps=frame_rate, codec=config['codec'], audio_codec=config["audio_codec"])

print("Animation created successfully!")