Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- .gitattributes +3 -35
- README.md +32 -12
- audio/test.wav +3 -0
- config.json +21 -0
- frames/closed_mouth.png +0 -0
- frames/closed_mouth_blinking.png +0 -0
- frames/open_mouth.png +0 -0
- frames/open_mouth_blinking.png +0 -0
- main.py +115 -0
- output/test.mp4 +0 -0
- requirements.txt +5 -0
- run.bat +1 -0
.gitattributes
CHANGED
@@ -1,35 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
+
# Auto detect text files and perform LF normalization
|
2 |
+
* text=auto
|
3 |
+
audio/test.wav filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -1,12 +1,32 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# AudioMouth
|
2 |
+
|
3 |
+
AudioMouth is a simple Python app that generates animated videos by syncing mouth movements with audio decibel levels. It processes an audio file and switches between images (open and closed mouth) to create a lip-sync effect.
|
4 |
+
|
5 |
+
## Features
|
6 |
+
- Syncs mouth images to audio based on decibel levels.
|
7 |
+
- Custom FPS.
|
8 |
+
- Outputs video with green screen background (or a custom color) for chroma keying.
|
9 |
+
|
10 |
+
## Installation
|
11 |
+
Clone the repository and install the required dependencies by running the following commands from the command line:
|
12 |
+
|
13 |
+
```bash
|
14 |
+
git clone https://github.com/luisesantillan/AudioMouth
|
15 |
+
cd AudioMouth
|
16 |
+
pip install -r requirements.txt
|
17 |
+
```
|
18 |
+
## Usage
|
19 |
+
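Add 1-4 images to the frames folder and edit the paths in config.json to point to the images you want. All four entries (closed_mouth, open_mouth, closed_mouth_blinking, open_mouth_blinking) must be set, so if you have fewer than four images, reuse the same path for several entries.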
|
20 |
+
Put your audio files into the audio folder. One animation is created for each audio file.
|
21 |
+
|
22 |
+
closed_mouth | closed_mouth_blinking | open_mouth | open_mouth_blinking
|
23 |
+
:-------------------------:|:-------------------------:|:-------------------------:|:-------------------------:
|
24 |
+
![closed_mouth](https://github.com/user-attachments/assets/3ed0c597-df0e-4165-98d4-cf978e1338bb) | ![closed_mouth_blinking](https://github.com/user-attachments/assets/1296c2a7-4304-4935-b398-4ee5e1fe8a10) | ![open_mouth](https://github.com/user-attachments/assets/4715a73a-1a27-4ac9-a20b-954dde0aac0b) | ![open_mouth_blinking](https://github.com/user-attachments/assets/b7d04648-9158-4dd2-889c-27c67a64e0b2)
|
25 |
+
|
26 |
+
If you're on Windows, run run.bat; the generated videos are saved in the output folder.
|
27 |
+
If you're on Linux, run `python main.py` from inside the AudioMouth folder (config.json is read from the current directory).
|
28 |
+
|
29 |
+
https://github.com/user-attachments/assets/dcf3728c-0d3b-455d-b17e-5e9819be069b
|
30 |
+
|
31 |
+
|
32 |
+
|
audio/test.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1988fd65b06aef6657441ab017d4cf40f86ca36005bb1df3a26b3c7ad6628dd8
|
3 |
+
size 1269936
|
config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"background_color": [0, 255, 0],
|
3 |
+
"frame_paths": {
|
4 |
+
"closed_mouth": "frames/closed_mouth.png",
|
5 |
+
"open_mouth": "frames/open_mouth.png",
|
6 |
+
"closed_mouth_blinking": "frames/closed_mouth_blinking.png",
|
7 |
+
"open_mouth_blinking": "frames/open_mouth_blinking.png"
|
8 |
+
},
|
9 |
+
"output_path":"output",
|
10 |
+
"frame_duration_ms":1000,
|
11 |
+
"audio_path": "audio",
|
12 |
+
"blink_duration": 0.15,
|
13 |
+
"minimum_blinking_delay":2,
|
14 |
+
"maximum_blinking_delay": 5,
|
15 |
+
"initial_blink_time": -2,
|
16 |
+
"frame_rate": 24,
|
17 |
+
"dynamic_threshold": 1,
|
18 |
+
"decibel_threshold": -30.0,
|
19 |
+
"codec": "libx264",
|
20 |
+
"audio_codec": "aac"
|
21 |
+
}
|
frames/closed_mouth.png
ADDED
frames/closed_mouth_blinking.png
ADDED
frames/open_mouth.png
ADDED
frames/open_mouth_blinking.png
ADDED
main.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os, random, json
|
2 |
+
import numpy as np
|
3 |
+
from pydub import AudioSegment
|
4 |
+
from pydub.utils import make_chunks
|
5 |
+
from pydub.effects import compress_dynamic_range
|
6 |
+
from PIL import Image
|
7 |
+
import cv2
|
8 |
+
from moviepy.editor import VideoClip, AudioFileClip
|
9 |
+
|
10 |
+
# Load configuration.
# The path is relative, so the script must be started from the folder that
# contains config.json. JSON is UTF-8 by specification; passing the encoding
# explicitly keeps non-ASCII paths in the config from being mis-decoded on
# platforms whose default text encoding is not UTF-8.
with open('config.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)
|
13 |
+
|
14 |
+
# Load the images
def _load_rgba(path):
    """Open the image at *path* and return a fully loaded RGBA copy.

    Image.alpha_composite only accepts RGBA images, so frames saved without
    an alpha channel (RGB or palette PNGs) are converted here. The file
    handle is released as soon as the pixel data has been copied.
    """
    with Image.open(path) as img:
        return img.convert('RGBA')


closed_mouth_img = _load_rgba(config['frame_paths']['closed_mouth'])
open_mouth_img = _load_rgba(config['frame_paths']['open_mouth'])
closed_mouth_blinking_img = _load_rgba(config['frame_paths']['closed_mouth_blinking'])
open_mouth_blinking_img = _load_rgba(config['frame_paths']['open_mouth_blinking'])

# Create a background with the color from config.
# It is sized after the closed-mouth frame; alpha_composite requires every
# other frame to have exactly the same size.
background_color = tuple(config['background_color'])
background = Image.new('RGBA', closed_mouth_img.size, background_color)

# Composite the images with the background
closed_mouth_img = Image.alpha_composite(background, closed_mouth_img)
open_mouth_img = Image.alpha_composite(background, open_mouth_img)
closed_mouth_blinking_img = Image.alpha_composite(background, closed_mouth_blinking_img)
open_mouth_blinking_img = Image.alpha_composite(background, open_mouth_blinking_img)

# Convert images to RGB arrays (alpha dropped) for use as video frames
closed_mouth_cv = cv2.cvtColor(np.array(closed_mouth_img), cv2.COLOR_RGBA2RGB)
open_mouth_cv = cv2.cvtColor(np.array(open_mouth_img), cv2.COLOR_RGBA2RGB)
closed_mouth_blinking_cv = cv2.cvtColor(np.array(closed_mouth_blinking_img), cv2.COLOR_RGBA2RGB)
open_mouth_blinking_cv = cv2.cvtColor(np.array(open_mouth_blinking_img), cv2.COLOR_RGBA2RGB)
|
35 |
+
|
36 |
+
# Decide whether to blink
def should_blink(t, last_blink_time):
    """Return True when a new blink should start at time *t*.

    A fresh delay is drawn uniformly between the configured
    'minimum_blinking_delay' and 'maximum_blinking_delay' on every call, and
    a blink is due once the time elapsed since *last_blink_time* exceeds it.
    """
    elapsed = t - last_blink_time
    delay = random.uniform(config['minimum_blinking_delay'], config['maximum_blinking_delay'])
    return elapsed > delay
|
41 |
+
|
42 |
+
# Set parameters
frame_rate = config['frame_rate']
# Audio per video frame, floor-divided (any remainder is discarded)
frame_duration_ms = config['frame_duration_ms'] // frame_rate

# Blink state: how long a blink lasts and when the last one started, in the
# same units as the frame time `t` passed to make_frame
blink_duration = config['blink_duration']
last_blink_time = config['initial_blink_time']
|
48 |
+
|
49 |
+
# Function to calculate decibels of a chunk
def calculate_decibels(chunk):
    """Return the loudness of *chunk* in dBFS (-inf for pure silence)."""
    return chunk.dBFS


# Render one video per file found in the configured audio folder.
for audio_file in os.listdir(config['audio_path']):
    audio_path = os.path.join(config['audio_path'], audio_file)

    # Sub-directories cannot be decoded as audio; skip them instead of crashing.
    if not os.path.isfile(audio_path):
        continue

    # Every video starts at t = 0, so the blink timer must start over too.
    # Otherwise the previous file's last blink time suppresses blinking until
    # t catches up with it.
    last_blink_time = config['initial_blink_time']

    # Load the audio
    audio = AudioSegment.from_file(audio_path)

    # Apply compression
    compressed_audio = compress_dynamic_range(audio, threshold=-20.0, ratio=8.0, attack=1.0, release=10.0)

    # Normalize audio (a completely silent file has no finite gain to apply)
    target_dBFS = -10.0
    if compressed_audio.dBFS == float('-inf'):
        normalized_audio = compressed_audio
    else:
        change_in_dBFS = target_dBFS - compressed_audio.dBFS
        normalized_audio = compressed_audio.apply_gain(change_in_dBFS)

    # Split the audio into one chunk per video frame. Chunk boundaries are
    # computed from the exact (fractional) frame length; truncating the length
    # to whole milliseconds first makes each chunk slightly shorter than a
    # frame, so the video grows longer than the audio and the mouth drifts
    # out of sync.
    ms_per_frame = config['frame_duration_ms'] / frame_rate
    total_frames = int(np.ceil(len(normalized_audio) / ms_per_frame))
    audio_chunks = [
        normalized_audio[int(i * ms_per_frame):int((i + 1) * ms_per_frame)]
        for i in range(total_frames)
    ]

    if not audio_chunks:
        print(f"Skipping {audio_file}: no audio data.")
        continue

    # Measure every chunk once instead of on every frame request.
    chunk_levels = [calculate_decibels(chunk) for chunk in audio_chunks]

    # Decide whether to use dynamic threshold or a fixed threshold
    if config["dynamic_threshold"] == 1:
        # Average only the audible chunks: silent chunks report -inf dBFS and
        # would drag the average (and therefore the threshold) down to -inf.
        audible_levels = [level for level in chunk_levels if level != float('-inf')]
        if audible_levels:
            average_dBFS = sum(audible_levels) / len(audible_levels)
            decibel_threshold = average_dBFS + 4  # Set threshold above average
        else:
            decibel_threshold = config['decibel_threshold']
    else:
        decibel_threshold = config['decibel_threshold']

    # Function to generate frames
    def make_frame(t):
        """Return the RGB frame for time *t* (seconds into the video).

        The mouth is open when the audio chunk for this frame is louder than
        the threshold; the blinking variant is used while a blink is active.
        """
        global last_blink_time
        # The small epsilon keeps float error in t * frame_rate (e.g.
        # 2.9999999) from selecting the previous frame's chunk.
        frame_index = int(t * frame_rate + 1e-6)

        if should_blink(t, last_blink_time):
            last_blink_time = t

        blinking = 0 <= (t - last_blink_time) <= blink_duration
        mouth_open = (
            frame_index < len(chunk_levels)
            and chunk_levels[frame_index] > decibel_threshold
        )

        if blinking:
            return open_mouth_blinking_cv if mouth_open else closed_mouth_blinking_cv
        return open_mouth_cv if mouth_open else closed_mouth_cv

    # Create a video clip
    video_clip = VideoClip(make_frame, duration=len(audio_chunks) / frame_rate)

    # Load the audio
    audio_clip = AudioFileClip(audio_path)
    try:
        # Set the audio of the video to the loaded audio
        video_with_audio = video_clip.set_audio(audio_clip)

        # Write the final video with audio. splitext keeps dots that are part
        # of the file name ("my.song.wav" -> "my.song.mp4").
        os.makedirs(config['output_path'], exist_ok=True)
        output_video_path = os.path.join(config['output_path'], f"{os.path.splitext(audio_file)[0]}.mp4")
        video_with_audio.write_videofile(output_video_path, fps=frame_rate, codec=config['codec'], audio_codec=config["audio_codec"])
    finally:
        # Release the audio reader even if encoding fails.
        audio_clip.close()
|
114 |
+
|
115 |
+
print("Animation created successfully!")
|
output/test.mp4
ADDED
Binary file (233 kB). View file
|
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pydub
|
2 |
+
opencv-python
|
3 |
+
numpy
|
4 |
+
pillow
|
5 |
+
moviepy
|
run.bat
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
python main.py
|