Update app.py
Browse files
app.py
CHANGED
@@ -2,28 +2,45 @@ import os
|
|
2 |
import json
|
3 |
import urllib.request
|
4 |
from PIL import Image
|
5 |
-
from gtts import gTTS
|
6 |
import cv2
|
7 |
import moviepy.editor as mp
|
8 |
import logging
|
|
|
9 |
import uuid
|
10 |
import time
|
11 |
import gradio as gr
|
12 |
-
import
|
13 |
-
from
|
|
|
|
|
|
|
14 |
|
15 |
-
# Configure logging
|
16 |
log_dir = os.getenv('LOG_DIRECTORY', './')
|
17 |
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
format='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
|
23 |
datefmt='%Y-%b-%d %H:%M:%S'
|
24 |
)
|
|
|
|
|
25 |
LOGGER = logging.getLogger(__name__)
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
log_level_env = os.getenv('LOG_LEVEL', 'INFO')
|
28 |
log_level_dict = {
|
29 |
'DEBUG': logging.DEBUG,
|
@@ -38,81 +55,74 @@ else:
|
|
38 |
log_level = log_level_dict['INFO']
|
39 |
LOGGER.setLevel(log_level)
|
40 |
|
|
|
|
|
|
|
41 |
|
42 |
class Text2Video:
|
43 |
"""A class to generate videos from text prompts."""
|
44 |
|
45 |
def __init__(self) -> None:
|
46 |
"""Initialize the Text2Video class."""
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
def get_image(self, img_prompt: str) -> str:
|
50 |
-
"""
|
51 |
-
|
52 |
-
Args:
|
53 |
-
img_prompt (str): Text prompt for generating the image.
|
54 |
-
Returns:
|
55 |
-
str: URL of the generated image.
|
56 |
-
"""
|
57 |
try:
|
58 |
-
|
59 |
-
|
60 |
-
prompt=
|
61 |
-
|
62 |
-
provider=Provider.DeepAI
|
63 |
)
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
except Exception as e:
|
70 |
-
LOGGER.error(f"Error generating image: {e}")
|
71 |
return ""
|
72 |
|
73 |
def download_img_from_url(self, image_url: str, image_path: str) -> str:
|
74 |
-
"""
|
75 |
-
|
76 |
-
Args:
|
77 |
-
image_url (str): URL of the image to download.
|
78 |
-
image_path (str): Path to save the downloaded image.
|
79 |
-
Returns:
|
80 |
-
str: Path of the downloaded image.
|
81 |
-
"""
|
82 |
try:
|
83 |
urllib.request.urlretrieve(image_url, image_path)
|
|
|
|
|
|
|
|
|
|
|
84 |
return image_path
|
85 |
-
|
86 |
except Exception as e:
|
87 |
-
LOGGER.error(f"Error downloading image from URL: {e}")
|
88 |
return ""
|
89 |
|
90 |
-
def text_to_audio(self,
|
91 |
-
"""
|
92 |
-
|
93 |
-
Args:
|
94 |
-
img_prompt (str): Text to convert to speech.
|
95 |
-
audio_path (str): Path to save the audio file.
|
96 |
-
Returns:
|
97 |
-
str: Path of the saved audio file.
|
98 |
-
"""
|
99 |
try:
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
return audio_path
|
104 |
except Exception as e:
|
105 |
-
LOGGER.error(f"Error
|
106 |
return ""
|
107 |
|
|
|
|
|
108 |
def get_images_and_audio(self, list_prompts: list) -> tuple:
|
109 |
-
"""
|
110 |
-
|
111 |
-
Args:
|
112 |
-
list_prompts (list): List of text prompts.
|
113 |
-
Returns:
|
114 |
-
tuple: A tuple containing lists of image paths and audio paths.
|
115 |
-
"""
|
116 |
img_list = []
|
117 |
audio_paths = []
|
118 |
for img_prompt in list_prompts:
|
@@ -120,70 +130,108 @@ class Text2Video:
|
|
120 |
unique_id = uuid.uuid4().hex
|
121 |
image_path = f"{img_prompt[:9]}_{unique_id}.png"
|
122 |
img_url = self.get_image(img_prompt)
|
123 |
-
|
124 |
-
|
|
|
|
|
125 |
|
126 |
audio_path = f"{img_prompt[:9]}_{unique_id}.mp3"
|
127 |
audio = self.text_to_audio(img_prompt, audio_path)
|
128 |
-
|
|
|
|
|
|
|
129 |
|
130 |
except Exception as e:
|
131 |
-
LOGGER.error(f"Error processing prompt: {img_prompt}, {e}")
|
132 |
|
133 |
return img_list, audio_paths
|
|
|
134 |
|
135 |
def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
|
136 |
-
"""
|
137 |
-
|
138 |
-
Args:
|
139 |
-
image_files (list): List of image files.
|
140 |
-
audio_files (list): List of audio files.
|
141 |
-
output_path (str): Path to save the output video file.
|
142 |
-
"""
|
143 |
try:
|
144 |
if len(image_files) != len(audio_files):
|
145 |
-
LOGGER.error("Error: Number of images
|
146 |
return
|
147 |
|
148 |
video_clips = []
|
149 |
|
150 |
for image_file, audio_file in zip(image_files, audio_files):
|
151 |
-
|
|
|
|
|
|
|
|
|
152 |
audio_clip = mp.AudioFileClip(audio_file)
|
153 |
video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
|
154 |
video_clip = video_clip.set_audio(audio_clip)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
video_clips.append(video_clip)
|
|
|
156 |
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
160 |
|
161 |
except Exception as e:
|
162 |
-
LOGGER.error(f"Error creating video: {e}")
|
163 |
|
164 |
-
def generate_video(self, text:
|
165 |
"""
|
166 |
Generate a video from a list of text prompts.
|
167 |
Args:
|
168 |
-
|
|
|
|
|
169 |
"""
|
|
|
170 |
try:
|
171 |
list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
|
172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
img_list, audio_paths = self.get_images_and_audio(list_prompts)
|
|
|
|
|
174 |
self.create_video_from_images_and_audio(img_list, audio_paths, output_path)
|
|
|
175 |
return output_path
|
176 |
except Exception as e:
|
177 |
-
LOGGER.error(f"Error generating video: {e}")
|
|
|
178 |
|
179 |
def gradio_interface(self):
|
|
|
|
|
180 |
with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
|
181 |
example_txt = """once upon a time there was a village. It was a nice place to live, except for one thing. people did not like to share.,, One day a visitor came to town.
|
182 |
'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,,
|
183 |
That's okay', said the visitor. 'I will make stone soup for everyone'.Then he took a stone and dropped it into a giant pot,,"""
|
184 |
|
185 |
gr.HTML("""
|
186 |
-
|
187 |
|
188 |
with gr.Row(elem_id="col-container"):
|
189 |
input_text = gr.Textbox(label="Comics Text", placeholder="Enter the comics by double comma separated")
|
@@ -192,15 +240,18 @@ class Text2Video:
|
|
192 |
button = gr.Button("Generate Video")
|
193 |
|
194 |
with gr.Row(elem_id="col-container"):
|
195 |
-
output = gr.
|
196 |
|
197 |
with gr.Row(elem_id="col-container"):
|
198 |
example = gr.Examples([example_txt], input_text)
|
199 |
|
200 |
button.click(self.generate_video, [input_text], output)
|
201 |
-
|
|
|
|
|
202 |
|
203 |
|
204 |
if __name__ == "__main__":
|
|
|
205 |
text2video = Text2Video()
|
206 |
text2video.gradio_interface()
|
|
|
2 |
import json
|
3 |
import urllib.request
|
4 |
from PIL import Image
|
|
|
5 |
import cv2
|
6 |
import moviepy.editor as mp
|
7 |
import logging
|
8 |
+
import requests
|
9 |
import uuid
|
10 |
import time
|
11 |
import gradio as gr
|
12 |
+
from moviepy.editor import *
|
13 |
+
from moviepy.video.tools.subtitles import SubtitlesClip
|
14 |
+
from hercai import Hercai # Import the hercai library
|
15 |
+
from gtts import gTTS # Import gTTS for text-to-speech
|
16 |
+
from hercai import Hercai # Import the hercai module
|
17 |
|
18 |
+
# Configure logging for both file and console
|
19 |
log_dir = os.getenv('LOG_DIRECTORY', './')
|
20 |
LOGGER_FILE_PATH = os.path.join(str(log_dir), 'utils.log')
|
21 |
|
22 |
+
# Create a formatter
|
23 |
+
formatter = logging.Formatter(
    fmt='[%(asctime)s] [%(levelname)s] [%(filename)s] [%(lineno)s:%(funcName)s()] %(message)s',
    datefmt='%Y-%b-%d %H:%M:%S',
)

# Module-level logger shared by everything in this file.
LOGGER = logging.getLogger(__name__)

# One handler appends to the log file, the other writes to the console;
# both use the same formatter and are attached in that order.
file_handler = logging.FileHandler(LOGGER_FILE_PATH, mode='a')
console_handler = logging.StreamHandler()
for _handler in (file_handler, console_handler):
    _handler.setFormatter(formatter)
for _handler in (file_handler, console_handler):
    LOGGER.addHandler(_handler)
|
42 |
+
|
43 |
+
# Set log level
|
44 |
log_level_env = os.getenv('LOG_LEVEL', 'INFO')
|
45 |
log_level_dict = {
|
46 |
'DEBUG': logging.DEBUG,
|
|
|
55 |
log_level = log_level_dict['INFO']
|
56 |
LOGGER.setLevel(log_level)
|
57 |
|
58 |
+
# Set the path to the ImageMagick binary
|
59 |
+
# os.environ["IMAGE_MAGICK_BINARY"] = "/usr/local/bin/convert" # Update this path as needed (No longer needed)
|
60 |
+
|
61 |
|
62 |
class Text2Video:
|
63 |
"""A class to generate videos from text prompts."""
|
64 |
|
65 |
def __init__(self) -> None:
    """Initialize the Text2Video class.

    The Hercai API key is read from the ``HERCAI_API_KEY`` environment
    variable; when the variable is unset the original placeholder string
    is used, so existing behaviour is unchanged.
    """
    # Reading from the environment keeps a real key out of the source file.
    self.hercai_api_key = os.getenv("HERCAI_API_KEY", "YOUR_HERCAI_API_KEY")
    self.hercai = Hercai(self.hercai_api_key)  # Client used by get_image()
    # NOTE(review): no method visible in this file reads this URL -- confirm
    # whether it is still needed.
    self.hercai_base_url = "https://hercai.onrender.com/v3/text2image"
    LOGGER.info("π Text2Video class initialized.")
|
71 |
+
|
72 |
+
def generate_text(self, prompt: str) -> str:
    """Placeholder for text generation.

    Logs the prompt and returns an empty string; no text-generation
    backend is called from here.

    Args:
        prompt (str): Prompt that would be sent to a text model.
    Returns:
        str: Always "".
    """
    LOGGER.info(f"π Generating text for prompt: {prompt}")
    generated = ""  # nothing is generated; kept only so callers get a str
    return generated
|
77 |
|
78 |
def get_image(self, img_prompt: str) -> str:
    """Ask Hercai to draw an image for the prompt and return its URL.

    Args:
        img_prompt (str): Text prompt describing the image.
    Returns:
        str: The ``'url'`` entry of the Hercai response, or "" when a
        ``requests`` error occurs. Other exception types propagate to
        the caller.
    """
    LOGGER.info(f"πΌοΈ Generating image for prompt: {img_prompt}")
    draw_options = {
        "model": "simurg",
        "prompt": img_prompt,
        "negative_prompt": "Dark and gloomy",
    }
    try:
        image_url = self.hercai.draw_image(**draw_options)['url']
        LOGGER.info(f"β Generated image URL: {image_url}")
    except requests.exceptions.RequestException as e:
        LOGGER.error(f"β Error generating image: {str(e)}")
        return ""
    return image_url
|
93 |
|
94 |
def download_img_from_url(self, image_url: str, image_path: str) -> str:
    """Download an image and resize it to 640x480 in place.

    Args:
        image_url (str): URL of the image to download.
        image_path (str): Local path the image is saved to.
    Returns:
        str: ``image_path`` on success, "" if the download or resize fails.
    """
    LOGGER.info(f"β¬οΈ Downloading image from URL: {image_url} to path: {image_path}")
    try:
        urllib.request.urlretrieve(image_url, image_path)
        # Close the downloaded file before overwriting the same path:
        # the original left the handle open, which leaks it and can make
        # the save fail on platforms that lock open files.
        with Image.open(image_path) as img:
            resized = img.resize((640, 480))  # smaller frames keep the video small
        resized.save(image_path)
        LOGGER.info(f"β Image downloaded to: {image_path}")
        return image_path
    except Exception as e:
        LOGGER.error(f"β Error downloading image from URL: {e}")
        return ""
|
108 |
|
109 |
+
def text_to_audio(self, text: str, audio_path: str) -> str:
    """Convert text to speech with gTTS and save it as an audio file.

    Args:
        text (str): Text to speak.
        audio_path (str): Path the audio file is written to.
    Returns:
        str: ``audio_path`` on success, "" if speech generation fails.
    """
    LOGGER.info(f"π Converting text to audio for text: {text}")
    try:
        tts = gTTS(text=text, lang='en')  # 'en' selects English
        # gTTS.save() accepts only the target file. The previous
        # ``bitrate="128k"`` keyword raised TypeError on every call, so
        # this method always returned "" and no audio was ever produced.
        tts.save(audio_path)
        LOGGER.info(f"β Audio saved to: {audio_path}")
        return audio_path
    except Exception as e:
        LOGGER.error(f"β Error generating speech: {str(e)}")
        return ""
|
120 |
|
121 |
+
# The transcription part has been removed as it's no longer needed
|
122 |
+
|
123 |
def get_images_and_audio(self, list_prompts: list) -> tuple:
    """Generate one image and one audio file per prompt.

    A prompt contributes to the result only when BOTH its image and its
    audio were produced, so the two returned lists always have the same
    length and matching order.

    Args:
        list_prompts (list): Text prompts, one per video segment.
    Returns:
        tuple: ``(img_list, audio_paths)`` -- parallel lists of file paths.
    """
    LOGGER.info(f"πΌοΈπ Generating images and audio for prompts: {list_prompts}")
    img_list = []
    audio_paths = []
    for img_prompt in list_prompts:
        try:
            unique_id = uuid.uuid4().hex
            # Prompt text can hold characters that are invalid in file
            # names (e.g. "/"), so keep only alphanumerics in the stem.
            stem = "".join(ch if ch.isalnum() else "_" for ch in img_prompt[:9])
            image_path = f"{stem}_{unique_id}.png"
            audio_path = f"{stem}_{unique_id}.mp3"

            img_url = self.get_image(img_prompt)
            # Both helpers signal failure with "", never with an exception.
            image = self.download_img_from_url(img_url, image_path) if img_url else ""
            if not image:
                LOGGER.error(f"Skipping prompt, no image available: {img_prompt}")
                continue

            audio = self.text_to_audio(img_prompt, audio_path)
            if not audio:
                LOGGER.error(f"Skipping prompt, no audio available: {img_prompt}")
                continue

            # Append as a pair so the lists cannot drift out of step.
            img_list.append(image)
            audio_paths.append(audio)
            LOGGER.info(f"β Processed prompt: {img_prompt}, Image: {image}, Audio: {audio}")

        except Exception as e:
            LOGGER.error(f"β Error processing prompt: {img_prompt}, {e}")

    return img_list, audio_paths
|
149 |
+
|
150 |
|
151 |
def create_video_from_images_and_audio(self, image_files: list, audio_files: list, output_path: str) -> None:
    """Create a video from images and their audio, with a thumbnail bubble.

    Each image is shown for the duration of its audio file, with a
    150x150 copy of the image overlaid near the top-right corner. The
    segments are concatenated and written to ``output_path``. Failures
    are logged rather than raised.

    Args:
        image_files (list): Image paths, one per segment.
        audio_files (list): Audio paths, parallel to ``image_files``.
        output_path (str): Path of the video file to write.
    """
    LOGGER.info(f"π₯ Creating video from images: {image_files}, audio files: {audio_files}")
    audio_clips = []  # tracked so their readers can be closed afterwards
    try:
        if len(image_files) != len(audio_files):
            LOGGER.error("β Error: Number of images and audio files don't match.")
            return

        video_clips = []

        for image_file, audio_file in zip(image_files, audio_files):
            if not os.path.exists(audio_file) or not os.path.exists(image_file):
                LOGGER.error(f"β Audio or image file missing for {image_file} or {audio_file}")
                continue

            # Base segment: the still image for as long as the audio lasts.
            audio_clip = mp.AudioFileClip(audio_file)
            audio_clips.append(audio_clip)
            video_clip = mp.ImageClip(image_file).set_duration(audio_clip.duration)
            video_clip = video_clip.set_audio(audio_clip)

            # The bubble is optional: if it fails, keep the plain segment.
            try:
                frame_width = video_clip.size[0]
                bubble_clip = mp.ImageClip(image_file).resize((150, 150))
                bubble_clip = bubble_clip.set_position((int(frame_width * 0.90), 0))
                # Without an explicit duration the composite's duration
                # is None and concatenation below fails.
                bubble_clip = bubble_clip.set_duration(audio_clip.duration)
                video_clip = mp.CompositeVideoClip([video_clip, bubble_clip])
            except Exception as ex:
                LOGGER.error(f"β Error adding comic bubble: {str(ex)}")

            video_clips.append(video_clip)
            LOGGER.info(f"β Created video clip for image: {image_file}, audio: {audio_file}")

        # Nothing is written when every segment was skipped.
        if video_clips:
            final_clip = mp.concatenate_videoclips(video_clips)
            final_clip.write_videofile(output_path, codec='libx264', fps=24)
            LOGGER.info(f"β Video created successfully at: {output_path}")

    except Exception as e:
        LOGGER.error(f"β Error creating video: {str(e)}")
    finally:
        # Release the audio readers whether or not the write succeeded.
        for clip in audio_clips:
            try:
                clip.close()
            except Exception as close_err:
                LOGGER.error(f"Error closing audio clip: {close_err}")
|
196 |
|
197 |
+
def generate_video(self, text: str) -> str:
    """
    Generate a video from a list of text prompts.

    Args:
        text (str): Text prompts separated by double commas.
    Returns:
        str: Path to the generated video file, or "" when no video
        could be produced.
    """
    LOGGER.info(f"π¬ Generating video for text: {text}")
    try:
        list_prompts = [sentence.strip() for sentence in text.split(",,") if sentence.strip()]
        LOGGER.info(f"π List of prompts: {list_prompts}")

        if not list_prompts:
            LOGGER.error("No prompts found in the input text; nothing to render.")
            return ""

        # A unique name per request so runs do not overwrite each other.
        output_path = f"output_video_{uuid.uuid4().hex[:8]}.mp4"
        LOGGER.info(f"π Output path for video: {output_path}")

        img_list, audio_paths = self.get_images_and_audio(list_prompts)
        self.create_video_from_images_and_audio(img_list, audio_paths, output_path)

        # The video builder logs and swallows its own failures, so the
        # file's existence is the only reliable success signal.
        if not os.path.exists(output_path):
            LOGGER.error(f"Video file was not created: {output_path}")
            return ""

        return output_path
    except Exception as e:
        LOGGER.error(f"β Error generating video: {str(e)}")
        return ""
|
224 |
|
225 |
def gradio_interface(self):
|
226 |
+
"""Create and launch the Gradio interface."""
|
227 |
+
LOGGER.info("π Launching Gradio interface.")
|
228 |
with gr.Blocks(css="style.css", theme='abidlabs/dracula_revamped') as demo:
|
229 |
example_txt = """once upon a time there was a village. It was a nice place to live, except for one thing. people did not like to share.,, One day a visitor came to town.
|
230 |
'Hello. Does anybody have food to share?' He asked. 'No', said everyone.,,
|
231 |
That's okay', said the visitor. 'I will make stone soup for everyone'.Then he took a stone and dropped it into a giant pot,,"""
|
232 |
|
233 |
gr.HTML("""
|
234 |
+
<center><h1 style="color:#fff">Comics Video Generator</h1></center>""")
|
235 |
|
236 |
with gr.Row(elem_id="col-container"):
|
237 |
input_text = gr.Textbox(label="Comics Text", placeholder="Enter the comics by double comma separated")
|
|
|
240 |
button = gr.Button("Generate Video")
|
241 |
|
242 |
with gr.Row(elem_id="col-container"):
|
243 |
+
output = gr.Video()
|
244 |
|
245 |
with gr.Row(elem_id="col-container"):
|
246 |
example = gr.Examples([example_txt], input_text)
|
247 |
|
248 |
button.click(self.generate_video, [input_text], output)
|
249 |
+
|
250 |
+
demo.launch(debug=True)
|
251 |
+
LOGGER.info("β
Gradio interface launched.")
|
252 |
|
253 |
|
254 |
if __name__ == "__main__":
    # Script entry point: log start-up, then build the app and hand
    # control to the Gradio UI.
    LOGGER.info("π Starting Text2Video application.")
    Text2Video().gradio_interface()
|