Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -17,22 +17,9 @@ import re
|
|
17 |
|
18 |
# Download NLTK data during initialization
|
19 |
try:
|
|
|
|
|
20 |
nltk.download('punkt', quiet=True)
|
21 |
-
except Exception as e:
|
22 |
-
print(f"Warning: Failed to download NLTK data: {str(e)}")
|
23 |
-
print("Downloading from alternative source...")
|
24 |
-
try:
|
25 |
-
import ssl
|
26 |
-
try:
|
27 |
-
_create_unverified_https_context = ssl._create_unverified_context
|
28 |
-
except AttributeError:
|
29 |
-
pass
|
30 |
-
else:
|
31 |
-
ssl._create_default_https_context = _create_unverified_https_context
|
32 |
-
nltk.download('punkt', quiet=True)
|
33 |
-
except Exception as e:
|
34 |
-
print(f"Critical error downloading NLTK data: {str(e)}")
|
35 |
-
raise
|
36 |
|
37 |
# Configure logging
|
38 |
logging.basicConfig(
|
@@ -160,7 +147,7 @@ class TTSDatasetCollector:
|
|
160 |
|
161 |
try:
|
162 |
# Try NLTK first
|
163 |
-
self.sentences = nltk.sent_tokenize(text.strip()
|
164 |
except Exception as e:
|
165 |
logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
|
166 |
# Fallback to simple splitting
|
@@ -207,7 +194,7 @@ class TTSDatasetCollector:
|
|
207 |
|
208 |
def get_styled_text(self, text: str) -> str:
|
209 |
"""Get text with current font styling"""
|
210 |
-
font_css = FONT_STYLES
|
211 |
return f"<div style='{font_css}'>{text}</div>"
|
212 |
|
213 |
def set_font(self, font_style: str) -> Tuple[bool, str]:
|
@@ -532,18 +519,28 @@ def create_interface():
|
|
532 |
# Add font-face declarations
|
533 |
font_face_css = ""
|
534 |
for font_style, font_info in FONT_STYLES.items():
|
535 |
-
|
536 |
-
|
|
|
537 |
font_face_css += f"""
|
538 |
@font-face {{
|
539 |
font-family: '{font_info["family"]}';
|
540 |
-
src: url('
|
541 |
}}
|
542 |
"""
|
|
|
|
|
|
|
543 |
|
544 |
custom_css += font_face_css
|
545 |
|
546 |
with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
|
|
|
|
|
|
|
|
|
|
|
|
|
547 |
gr.Markdown("# TTS Dataset Collection Tool")
|
548 |
|
549 |
with gr.Row():
|
@@ -571,7 +568,7 @@ def create_interface():
|
|
571 |
elem_classes=["small-input"]
|
572 |
)
|
573 |
font_select = gr.Dropdown(
|
574 |
-
choices=list(FONT_STYLES.keys()),
|
575 |
value="english_serif",
|
576 |
label="Select Font Style",
|
577 |
elem_classes=["small-input"]
|
@@ -584,12 +581,6 @@ def create_interface():
|
|
584 |
)
|
585 |
add_font_btn = gr.Button("Add Custom Font")
|
586 |
|
587 |
-
status = gr.Textbox(
|
588 |
-
label="Status",
|
589 |
-
interactive=False,
|
590 |
-
max_lines=3
|
591 |
-
)
|
592 |
-
|
593 |
# Dataset Info
|
594 |
with gr.Accordion("Dataset Statistics", open=False):
|
595 |
dataset_info = gr.JSON(
|
@@ -757,8 +748,23 @@ def create_interface():
|
|
757 |
if not success:
|
758 |
return {status: f"❌ {msg}"}
|
759 |
# Update font dropdown
|
760 |
-
font_choices = list(FONT_STYLES.keys())
|
761 |
font_select.update(choices=font_choices)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
762 |
return {status: f"✅ {msg}"}
|
763 |
|
764 |
# Event handlers
|
|
|
17 |
|
18 |
# Download NLTK data during initialization
|
19 |
try:
|
20 |
+
nltk.data.find('tokenizers/punkt')
|
21 |
+
except LookupError:
|
22 |
nltk.download('punkt', quiet=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# Configure logging
|
25 |
logging.basicConfig(
|
|
|
147 |
|
148 |
try:
|
149 |
# Try NLTK first
|
150 |
+
self.sentences = nltk.sent_tokenize(text.strip())
|
151 |
except Exception as e:
|
152 |
logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
|
153 |
# Fallback to simple splitting
|
|
|
194 |
|
195 |
def get_styled_text(self, text: str) -> str:
|
196 |
"""Get text with current font styling"""
|
197 |
+
font_css = FONT_STYLES.get(self.current_font, {}).get('css', '')
|
198 |
return f"<div style='{font_css}'>{text}</div>"
|
199 |
|
200 |
def set_font(self, font_style: str) -> Tuple[bool, str]:
|
|
|
519 |
# Add font-face declarations
|
520 |
font_face_css = ""
|
521 |
for font_style, font_info in FONT_STYLES.items():
|
522 |
+
font_file_name = font_info['family'] + '.ttf'
|
523 |
+
font_path = collector.fonts_path / font_file_name
|
524 |
+
if os.path.exists(font_path):
|
525 |
font_face_css += f"""
|
526 |
@font-face {{
|
527 |
font-family: '{font_info["family"]}';
|
528 |
+
src: url('file/{font_path}') format('truetype');
|
529 |
}}
|
530 |
"""
|
531 |
+
else:
|
532 |
+
# For system fonts like 'Arial' and 'Times New Roman', no need to specify src
|
533 |
+
pass
|
534 |
|
535 |
custom_css += font_face_css
|
536 |
|
537 |
with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
|
538 |
+
status = gr.Textbox(
|
539 |
+
label="Status",
|
540 |
+
interactive=False,
|
541 |
+
max_lines=3
|
542 |
+
)
|
543 |
+
|
544 |
gr.Markdown("# TTS Dataset Collection Tool")
|
545 |
|
546 |
with gr.Row():
|
|
|
568 |
elem_classes=["small-input"]
|
569 |
)
|
570 |
font_select = gr.Dropdown(
|
571 |
+
choices=list(FONT_STYLES.keys()) + list(collector.custom_fonts.keys()),
|
572 |
value="english_serif",
|
573 |
label="Select Font Style",
|
574 |
elem_classes=["small-input"]
|
|
|
581 |
)
|
582 |
add_font_btn = gr.Button("Add Custom Font")
|
583 |
|
|
|
|
|
|
|
|
|
|
|
|
|
584 |
# Dataset Info
|
585 |
with gr.Accordion("Dataset Statistics", open=False):
|
586 |
dataset_info = gr.JSON(
|
|
|
748 |
if not success:
|
749 |
return {status: f"❌ {msg}"}
|
750 |
# Update font dropdown
|
751 |
+
font_choices = list(FONT_STYLES.keys()) + list(collector.custom_fonts.keys())
|
752 |
font_select.update(choices=font_choices)
|
753 |
+
# Rebuild CSS to include new font
|
754 |
+
font_face_css = ""
|
755 |
+
for font_style, font_info in FONT_STYLES.items():
|
756 |
+
if font_style in collector.custom_fonts:
|
757 |
+
font_file_name = font_info['family'] + '.ttf'
|
758 |
+
font_path = collector.fonts_path / font_file_name
|
759 |
+
if os.path.exists(font_path):
|
760 |
+
font_face_css += f"""
|
761 |
+
@font-face {{
|
762 |
+
font-family: '{font_info["family"]}';
|
763 |
+
src: url('file/{font_path}') format('truetype');
|
764 |
+
}}
|
765 |
+
"""
|
766 |
+
# Update the interface CSS
|
767 |
+
interface.set_css(custom_css + font_face_css)
|
768 |
return {status: f"✅ {msg}"}
|
769 |
|
770 |
# Event handlers
|