Omarrran committed on
Commit
e708838
·
verified ·
1 Parent(s): 346b8d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -28
app.py CHANGED
@@ -17,22 +17,9 @@ import re
17
 
18
  # Download NLTK data during initialization
19
  try:
 
 
20
  nltk.download('punkt', quiet=True)
21
- except Exception as e:
22
- print(f"Warning: Failed to download NLTK data: {str(e)}")
23
- print("Downloading from alternative source...")
24
- try:
25
- import ssl
26
- try:
27
- _create_unverified_https_context = ssl._create_unverified_context
28
- except AttributeError:
29
- pass
30
- else:
31
- ssl._create_default_https_context = _create_unverified_https_context
32
- nltk.download('punkt', quiet=True)
33
- except Exception as e:
34
- print(f"Critical error downloading NLTK data: {str(e)}")
35
- raise
36
 
37
  # Configure logging
38
  logging.basicConfig(
@@ -160,7 +147,7 @@ class TTSDatasetCollector:
160
 
161
  try:
162
  # Try NLTK first
163
- self.sentences = nltk.sent_tokenize(text.strip(), language='english')
164
  except Exception as e:
165
  logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
166
  # Fallback to simple splitting
@@ -207,7 +194,7 @@ class TTSDatasetCollector:
207
 
208
  def get_styled_text(self, text: str) -> str:
209
  """Get text with current font styling"""
210
- font_css = FONT_STYLES[self.current_font]['css']
211
  return f"<div style='{font_css}'>{text}</div>"
212
 
213
  def set_font(self, font_style: str) -> Tuple[bool, str]:
@@ -532,18 +519,28 @@ def create_interface():
532
  # Add font-face declarations
533
  font_face_css = ""
534
  for font_style, font_info in FONT_STYLES.items():
535
- if font_style in ['nastaliq', 'naskh'] or font_style in collector.custom_fonts:
536
- font_file_name = font_info['family'] + '.ttf' if font_style not in collector.custom_fonts else font_info['family'] + '.ttf'
 
537
  font_face_css += f"""
538
  @font-face {{
539
  font-family: '{font_info["family"]}';
540
- src: url('fonts/{font_file_name}') format('truetype');
541
  }}
542
  """
 
 
 
543
 
544
  custom_css += font_face_css
545
 
546
  with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
 
 
 
 
 
 
547
  gr.Markdown("# TTS Dataset Collection Tool")
548
 
549
  with gr.Row():
@@ -571,7 +568,7 @@ def create_interface():
571
  elem_classes=["small-input"]
572
  )
573
  font_select = gr.Dropdown(
574
- choices=list(FONT_STYLES.keys()),
575
  value="english_serif",
576
  label="Select Font Style",
577
  elem_classes=["small-input"]
@@ -584,12 +581,6 @@ def create_interface():
584
  )
585
  add_font_btn = gr.Button("Add Custom Font")
586
 
587
- status = gr.Textbox(
588
- label="Status",
589
- interactive=False,
590
- max_lines=3
591
- )
592
-
593
  # Dataset Info
594
  with gr.Accordion("Dataset Statistics", open=False):
595
  dataset_info = gr.JSON(
@@ -757,8 +748,23 @@ def create_interface():
757
  if not success:
758
  return {status: f"❌ {msg}"}
759
  # Update font dropdown
760
- font_choices = list(FONT_STYLES.keys())
761
  font_select.update(choices=font_choices)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
762
  return {status: f"✅ {msg}"}
763
 
764
  # Event handlers
 
17
 
18
  # Download NLTK data during initialization
19
  try:
20
+ nltk.data.find('tokenizers/punkt')
21
+ except LookupError:
22
  nltk.download('punkt', quiet=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # Configure logging
25
  logging.basicConfig(
 
147
 
148
  try:
149
  # Try NLTK first
150
+ self.sentences = nltk.sent_tokenize(text.strip())
151
  except Exception as e:
152
  logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
153
  # Fallback to simple splitting
 
194
 
195
  def get_styled_text(self, text: str) -> str:
196
  """Get text with current font styling"""
197
+ font_css = FONT_STYLES.get(self.current_font, {}).get('css', '')
198
  return f"<div style='{font_css}'>{text}</div>"
199
 
200
  def set_font(self, font_style: str) -> Tuple[bool, str]:
 
519
  # Add font-face declarations
520
  font_face_css = ""
521
  for font_style, font_info in FONT_STYLES.items():
522
+ font_file_name = font_info['family'] + '.ttf'
523
+ font_path = collector.fonts_path / font_file_name
524
+ if os.path.exists(font_path):
525
  font_face_css += f"""
526
  @font-face {{
527
  font-family: '{font_info["family"]}';
528
+ src: url('file/{font_path}') format('truetype');
529
  }}
530
  """
531
+ else:
532
+ # For system fonts like 'Arial' and 'Times New Roman', no need to specify src
533
+ pass
534
 
535
  custom_css += font_face_css
536
 
537
  with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
538
+ status = gr.Textbox(
539
+ label="Status",
540
+ interactive=False,
541
+ max_lines=3
542
+ )
543
+
544
  gr.Markdown("# TTS Dataset Collection Tool")
545
 
546
  with gr.Row():
 
568
  elem_classes=["small-input"]
569
  )
570
  font_select = gr.Dropdown(
571
+ choices=list(FONT_STYLES.keys()) + list(collector.custom_fonts.keys()),
572
  value="english_serif",
573
  label="Select Font Style",
574
  elem_classes=["small-input"]
 
581
  )
582
  add_font_btn = gr.Button("Add Custom Font")
583
 
 
 
 
 
 
 
584
  # Dataset Info
585
  with gr.Accordion("Dataset Statistics", open=False):
586
  dataset_info = gr.JSON(
 
748
  if not success:
749
  return {status: f"❌ {msg}"}
750
  # Update font dropdown
751
+ font_choices = list(FONT_STYLES.keys()) + list(collector.custom_fonts.keys())
752
  font_select.update(choices=font_choices)
753
+ # Rebuild CSS to include new font
754
+ font_face_css = ""
755
+ for font_style, font_info in FONT_STYLES.items():
756
+ if font_style in collector.custom_fonts:
757
+ font_file_name = font_info['family'] + '.ttf'
758
+ font_path = collector.fonts_path / font_file_name
759
+ if os.path.exists(font_path):
760
+ font_face_css += f"""
761
+ @font-face {{
762
+ font-family: '{font_info["family"]}';
763
+ src: url('file/{font_path}') format('truetype');
764
+ }}
765
+ """
766
+ # Update the interface CSS
767
+ interface.set_css(custom_css + font_face_css)
768
  return {status: f"✅ {msg}"}
769
 
770
  # Event handlers