Omarrran commited on
Commit
1d92b3b
·
verified ·
1 Parent(s): 18d6a1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -24
app.py CHANGED
@@ -151,20 +151,27 @@ class TTSDatasetCollector:
151
  self.log_operation(error_msg, "error")
152
  logger.error(traceback.format_exc())
153
  return False, error_msg
154
-
155
- def get_styled_text(self, text: str) -> str:
156
- """Get text with current font styling"""
157
- font_css = FONT_STYLES[self.current_font]['css']
158
- return f"<div style='{font_css}'>{text}</div>"
159
 
160
-
161
- def generate_filenames(self, dataset_name: str, speaker_id: str) -> Tuple[str, str]:
 
 
 
 
 
162
  """Generate unique filenames for audio and text files"""
163
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
164
  sentence_id = f"{self.current_index+1:04d}"
165
  base_name = f"{dataset_name}_{speaker_id}_{sentence_id}_{timestamp}"
166
  return f"{base_name}.wav", f"{base_name}.txt"
167
-
 
 
 
 
 
 
 
168
  def save_recording(self, audio_file, speaker_id: str, dataset_name: str) -> Tuple[bool, str]:
169
  """Save recording with enhanced error handling and logging"""
170
  if not all([audio_file, speaker_id, dataset_name]):
@@ -293,6 +300,62 @@ Font_Style: {metadata['font_style']}
293
  self.log_operation(error_msg, "error")
294
  logger.error(traceback.format_exc())
295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  def create_interface():
297
  """Create Gradio interface with enhanced features"""
298
 
@@ -305,6 +368,14 @@ def create_interface():
305
  font-size: 1.2em !important;
306
  padding: 20px !important;
307
  }
 
 
 
 
 
 
 
 
308
  """
309
 
310
  # Add font-face declarations
@@ -346,21 +417,24 @@ def create_interface():
346
  # Right column - Recording
347
  with gr.Column():
348
  current_text = gr.HTML(
349
- label="Current Sentence"
 
350
  )
351
  audio_recorder = gr.Audio(
352
  label="Record Audio",
353
- type="filepath"
 
354
  )
355
  next_text = gr.HTML(
356
- label="Next Sentence"
 
357
  )
358
 
359
  # Controls
360
  with gr.Row():
361
  prev_btn = gr.Button("Previous", variant="secondary")
362
- next_btn = gr.Button("Next", variant="secondary")
363
- save_btn = gr.Button("Save Recording", variant="primary")
364
 
365
  # Status and Progress
366
  with gr.Row():
@@ -387,7 +461,12 @@ def create_interface():
387
  if not success:
388
  return {status: msg}
389
 
390
- return update_display()
 
 
 
 
 
391
 
392
  def load_file(file):
393
  """Handle file loading with enhanced error reporting"""
@@ -396,7 +475,8 @@ def create_interface():
396
  current_text: "",
397
  next_text: "",
398
  progress: "",
399
- status: "⚠️ No file selected"
 
400
  }
401
 
402
  success, msg = collector.load_text_file(file)
@@ -406,30 +486,105 @@ def create_interface():
406
  next_text: "",
407
  progress: "",
408
  status: f"❌ {msg}",
409
- dataset_info: update_dataset_info()
410
  }
411
 
 
412
  return {
413
- current_text: collector.get_styled_text(collector.sentences[0]),
414
- next_text: collector.get_styled_text(collector.sentences[1]) if len(collector.sentences) > 1 else "",
415
- progress: f"📖 Sentence 1 of {len(collector.sentences)}",
416
  status: f"✅ {msg}",
417
- dataset_info: update_dataset_info()
418
  }
419
 
420
- # Remaining methods and event handlers go here ...
421
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  return interface
423
 
424
  if __name__ == "__main__":
425
  try:
 
 
 
 
 
426
  interface = create_interface()
 
427
  interface.launch(
428
  server_name="0.0.0.0",
429
  server_port=7860,
430
- share=True
 
 
431
  )
432
  except Exception as e:
433
  logger.error(f"Failed to launch interface: {str(e)}")
434
  logger.error(traceback.format_exc())
435
- raise
 
151
  self.log_operation(error_msg, "error")
152
  logger.error(traceback.format_exc())
153
  return False, error_msg
 
 
 
 
 
154
 
155
+ def get_styled_text(self, text: str) -> str:
156
+ """Get text with current font styling"""
157
+ font_css = FONT_STYLES[self.current_font]['css']
158
+ return f"<div style='{font_css}'>{text}</div>"
159
+
160
+
161
+ def generate_filenames(self, dataset_name: str, speaker_id: str) -> Tuple[str, str]:
162
  """Generate unique filenames for audio and text files"""
163
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
164
  sentence_id = f"{self.current_index+1:04d}"
165
  base_name = f"{dataset_name}_{speaker_id}_{sentence_id}_{timestamp}"
166
  return f"{base_name}.wav", f"{base_name}.txt"
167
+
168
+ def set_font(self, font_style: str) -> Tuple[bool, str]:
169
+ """Set the current font style"""
170
+ if font_style not in FONT_STYLES:
171
+ return False, f"Invalid font style. Available styles: {', '.join(FONT_STYLES.keys())}"
172
+ self.current_font = font_style
173
+ return True, f"Font style set to {font_style}"
174
+
175
  def save_recording(self, audio_file, speaker_id: str, dataset_name: str) -> Tuple[bool, str]:
176
  """Save recording with enhanced error handling and logging"""
177
  if not all([audio_file, speaker_id, dataset_name]):
 
300
  self.log_operation(error_msg, "error")
301
  logger.error(traceback.format_exc())
302
 
303
+ def get_navigation_info(self) -> Dict[str, Optional[str]]:
304
+ """Get current and next sentence information"""
305
+ if not self.sentences:
306
+ return {
307
+ 'current': None,
308
+ 'next': None,
309
+ 'progress': "No text loaded"
310
+ }
311
+
312
+ current = self.get_styled_text(self.sentences[self.current_index])
313
+ next_text = None
314
+
315
+ if self.current_index < len(self.sentences) - 1:
316
+ next_text = self.get_styled_text(self.sentences[self.current_index + 1])
317
+
318
+ progress = f"Sentence {self.current_index + 1} of {len(self.sentences)}"
319
+
320
+ return {
321
+ 'current': current,
322
+ 'next': next_text,
323
+ 'progress': progress
324
+ }
325
+
326
+ def navigate(self, direction: str) -> Dict[str, Optional[str]]:
327
+ """Navigate through sentences"""
328
+ if not self.sentences:
329
+ return {
330
+ 'current': None,
331
+ 'next': None,
332
+ 'progress': "No text loaded",
333
+ 'status': "⚠️ Please load a text file first"
334
+ }
335
+
336
+ if direction == "next" and self.current_index < len(self.sentences) - 1:
337
+ self.current_index += 1
338
+ elif direction == "prev" and self.current_index > 0:
339
+ self.current_index -= 1
340
+
341
+ nav_info = self.get_navigation_info()
342
+ nav_info['status'] = "✅ Navigation successful"
343
+
344
+ return nav_info
345
+
346
+ def get_dataset_statistics(self) -> Dict:
347
+ """Get current dataset statistics"""
348
+ try:
349
+ metadata_file = self.root_path / 'metadata' / 'dataset_info.json'
350
+ if not metadata_file.exists():
351
+ return {}
352
+
353
+ with open(metadata_file, 'r') as f:
354
+ return json.load(f)
355
+ except Exception as e:
356
+ logger.error(f"Error reading dataset statistics: {str(e)}")
357
+ return {}
358
+
359
  def create_interface():
360
  """Create Gradio interface with enhanced features"""
361
 
 
368
  font-size: 1.2em !important;
369
  padding: 20px !important;
370
  }
371
+ .sentence-display {
372
+ font-size: 1.4em !important;
373
+ padding: 15px !important;
374
+ border: 1px solid #ddd !important;
375
+ border-radius: 8px !important;
376
+ margin: 10px 0 !important;
377
+ min-height: 100px !important;
378
+ }
379
  """
380
 
381
  # Add font-face declarations
 
417
  # Right column - Recording
418
  with gr.Column():
419
  current_text = gr.HTML(
420
+ label="Current Sentence",
421
+ elem_classes=["sentence-display"]
422
  )
423
  audio_recorder = gr.Audio(
424
  label="Record Audio",
425
+ type="filepath",
426
+ elem_classes=["record-button"]
427
  )
428
  next_text = gr.HTML(
429
+ label="Next Sentence",
430
+ elem_classes=["sentence-display"]
431
  )
432
 
433
  # Controls
434
  with gr.Row():
435
  prev_btn = gr.Button("Previous", variant="secondary")
436
+ next_btn = gr.Button("Next", variant="primary")
437
+ save_btn = gr.Button("Save Recording", variant="primary", elem_classes=["record-button"])
438
 
439
  # Status and Progress
440
  with gr.Row():
 
461
  if not success:
462
  return {status: msg}
463
 
464
+ nav_info = collector.get_navigation_info()
465
+ return {
466
+ current_text: nav_info['current'],
467
+ next_text: nav_info['next'],
468
+ status: f"Font updated to {font_style}"
469
+ }
470
 
471
  def load_file(file):
472
  """Handle file loading with enhanced error reporting"""
 
475
  current_text: "",
476
  next_text: "",
477
  progress: "",
478
+ status: "⚠️ No file selected",
479
+ dataset_info: collector.get_dataset_statistics()
480
  }
481
 
482
  success, msg = collector.load_text_file(file)
 
486
  next_text: "",
487
  progress: "",
488
  status: f"❌ {msg}",
489
+ dataset_info: collector.get_dataset_statistics()
490
  }
491
 
492
+ nav_info = collector.get_navigation_info()
493
  return {
494
+ current_text: nav_info['current'],
495
+ next_text: nav_info['next'],
496
+ progress: nav_info['progress'],
497
  status: f"✅ {msg}",
498
+ dataset_info: collector.get_dataset_statistics()
499
  }
500
 
501
+ def save_current_recording(audio_file, speaker_id_value, dataset_name_value):
502
+ """Handle saving the current recording"""
503
+ if not audio_file:
504
+ return {status: "⚠️ Please record audio first"}
505
+
506
+ success, msg = collector.save_recording(
507
+ audio_file, speaker_id_value, dataset_name_value
508
+ )
509
+
510
+ if not success:
511
+ return {
512
+ status: f"❌ {msg}",
513
+ dataset_info: collector.get_dataset_statistics()
514
+ }
515
+
516
+ # Auto-advance to next sentence after successful save
517
+ nav_info = collector.navigate("next")
518
+
519
+ return {
520
+ current_text: nav_info['current'],
521
+ next_text: nav_info['next'],
522
+ progress: nav_info['progress'],
523
+ status: f"✅ {msg}",
524
+ dataset_info: collector.get_dataset_statistics()
525
+ }
526
+
527
+ def navigate_sentences(direction):
528
+ """Handle navigation between sentences"""
529
+ nav_info = collector.navigate(direction)
530
+ return {
531
+ current_text: nav_info['current'],
532
+ next_text: nav_info['next'],
533
+ progress: nav_info['progress'],
534
+ status: nav_info['status']
535
+ }
536
+
537
+ # Event handlers
538
+ file_input.upload(
539
+ load_file,
540
+ inputs=[file_input],
541
+ outputs=[current_text, next_text, progress, status, dataset_info]
542
+ )
543
+
544
+ font_select.change(
545
+ update_font,
546
+ inputs=[font_select],
547
+ outputs=[current_text, next_text, status]
548
+ )
549
+
550
+ save_btn.click(
551
+ save_current_recording,
552
+ inputs=[audio_recorder, speaker_id, dataset_name],
553
+ outputs=[current_text, next_text, progress, status, dataset_info]
554
+ )
555
+
556
+ prev_btn.click(
557
+ lambda: navigate_sentences("prev"),
558
+ outputs=[current_text, next_text, progress, status]
559
+ )
560
+
561
+ next_btn.click(
562
+ lambda: navigate_sentences("next"),
563
+ outputs=[current_text, next_text, progress, status]
564
+ )
565
+
566
+ # Initialize dataset info
567
+ dataset_info.value = collector.get_dataset_statistics()
568
+
569
  return interface
570
 
571
  if __name__ == "__main__":
572
  try:
573
+ # Set up any required environment variables
574
+ os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
575
+ os.environ["GRADIO_SERVER_PORT"] = "7860"
576
+
577
+ # Create and launch the interface
578
  interface = create_interface()
579
+ interface.queue() # Enable queuing for better handling of concurrent users
580
  interface.launch(
581
  server_name="0.0.0.0",
582
  server_port=7860,
583
+ share=True,
584
+ debug=True,
585
+ show_error=True
586
  )
587
  except Exception as e:
588
  logger.error(f"Failed to launch interface: {str(e)}")
589
  logger.error(traceback.format_exc())
590
+ raise