Omarrran committed on
Commit
03ba3c8
·
verified ·
1 Parent(s): 904e92f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -25
app.py CHANGED
@@ -12,16 +12,33 @@ nltk.download('punkt')
12
  class TTSDatasetCollector:
13
  """Manages TTS dataset collection and organization"""
14
 
15
- def __init__(self, root_path: str = "dataset_root"):
16
- self.root_path = Path(root_path)
 
17
  self.sentences = []
18
  self.current_index = 0
19
  self.setup_directories()
20
 
21
  def setup_directories(self):
22
  """Create necessary directory structure"""
 
 
 
 
23
  for subdir in ['audio', 'transcriptions', 'metadata']:
24
- (self.root_path / subdir).mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def load_text_file(self, file):
27
  """Process and load text file"""
@@ -30,8 +47,12 @@ class TTSDatasetCollector:
30
  text = f.read()
31
  self.sentences = nltk.sent_tokenize(text)
32
  self.current_index = 0
 
 
 
33
  return True, f"Loaded {len(self.sentences)} sentences"
34
  except Exception as e:
 
35
  return False, f"Error loading file: {str(e)}"
36
 
37
  def generate_filenames(self, dataset_name: str, speaker_id: str) -> tuple:
@@ -57,11 +78,13 @@ class TTSDatasetCollector:
57
  text_dir.mkdir(exist_ok=True)
58
 
59
  # Save audio file
60
- shutil.copy2(audio_file, audio_dir / audio_name)
 
61
 
62
  # Save transcription
 
63
  self.save_transcription(
64
- text_dir / text_name,
65
  self.sentences[self.current_index],
66
  {
67
  'speaker_id': speaker_id,
@@ -71,9 +94,20 @@ class TTSDatasetCollector:
71
  }
72
  )
73
 
74
- return True, "Recording saved successfully"
 
 
 
 
 
 
 
 
 
75
  except Exception as e:
76
- return False, f"Error saving recording: {str(e)}"
 
 
77
 
78
  def save_transcription(self, file_path: Path, text: str, metadata: dict):
79
  """Save transcription with metadata"""
@@ -88,6 +122,50 @@ Timestamp: {metadata['timestamp']}
88
  """
89
  with open(file_path, 'w', encoding='utf-8') as f:
90
  f.write(content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
  def create_interface():
93
  """Create Gradio interface for TTS data collection"""
@@ -144,6 +222,24 @@ def create_interface():
144
  label="Status",
145
  interactive=False
146
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  # Event handlers
149
  def load_file(file):
@@ -152,7 +248,8 @@ def create_interface():
152
  current_text: "",
153
  next_text: "",
154
  progress: "",
155
- status: "No file selected"
 
156
  }
157
 
158
  success, msg = collector.load_text_file(file)
@@ -161,14 +258,16 @@ def create_interface():
161
  current_text: "",
162
  next_text: "",
163
  progress: "",
164
- status: msg
 
165
  }
166
 
167
  return {
168
  current_text: collector.sentences[0],
169
  next_text: collector.sentences[1] if len(collector.sentences) > 1 else "",
170
  progress: f"Sentence 1 of {len(collector.sentences)}",
171
- status: msg
 
172
  }
173
 
174
  def update_display():
@@ -178,7 +277,8 @@ def create_interface():
178
  current_text: "",
179
  next_text: "",
180
  progress: "",
181
- status: "No text loaded"
 
182
  }
183
 
184
  next_idx = collector.current_index + 1
@@ -186,7 +286,8 @@ def create_interface():
186
  current_text: collector.sentences[collector.current_index],
187
  next_text: collector.sentences[next_idx] if next_idx < len(collector.sentences) else "",
188
  progress: f"Sentence {collector.current_index + 1} of {len(collector.sentences)}",
189
- status: "Ready for recording"
 
190
  }
191
 
192
  def next_sentence():
@@ -204,44 +305,43 @@ def create_interface():
204
  def save_recording(audio, spk_id, ds_name):
205
  """Handle saving recording"""
206
  if not audio:
207
- return {status: "No audio recorded"}
208
  if not spk_id:
209
- return {status: "Speaker ID required"}
210
  if not ds_name:
211
- return {status: "Dataset name required"}
212
 
213
  success, msg = collector.save_recording(audio, spk_id, ds_name)
214
- return {status: msg}
 
 
 
215
 
216
  # Connect event handlers
217
  file_input.change(
218
  load_file,
219
  inputs=[file_input],
220
- outputs=[current_text, next_text, progress, status]
221
  )
222
 
223
  next_btn.click(
224
  next_sentence,
225
- outputs=[current_text, next_text, progress, status]
226
  )
227
 
228
  prev_btn.click(
229
  prev_sentence,
230
- outputs=[current_text, next_text, progress, status]
231
  )
232
 
233
  save_btn.click(
234
  save_recording,
235
  inputs=[audio_recorder, speaker_id, dataset_name],
236
- outputs=[status]
237
  )
238
 
239
  return interface
240
 
241
  if __name__ == "__main__":
242
  interface = create_interface()
243
- interface.launch(
244
- server_name="0.0.0.0",
245
- server_port=7860,
246
- share=True
247
- )
 
12
  class TTSDatasetCollector:
13
  """Manages TTS dataset collection and organization"""
14
 
15
+ def __init__(self):
16
+ # Get the directory where app.py is located
17
+ self.root_path = Path(os.path.dirname(os.path.abspath(__file__))) / "dataset"
18
  self.sentences = []
19
  self.current_index = 0
20
  self.setup_directories()
21
 
22
  def setup_directories(self):
23
  """Create necessary directory structure"""
24
+ # Create main dataset directory
25
+ self.root_path.mkdir(exist_ok=True)
26
+
27
+ # Create subdirectories
28
  for subdir in ['audio', 'transcriptions', 'metadata']:
29
+ (self.root_path / subdir).mkdir(exist_ok=True)
30
+
31
+ # Create a log file to track operations
32
+ log_file = self.root_path / 'dataset_log.txt'
33
+ if not log_file.exists():
34
+ with open(log_file, 'w', encoding='utf-8') as f:
35
+ f.write(f"Dataset collection started on {datetime.now().isoformat()}\n")
36
+
37
+ def log_operation(self, message: str):
38
+ """Log operations to keep track of dataset collection"""
39
+ log_file = self.root_path / 'dataset_log.txt'
40
+ with open(log_file, 'a', encoding='utf-8') as f:
41
+ f.write(f"[{datetime.now().isoformat()}] {message}\n")
42
 
43
  def load_text_file(self, file):
44
  """Process and load text file"""
 
47
  text = f.read()
48
  self.sentences = nltk.sent_tokenize(text)
49
  self.current_index = 0
50
+
51
+ # Log the file loading
52
+ self.log_operation(f"Loaded text file with {len(self.sentences)} sentences")
53
  return True, f"Loaded {len(self.sentences)} sentences"
54
  except Exception as e:
55
+ self.log_operation(f"Error loading file: {str(e)}")
56
  return False, f"Error loading file: {str(e)}"
57
 
58
  def generate_filenames(self, dataset_name: str, speaker_id: str) -> tuple:
 
78
  text_dir.mkdir(exist_ok=True)
79
 
80
  # Save audio file
81
+ audio_path = audio_dir / audio_name
82
+ shutil.copy2(audio_file, audio_path)
83
 
84
  # Save transcription
85
+ text_path = text_dir / text_name
86
  self.save_transcription(
87
+ text_path,
88
  self.sentences[self.current_index],
89
  {
90
  'speaker_id': speaker_id,
 
94
  }
95
  )
96
 
97
+ # Update metadata
98
+ self.update_metadata(speaker_id, dataset_name)
99
+
100
+ # Log the save operation
101
+ self.log_operation(
102
+ f"Saved recording: Speaker={speaker_id}, Dataset={dataset_name}, "
103
+ f"Audio={audio_name}, Text={text_name}"
104
+ )
105
+
106
+ return True, f"Recording saved successfully as {audio_name}"
107
  except Exception as e:
108
+ error_msg = f"Error saving recording: {str(e)}"
109
+ self.log_operation(error_msg)
110
+ return False, error_msg
111
 
112
  def save_transcription(self, file_path: Path, text: str, metadata: dict):
113
  """Save transcription with metadata"""
 
122
  """
123
  with open(file_path, 'w', encoding='utf-8') as f:
124
  f.write(content)
125
+
126
+ def update_metadata(self, speaker_id: str, dataset_name: str):
127
+ """Update dataset metadata file"""
128
+ metadata_file = self.root_path / 'metadata' / 'dataset_info.json'
129
+
130
+ try:
131
+ if metadata_file.exists():
132
+ with open(metadata_file, 'r') as f:
133
+ metadata = json.load(f)
134
+ else:
135
+ metadata = {'speakers': {}, 'last_updated': None}
136
+
137
+ # Update speaker data
138
+ if speaker_id not in metadata['speakers']:
139
+ metadata['speakers'][speaker_id] = {
140
+ 'total_recordings': 0,
141
+ 'datasets': {}
142
+ }
143
+
144
+ if dataset_name not in metadata['speakers'][speaker_id]['datasets']:
145
+ metadata['speakers'][speaker_id]['datasets'][dataset_name] = {
146
+ 'recordings': 0,
147
+ 'sentences': len(self.sentences),
148
+ 'first_recording': datetime.now().isoformat(),
149
+ 'last_recording': None
150
+ }
151
+
152
+ # Update counts and timestamps
153
+ metadata['speakers'][speaker_id]['total_recordings'] += 1
154
+ metadata['speakers'][speaker_id]['datasets'][dataset_name]['recordings'] += 1
155
+ metadata['speakers'][speaker_id]['datasets'][dataset_name]['last_recording'] = \
156
+ datetime.now().isoformat()
157
+ metadata['last_updated'] = datetime.now().isoformat()
158
+
159
+ # Save updated metadata
160
+ with open(metadata_file, 'w') as f:
161
+ json.dump(metadata, f, indent=2)
162
+
163
+ self.log_operation(f"Updated metadata for {speaker_id} in {dataset_name}")
164
+
165
+ except Exception as e:
166
+ error_msg = f"Error updating metadata: {str(e)}"
167
+ self.log_operation(error_msg)
168
+ print(error_msg)
169
 
170
  def create_interface():
171
  """Create Gradio interface for TTS data collection"""
 
222
  label="Status",
223
  interactive=False
224
  )
225
+
226
+ # Dataset Info
227
+ with gr.Row():
228
+ dataset_info = gr.JSON(
229
+ label="Dataset Statistics",
230
+ value={}
231
+ )
232
+
233
+ def update_dataset_info():
234
+ """Update dataset statistics display"""
235
+ try:
236
+ metadata_file = collector.root_path / 'metadata' / 'dataset_info.json'
237
+ if metadata_file.exists():
238
+ with open(metadata_file, 'r') as f:
239
+ return json.load(f)
240
+ return {}
241
+ except Exception:
242
+ return {}
243
 
244
  # Event handlers
245
  def load_file(file):
 
248
  current_text: "",
249
  next_text: "",
250
  progress: "",
251
+ status: "No file selected",
252
+ dataset_info: update_dataset_info()
253
  }
254
 
255
  success, msg = collector.load_text_file(file)
 
258
  current_text: "",
259
  next_text: "",
260
  progress: "",
261
+ status: msg,
262
+ dataset_info: update_dataset_info()
263
  }
264
 
265
  return {
266
  current_text: collector.sentences[0],
267
  next_text: collector.sentences[1] if len(collector.sentences) > 1 else "",
268
  progress: f"Sentence 1 of {len(collector.sentences)}",
269
+ status: msg,
270
+ dataset_info: update_dataset_info()
271
  }
272
 
273
  def update_display():
 
277
  current_text: "",
278
  next_text: "",
279
  progress: "",
280
+ status: "No text loaded",
281
+ dataset_info: update_dataset_info()
282
  }
283
 
284
  next_idx = collector.current_index + 1
 
286
  current_text: collector.sentences[collector.current_index],
287
  next_text: collector.sentences[next_idx] if next_idx < len(collector.sentences) else "",
288
  progress: f"Sentence {collector.current_index + 1} of {len(collector.sentences)}",
289
+ status: "Ready for recording",
290
+ dataset_info: update_dataset_info()
291
  }
292
 
293
  def next_sentence():
 
305
  def save_recording(audio, spk_id, ds_name):
306
  """Handle saving recording"""
307
  if not audio:
308
+ return {status: "No audio recorded", dataset_info: update_dataset_info()}
309
  if not spk_id:
310
+ return {status: "Speaker ID required", dataset_info: update_dataset_info()}
311
  if not ds_name:
312
+ return {status: "Dataset name required", dataset_info: update_dataset_info()}
313
 
314
  success, msg = collector.save_recording(audio, spk_id, ds_name)
315
+ return {
316
+ status: msg,
317
+ dataset_info: update_dataset_info()
318
+ }
319
 
320
  # Connect event handlers
321
  file_input.change(
322
  load_file,
323
  inputs=[file_input],
324
+ outputs=[current_text, next_text, progress, status, dataset_info]
325
  )
326
 
327
  next_btn.click(
328
  next_sentence,
329
+ outputs=[current_text, next_text, progress, status, dataset_info]
330
  )
331
 
332
  prev_btn.click(
333
  prev_sentence,
334
+ outputs=[current_text, next_text, progress, status, dataset_info]
335
  )
336
 
337
  save_btn.click(
338
  save_recording,
339
  inputs=[audio_recorder, speaker_id, dataset_name],
340
+ outputs=[status, dataset_info]
341
  )
342
 
343
  return interface
344
 
345
  if __name__ == "__main__":
346
  interface = create_interface()
347
+ interface.launch()