Nash-pAnDiTa committed on
Commit
9178174
·
verified ·
1 Parent(s): 18555fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -32,13 +32,14 @@ def update_transcriptions(df, dataset, token,dataset_link):
32
 
33
  # Replace the original transcription column in the dataset
34
  dataset = dataset.map(
35
- lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]} if updated_dataset["transcription"][idx].strip() != "" else [],
36
- with_indices=True,
37
- batched=True # Required to handle the list-based filtering
38
- # lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]},
39
- # with_indices=True
 
 
40
  )
41
- print(dataset['transcription'][0])
42
 
43
  login(token)
44
 
 
32
 
33
  # Replace the original transcription column in the dataset
34
  dataset = dataset.map(
35
+ lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]},
36
+ with_indices=True
37
+ )
38
+
39
+ # Filter out rows with empty or whitespace-only transcriptions
40
+ dataset = dataset.filter(
41
+ lambda examples: examples["transcription"].strip() != "", # Keep only non-empty transcriptions
42
  )
 
43
 
44
  login(token)
45