Spaces:

Nash-pAnDiTa
/

whisper-dataset-editor

Sleeping

Nash-pAnDiTa committed on Nov 24, 2024

Commit

9178174

verified ·

1 Parent(s): 18555fc

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -32,13 +32,14 @@ def update_transcriptions(df, dataset, token,dataset_link):
     # Replace the original transcription column in the dataset
     dataset = dataset.map(
-        lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]} if updated_dataset["transcription"][idx].strip() != "" else [],
-        with_indices=True,
-        batched=True  # Required to handle the list-based filtering
-        # lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]},
-        # with_indices=True
     )
-    print(dataset['transcription'][0])
     login(token)

     # Replace the original transcription column in the dataset
     dataset = dataset.map(
+        lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]},
+        with_indices=True
+    )
+    # Filter out rows with empty or whitespace-only transcriptions
+    dataset = dataset.filter(
+        lambda examples: examples["transcription"].strip() != "",  # Keep only non-empty transcriptions
     )
     login(token)