Spaces:

Nash-pAnDiTa
/

whisper-dataset-editor

Sleeping

Nash-pAnDiTa committed on Nov 24, 2024

Commit

7c83e84

verified ·

1 Parent(s): 4165a1d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -18,16 +18,7 @@ def load_huggingface_dataset(dataset_link, token):
     df = dataset.to_pandas().reset_index()
     # print(df)
     return df[["index", "transcription"]], dataset
-def delete_empty_rows(updated_df):
-    """
-    Delete rows marked for removal from the dataset.
-    """
-    # Remove rows where transcription is empty or manually marked
-    updated_df = updated_df.dropna(subset=["transcription"])
-    updated_df = updated_df[updated_df["transcription"].str.strip() != ""]
-    return updated_df
 def update_transcriptions(df, dataset, token,dataset_link):
     """
@@ -42,8 +33,10 @@ def update_transcriptions(df, dataset, token,dataset_link):
     # Replace the original transcription column in the dataset
     dataset = dataset.map(
-        lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]},
-        with_indices=True
     )
     print(dataset['transcription'][0])

     df = dataset.to_pandas().reset_index()
     # print(df)
     return df[["index", "transcription"]], dataset
 def update_transcriptions(df, dataset, token,dataset_link):
     """
     # Replace the original transcription column in the dataset
     dataset = dataset.map(
+        lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]} if updated_dataset["transcription"][idx].strip() != "" else None,
+        with_indices=True
+        # lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]},
+        # with_indices=True
     )
     print(dataset['transcription'][0])