Spaces:
Sleeping
Sleeping
Nash-pAnDiTa
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -18,16 +18,7 @@ def load_huggingface_dataset(dataset_link, token):
|
|
18 |
df = dataset.to_pandas().reset_index()
|
19 |
# print(df)
|
20 |
return df[["index", "transcription"]], dataset
|
21 |
-
|
22 |
-
|
23 |
-
def delete_empty_rows(updated_df):
|
24 |
-
"""
|
25 |
-
Delete rows marked for removal from the dataset.
|
26 |
-
"""
|
27 |
-
# Remove rows where transcription is empty or manually marked
|
28 |
-
updated_df = updated_df.dropna(subset=["transcription"])
|
29 |
-
updated_df = updated_df[updated_df["transcription"].str.strip() != ""]
|
30 |
-
return updated_df
|
31 |
|
32 |
def update_transcriptions(df, dataset, token,dataset_link):
|
33 |
"""
|
@@ -42,8 +33,10 @@ def update_transcriptions(df, dataset, token,dataset_link):
|
|
42 |
|
43 |
# Replace the original transcription column in the dataset
|
44 |
dataset = dataset.map(
|
45 |
-
lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]},
|
46 |
-
with_indices=True
|
|
|
|
|
47 |
)
|
48 |
print(dataset['transcription'][0])
|
49 |
|
|
|
18 |
df = dataset.to_pandas().reset_index()
|
19 |
# print(df)
|
20 |
return df[["index", "transcription"]], dataset
|
21 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
def update_transcriptions(df, dataset, token,dataset_link):
|
24 |
"""
|
|
|
33 |
|
34 |
# Replace the original transcription column in the dataset
|
35 |
dataset = dataset.map(
|
36 |
+
lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]} if updated_dataset["transcription"][idx].strip() != "" else None,
|
37 |
+
with_indices=True
|
38 |
+
# lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]},
|
39 |
+
# with_indices=True
|
40 |
)
|
41 |
print(dataset['transcription'][0])
|
42 |
|