Nash-pAnDiTa committed on
Commit
7c83e84
·
verified ·
1 Parent(s): 4165a1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -12
app.py CHANGED
@@ -18,16 +18,7 @@ def load_huggingface_dataset(dataset_link, token):
18
  df = dataset.to_pandas().reset_index()
19
  # print(df)
20
  return df[["index", "transcription"]], dataset
21
-
22
-
23
- def delete_empty_rows(updated_df):
24
- """
25
- Delete rows marked for removal from the dataset.
26
- """
27
- # Remove rows where transcription is empty or manually marked
28
- updated_df = updated_df.dropna(subset=["transcription"])
29
- updated_df = updated_df[updated_df["transcription"].str.strip() != ""]
30
- return updated_df
31
 
32
  def update_transcriptions(df, dataset, token,dataset_link):
33
  """
@@ -42,8 +33,10 @@ def update_transcriptions(df, dataset, token,dataset_link):
42
 
43
  # Replace the original transcription column in the dataset
44
  dataset = dataset.map(
45
- lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]},
46
- with_indices=True
 
 
47
  )
48
  print(dataset['transcription'][0])
49
 
 
18
  df = dataset.to_pandas().reset_index()
19
  # print(df)
20
  return df[["index", "transcription"]], dataset
21
+
 
 
 
 
 
 
 
 
 
22
 
23
  def update_transcriptions(df, dataset, token,dataset_link):
24
  """
 
33
 
34
  # Replace the original transcription column in the dataset
35
  dataset = dataset.map(
36
+ lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]} if updated_dataset["transcription"][idx].strip() != "" else None,
37
+ with_indices=True
38
+ # lambda examples, idx: {"transcription": updated_dataset["transcription"][idx]},
39
+ # with_indices=True
40
  )
41
  print(dataset['transcription'][0])
42