Nash-pAnDiTa committed on
Commit
6bf9f6c
·
verified ·
1 Parent(s): b47e16a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -10
app.py CHANGED
@@ -10,17 +10,13 @@ def load_huggingface_dataset(dataset_link, token):
10
  Load a Hugging Face dataset using the provided link and token.
11
  """
12
  # Extract dataset name and config if applicable
13
- parts = dataset_link.split("/")
14
- dataset_name = parts[-1]
15
- if len(parts) > 2:
16
- owner = parts[-2]
17
- dataset_name = f"{owner}/{dataset_name}"
18
 
19
  # Load the dataset
20
  dataset = load_dataset(dataset_link, split="train")
21
 
22
  # Return the dataset as a DataFrame with index and transcription columns
23
  df = dataset.to_pandas().reset_index()
 
24
  return df[["index", "transcription"]], dataset
25
 
26
  def update_transcriptions(df, dataset, token,dataset_link):
@@ -52,13 +48,32 @@ def main():
52
 
53
  dataset = None # To store the loaded dataset object globally
54
 
 
 
55
  def load_dataset_and_show_table(dataset_link, token):
56
  """
57
  Load the dataset and return the DataFrame to display in Gradio.
58
  """
59
- nonlocal dataset
60
- df, dataset = load_huggingface_dataset(dataset_link, token)
61
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  def submit_changes(df, token,dataset_link):
64
  """
@@ -67,8 +82,13 @@ def main():
67
  if dataset is None:
68
  return "No dataset loaded to update."
69
 
70
- print(df)
71
- print(token)
 
 
 
 
 
72
  return update_transcriptions(df, dataset, token,dataset_link)
73
 
74
  # Gradio Interface
@@ -82,6 +102,10 @@ def main():
82
  # Button to load dataset
83
  load_button = gr.Button("Load Dataset")
84
 
 
 
 
 
85
  # Table to display and edit dataset
86
  table = gr.Dataframe(
87
  headers=["index", "transcription"],
@@ -90,8 +114,10 @@ def main():
90
  label="Edit Dataset (Transcriptions are RTL)",
91
  )
92
 
 
93
  # Button to submit changes
94
  submit_button = gr.Button("Submit Changes")
 
95
  output_message = gr.Textbox(label="Message")
96
 
97
  # RTL styling for transcription column
@@ -99,6 +125,8 @@ def main():
99
 
100
  # Button functionality
101
  load_button.click(load_dataset_and_show_table, [dataset_link, hf_token], table)
 
 
102
  submit_button.click(submit_changes, [table, hf_token,dataset_link], output_message)
103
 
104
  # Launch Gradio Interface
 
10
  Load a Hugging Face dataset using the provided link and token.
11
  """
12
  # Extract dataset name and config if applicable
 
 
 
 
 
13
 
14
  # Load the dataset
15
  dataset = load_dataset(dataset_link, split="train")
16
 
17
  # Return the dataset as a DataFrame with index and transcription columns
18
  df = dataset.to_pandas().reset_index()
19
+ # print(df)
20
  return df[["index", "transcription"]], dataset
21
 
22
  def update_transcriptions(df, dataset, token,dataset_link):
 
48
 
49
  dataset = None # To store the loaded dataset object globally
50
 
51
+ original_df = None # Store the original DataFrame globally for resetting search results
52
+
53
  def load_dataset_and_show_table(dataset_link, token):
54
  """
55
  Load the dataset and return the DataFrame to display in Gradio.
56
  """
57
+ nonlocal dataset, original_df
58
+ original_df, dataset = load_huggingface_dataset(dataset_link, token)
59
+ return original_df
60
+
61
+ def search_transcriptions(search_term):
62
+ """
63
+ Search the transcription column and filter the table based on the search term.
64
+ """
65
+ if original_df is None:
66
+ return pd.DataFrame(columns=["index", "transcription"]) # Empty table if no dataset is loaded
67
+ filtered_df = original_df[original_df["transcription"].str.contains(search_term, case=False, na=False)]
68
+ return filtered_df
69
+
70
+ def update_original(df):
71
+ # Merge modified DataFrame into original DataFrame
72
+ for _, row in df.iterrows():
73
+ # Locate the row in the original DataFrame with the same index
74
+ original_df.loc[original_df["index"] == row["index"], "transcription"] = row["transcription"]
75
+
76
+ return "update Successful"
77
 
78
  def submit_changes(df, token,dataset_link):
79
  """
 
82
  if dataset is None:
83
  return "No dataset loaded to update."
84
 
85
+ print(len(dataset))
86
+ print(len(df))
87
+
88
+ if len(df) < len(dataset):
89
+ update_original(df)
90
+ return update_transcriptions(original_df, dataset, token,dataset_link)
91
+
92
  return update_transcriptions(df, dataset, token,dataset_link)
93
 
94
  # Gradio Interface
 
102
  # Button to load dataset
103
  load_button = gr.Button("Load Dataset")
104
 
105
+ # Search bar
106
+ search_box = gr.Textbox(label="Search Transcriptions", placeholder="Enter a search term...")
107
+
108
+
109
  # Table to display and edit dataset
110
  table = gr.Dataframe(
111
  headers=["index", "transcription"],
 
114
  label="Edit Dataset (Transcriptions are RTL)",
115
  )
116
 
117
+ update_button = gr.Button("Update Table")
118
  # Button to submit changes
119
  submit_button = gr.Button("Submit Changes")
120
+ update_message = gr.Textbox(label="update message")
121
  output_message = gr.Textbox(label="Message")
122
 
123
  # RTL styling for transcription column
 
125
 
126
  # Button functionality
127
  load_button.click(load_dataset_and_show_table, [dataset_link, hf_token], table)
128
+ search_box.change(search_transcriptions, search_box, table)
129
+ update_button.click(update_original, [table], update_message)
130
  submit_button.click(submit_changes, [table, hf_token,dataset_link], output_message)
131
 
132
  # Launch Gradio Interface