Spaces:
Sleeping
Sleeping
Nash-pAnDiTa
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -10,17 +10,13 @@ def load_huggingface_dataset(dataset_link, token):
|
|
10 |
Load a Hugging Face dataset using the provided link and token.
|
11 |
"""
|
12 |
# Extract dataset name and config if applicable
|
13 |
-
parts = dataset_link.split("/")
|
14 |
-
dataset_name = parts[-1]
|
15 |
-
if len(parts) > 2:
|
16 |
-
owner = parts[-2]
|
17 |
-
dataset_name = f"{owner}/{dataset_name}"
|
18 |
|
19 |
# Load the dataset
|
20 |
dataset = load_dataset(dataset_link, split="train")
|
21 |
|
22 |
# Return the dataset as a DataFrame with index and transcription columns
|
23 |
df = dataset.to_pandas().reset_index()
|
|
|
24 |
return df[["index", "transcription"]], dataset
|
25 |
|
26 |
def update_transcriptions(df, dataset, token,dataset_link):
|
@@ -52,13 +48,32 @@ def main():
|
|
52 |
|
53 |
dataset = None # To store the loaded dataset object globally
|
54 |
|
|
|
|
|
55 |
def load_dataset_and_show_table(dataset_link, token):
|
56 |
"""
|
57 |
Load the dataset and return the DataFrame to display in Gradio.
|
58 |
"""
|
59 |
-
nonlocal dataset
|
60 |
-
|
61 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
def submit_changes(df, token,dataset_link):
|
64 |
"""
|
@@ -67,8 +82,13 @@ def main():
|
|
67 |
if dataset is None:
|
68 |
return "No dataset loaded to update."
|
69 |
|
70 |
-
print(
|
71 |
-
print(
|
|
|
|
|
|
|
|
|
|
|
72 |
return update_transcriptions(df, dataset, token,dataset_link)
|
73 |
|
74 |
# Gradio Interface
|
@@ -82,6 +102,10 @@ def main():
|
|
82 |
# Button to load dataset
|
83 |
load_button = gr.Button("Load Dataset")
|
84 |
|
|
|
|
|
|
|
|
|
85 |
# Table to display and edit dataset
|
86 |
table = gr.Dataframe(
|
87 |
headers=["index", "transcription"],
|
@@ -90,8 +114,10 @@ def main():
|
|
90 |
label="Edit Dataset (Transcriptions are RTL)",
|
91 |
)
|
92 |
|
|
|
93 |
# Button to submit changes
|
94 |
submit_button = gr.Button("Submit Changes")
|
|
|
95 |
output_message = gr.Textbox(label="Message")
|
96 |
|
97 |
# RTL styling for transcription column
|
@@ -99,6 +125,8 @@ def main():
|
|
99 |
|
100 |
# Button functionality
|
101 |
load_button.click(load_dataset_and_show_table, [dataset_link, hf_token], table)
|
|
|
|
|
102 |
submit_button.click(submit_changes, [table, hf_token,dataset_link], output_message)
|
103 |
|
104 |
# Launch Gradio Interface
|
|
|
10 |
Load a Hugging Face dataset using the provided link and token.
|
11 |
"""
|
12 |
# Extract dataset name and config if applicable
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
# Load the dataset
|
15 |
dataset = load_dataset(dataset_link, split="train")
|
16 |
|
17 |
# Return the dataset as a DataFrame with index and transcription columns
|
18 |
df = dataset.to_pandas().reset_index()
|
19 |
+
# print(df)
|
20 |
return df[["index", "transcription"]], dataset
|
21 |
|
22 |
def update_transcriptions(df, dataset, token,dataset_link):
|
|
|
48 |
|
49 |
dataset = None # To store the loaded dataset object globally
|
50 |
|
51 |
+
original_df = None # Store the original DataFrame globally for resetting search results
|
52 |
+
|
53 |
def load_dataset_and_show_table(dataset_link, token):
|
54 |
"""
|
55 |
Load the dataset and return the DataFrame to display in Gradio.
|
56 |
"""
|
57 |
+
nonlocal dataset, original_df
|
58 |
+
original_df, dataset = load_huggingface_dataset(dataset_link, token)
|
59 |
+
return original_df
|
60 |
+
|
61 |
+
def search_transcriptions(search_term):
    """
    Filter the loaded table by a case-insensitive substring of the transcription.

    Args:
        search_term: Text typed into the search box. It is matched literally,
            so characters such as ``(``, ``*`` or ``?`` are not interpreted as
            a regular expression.

    Returns:
        An empty ``index``/``transcription`` DataFrame when no dataset has been
        loaded, the full stored table when the search term is empty, otherwise
        the rows of ``original_df`` whose transcription contains the term.
    """
    if original_df is None:
        # Empty table if no dataset is loaded
        return pd.DataFrame(columns=["index", "transcription"])

    if not search_term:
        # Clearing the search box resets the view to the whole table.
        return original_df

    # regex=False: user input is plain text; with the default regex=True an
    # unbalanced "(" or a leading "*" raises re.error inside the callback.
    matches = original_df["transcription"].str.contains(
        search_term, case=False, na=False, regex=False
    )
    return original_df[matches]
|
69 |
+
|
70 |
+
def update_original(df):
    """
    Merge edited rows from the displayed table back into ``original_df``.

    Rows are matched on the ``index`` column; for every match the stored
    ``transcription`` is overwritten with the edited value. Rows of ``df``
    whose index is not present in ``original_df`` are ignored. If ``df``
    contains the same index more than once, the last occurrence wins.

    Args:
        df: DataFrame with ``index`` and ``transcription`` columns, typically
            the (possibly search-filtered) table shown in the UI.

    Returns:
        A status message for the UI.
    """
    if original_df is None:
        # Nothing has been loaded yet, so there is nothing to merge into.
        return "No dataset loaded to update."

    # index -> edited transcription; later duplicates overwrite earlier ones.
    updates = dict(zip(df["index"], df["transcription"]))

    # One vectorized lookup instead of rescanning original_df for every row.
    mask = original_df["index"].isin(list(updates))
    if mask.any():
        original_df.loc[mask, "transcription"] = (
            original_df.loc[mask, "index"].map(updates)
        )

    return "update Successful"
|
77 |
|
78 |
def submit_changes(df, token,dataset_link):
|
79 |
"""
|
|
|
82 |
if dataset is None:
|
83 |
return "No dataset loaded to update."
|
84 |
|
85 |
+
print(len(dataset))
|
86 |
+
print(len(df))
|
87 |
+
|
88 |
+
if len(df) < len(dataset):
|
89 |
+
update_original(df)
|
90 |
+
return update_transcriptions(original_df, dataset, token,dataset_link)
|
91 |
+
|
92 |
return update_transcriptions(df, dataset, token,dataset_link)
|
93 |
|
94 |
# Gradio Interface
|
|
|
102 |
# Button to load dataset
|
103 |
load_button = gr.Button("Load Dataset")
|
104 |
|
105 |
+
# Search bar
|
106 |
+
search_box = gr.Textbox(label="Search Transcriptions", placeholder="Enter a search term...")
|
107 |
+
|
108 |
+
|
109 |
# Table to display and edit dataset
|
110 |
table = gr.Dataframe(
|
111 |
headers=["index", "transcription"],
|
|
|
114 |
label="Edit Dataset (Transcriptions are RTL)",
|
115 |
)
|
116 |
|
117 |
+
update_button = gr.Button("Update Table")
|
118 |
# Button to submit changes
|
119 |
submit_button = gr.Button("Submit Changes")
|
120 |
+
update_message = gr.Textbox(label="update message")
|
121 |
output_message = gr.Textbox(label="Message")
|
122 |
|
123 |
# RTL styling for transcription column
|
|
|
125 |
|
126 |
# Button functionality
|
127 |
load_button.click(load_dataset_and_show_table, [dataset_link, hf_token], table)
|
128 |
+
search_box.change(search_transcriptions, search_box, table)
|
129 |
+
update_button.click(update_original, [table], update_message)
|
130 |
submit_button.click(submit_changes, [table, hf_token,dataset_link], output_message)
|
131 |
|
132 |
# Launch Gradio Interface
|