Nash-pAnDiTa committed on
Commit
e1f36d5
·
verified ·
1 Parent(s): 4f0cffd

upload app and req

Browse files
Files changed (2) hide show
  1. app.py +77 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from datasets import load_dataset, Dataset, Audio
4
+ from huggingface_hub import login
5
+
6
# Module-level state shared by the callbacks in this file: the table being
# edited, the dataset name used when uploading, and the Hub token supplied
# when the dataset was loaded.
editable_df: pd.DataFrame = pd.DataFrame()
dataset_name: str = ""
hub_token: str = ""
10
+
11
+
12
def load_hf_dataset(dataset_url, token):
    """Log in to the Hub, load a dataset, and keep its train split for editing.

    The short dataset name and the token are stored in module-level state,
    and the module-level DataFrame is replaced with the rows of the
    ``"train"`` split.

    Args:
        dataset_url: Dataset identifier handed to ``load_dataset`` (the UI
            placeholder suggests ``username/dataset_name``).
        token: Hugging Face Hub token handed to ``login``.

    Returns:
        The DataFrame built from the ``"train"`` split.
    """
    global editable_df, dataset_name, hub_token

    # Keep only the text after the last "/"; any namespace prefix is dropped.
    # NOTE(review): the upload step pushes to this bare name — confirm that
    # dropping the namespace is intended.
    dataset_name = dataset_url.rsplit("/", 1)[-1]
    hub_token = token

    login(token)
    train_split = load_dataset(dataset_url)["train"]
    editable_df = pd.DataFrame(train_split)
    return editable_df
23
+
24
def update_row(row_index, column_name, new_value):
    """Update a specific cell in the DataFrame.

    The module-level ``editable_df`` is modified in place, and only when
    ``row_index`` is a valid non-negative row position and ``column_name``
    is an existing column. Otherwise the table is returned untouched.

    Args:
        row_index: Zero-based position of the row to edit.
        column_name: Name of the column to edit.
        new_value: Replacement text; double quotes are stripped before it
            is stored.

    Returns:
        The module-level DataFrame (updated or unchanged).
    """
    position = int(row_index)
    # Negative positions must be rejected explicitly: a label-based write
    # with an unknown label would append a new row instead of failing.
    if 0 <= position < len(editable_df) and column_name in editable_df.columns:
        # Translate the position into the frame's own label so the write
        # hits the intended row even if the index is not 0..n-1.
        row_label = editable_df.index[position]
        editable_df.at[row_label, column_name] = new_value.replace('"', "")
    return editable_df
31
+
32
def save_and_upload():
    """Save the updated DataFrame back to the Hugging Face Hub.

    Reads the module-level ``editable_df``, ``dataset_name`` and
    ``hub_token``. Nothing is uploaded when no dataset name has been
    recorded or the table is empty; a status message is returned instead.

    Returns:
        A human-readable status string for the UI.
    """
    # Guard against pressing "Save" before a dataset was loaded; otherwise
    # the push would be attempted with an empty repository name.
    if not dataset_name or editable_df.empty:
        return "No dataset loaded: load a dataset before uploading."

    # Convert DataFrame to Dataset
    updated_dataset = Dataset.from_pandas(editable_df)

    # Only cast when the column exists; casting a missing column raises.
    if "audio" in editable_df.columns:
        updated_dataset = updated_dataset.cast_column("audio", Audio(sampling_rate=16000))

    # Push updated dataset to Hugging Face
    updated_dataset.push_to_hub(dataset_name, token=hub_token)
    return f"Updated dataset successfully pushed to: {dataset_name}"
43
+
44
def handle_row_selection(selected_row, evt: gr.SelectData):
    """Turn a table selection into the row index and that row's transcription.

    Args:
        selected_row: Current table contents; its ``transcription`` column is
            read by row index.
        evt: Selection event; the first element of ``evt.index`` is used as
            the row index.

    Returns:
        A ``(row index, transcription at that row)`` tuple.
    """
    picked_row = evt.index[0]
    transcriptions = selected_row.transcription
    return picked_row, transcriptions[picked_row]
48
+
49
# Build the Gradio UI: inputs for loading, the editable table, the
# single-cell editor, and the upload controls.
with gr.Blocks() as app:
    gr.Markdown("### Hugging Face Dataset Editor")

    # Dataset identifier, token, and the button that triggers loading.
    with gr.Row():
        dataset_url_input = gr.Textbox(
            label="Dataset URL",
            placeholder="username/dataset_name",
        )
        token_input = gr.Textbox(
            label="Hub Token",
            placeholder="Enter your Hugging Face Hub token",
            type="password",
        )
        load_btn = gr.Button("Load Dataset")

    data_table = gr.DataFrame(value=editable_df)

    # Row index and column name are read-only; only the new value is typed.
    with gr.Row():
        row_input = gr.Number(
            label="Row Index",
            value=0,
            precision=0,
            interactive=False,
        )
        col_input = gr.Text(
            label="Column Name",
            value="transcription",
            interactive=False,
        )
        new_value_input = gr.Text(
            label="New Value",
            value="new_value",
            interactive=True,
        )
        update_btn = gr.Button("Update Row")

    # Selecting a cell fills in the row index and the current value.
    data_table.select(
        fn=handle_row_selection,
        inputs=data_table,
        outputs=[row_input, new_value_input],
    )

    save_btn = gr.Button("Save and Upload")
    status_output = gr.Textbox(label="Status", interactive=False)

    # Wire the buttons to their callbacks.
    load_btn.click(
        fn=load_hf_dataset,
        inputs=[dataset_url_input, token_input],
        outputs=data_table,
    )
    update_btn.click(
        fn=update_row,
        inputs=[row_input, col_input, new_value_input],
        outputs=data_table,
    )
    save_btn.click(fn=save_and_upload, outputs=status_output)

app.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ datasets
3
+ huggingface_hub
4
+ pandas
5
+ librosa