"""Gradio app for editing the transcriptions of a Hugging Face dataset.

Workflow: load the ``train`` split of a dataset into a table, click a row to
select it, overwrite its ``transcription`` value, then push the edited table
back to the Hub.
"""

import gradio as gr
import pandas as pd
from datasets import load_dataset, Dataset, Audio
from huggingface_hub import login

# Global variables to store dataset and token.
# NOTE(review): these are module-level, so every callback invocation reads and
# writes the same values; with a publicly shared app that presumably means all
# visitors share one dataset and one token -- confirm this is acceptable.
editable_df = pd.DataFrame()
dataset_name = ""
hub_token = ""


def load_hf_dataset(dataset_url, token):
    """Authenticate with the Hub and load a dataset's ``train`` split.

    Args:
        dataset_url: Repository id such as ``username/dataset_name``.
            Surrounding whitespace and slashes are ignored.
        token: Hugging Face Hub access token.

    Returns:
        The ``train`` split as a pandas DataFrame (also stored in the
        module-level ``editable_df``).
    """
    global editable_df, dataset_name, hub_token

    # Normalise first: a trailing "/" or stray whitespace would otherwise make
    # the last path component (the dataset name) an empty string.
    repo_id = (dataset_url or "").strip().strip("/")

    # Only the final path component is remembered for the later upload.
    # NOTE(review): push_to_hub with a bare name presumably targets the token
    # owner's namespace rather than the original repository -- confirm intent.
    dataset_name = repo_id.split("/")[-1]
    hub_token = token

    # Authenticate and load dataset.
    # NOTE(review): login() may persist the token on the host machine; verify
    # that is wanted when the app is exposed through a share link.
    login(token)
    dataset = load_dataset(repo_id)
    editable_df = pd.DataFrame(dataset["train"])
    return editable_df


def update_row(row_index, column_name, new_value):
    """Update a specific cell in the DataFrame.

    Double quotes are stripped from ``new_value`` before it is stored. The
    edit is skipped (and the table returned unchanged) when ``row_index`` is
    missing or out of range, or when ``column_name`` is not a column.

    Args:
        row_index: Zero-based row position to edit.
        column_name: Name of the column to edit.
        new_value: Replacement text for the cell.

    Returns:
        The (possibly updated) module-level DataFrame.
    """
    global editable_df

    if row_index is None or column_name not in editable_df.columns:
        return editable_df

    row = int(row_index)
    # The lower bound matters: ``.at`` with an unknown label (e.g. -1) would
    # silently append a new row instead of editing an existing one.
    if 0 <= row < len(editable_df):
        cleaned = "" if new_value is None else str(new_value).replace('"', '')
        editable_df.at[row, column_name] = cleaned
        print(cleaned)
    return editable_df


def save_and_upload():
    """Save the updated DataFrame back to the Hugging Face Hub.

    Returns:
        A status message describing the outcome.
    """
    global editable_df, dataset_name, hub_token

    # Nothing has been loaded yet: report it instead of failing inside the
    # conversion/upload calls.
    if editable_df.empty or not dataset_name:
        return "No dataset loaded. Load a dataset before uploading."

    # Convert DataFrame to Dataset
    updated_dataset = Dataset.from_pandas(editable_df)
    # Re-declare the audio column as an Audio feature; skipped when the
    # dataset has no such column so text-only datasets can still be pushed.
    if "audio" in updated_dataset.column_names:
        updated_dataset = updated_dataset.cast_column(
            "audio", Audio(sampling_rate=16000)
        )

    # Push updated dataset to Hugging Face
    updated_dataset.push_to_hub(dataset_name, token=hub_token)
    return f"Updated dataset successfully pushed to: {dataset_name}"


def handle_row_selection(selected_row, evt: gr.SelectData):
    """Return the clicked row's index and its current transcription.

    Args:
        selected_row: Current contents of the data table.
        evt: Selection event; ``evt.index[0]`` is used as the row position.

    Returns:
        ``(row_index, transcription)`` for the selected row.
    """
    index = evt.index[0]
    # The event index is used as a position, so look it up positionally.
    return index, selected_row["transcription"].iloc[index]


# Gradio interface
# NOTE(review): the nesting of widgets inside each gr.Row() below is a
# reconstruction; it only affects on-screen layout, not behaviour.
with gr.Blocks() as app:
    gr.Markdown("### Hugging Face Dataset Editor")

    with gr.Row():
        dataset_url_input = gr.Textbox(
            label="Dataset URL", placeholder="username/dataset_name"
        )
        token_input = gr.Textbox(
            label="Hub Token",
            placeholder="Enter your Hugging Face Hub token",
            type="password",
        )
        load_btn = gr.Button("Load Dataset")

    data_table = gr.DataFrame(value=editable_df)

    with gr.Row():
        # Row index and column name are filled in by row selection, not typed.
        row_input = gr.Number(
            label="Row Index", value=0, precision=0, interactive=False
        )
        col_input = gr.Text(
            label="Column Name", value="transcription", interactive=False
        )
        new_value_input = gr.Text(
            label="New Value", value="new_value", interactive=True
        )
        update_btn = gr.Button("Update Row")

    # Register callback to handle row selection and update
    data_table.select(
        handle_row_selection, data_table, [row_input, new_value_input]
    )

    save_btn = gr.Button("Save and Upload")
    status_output = gr.Textbox(label="Status", interactive=False)

    # Button actions
    load_btn.click(
        load_hf_dataset,
        inputs=[dataset_url_input, token_input],
        outputs=data_table,
    )
    update_btn.click(
        update_row,
        inputs=[row_input, col_input, new_value_input],
        outputs=data_table,
    )
    save_btn.click(save_and_upload, outputs=status_output)

app.launch(share=True)