# Hugging Face Spaces page header (Space status when captured: Sleeping)
import gradio as gr | |
import pandas as pd | |
from datasets import load_dataset, Dataset, Audio | |
from huggingface_hub import login | |
# Module-level state shared by every callback below: the table being edited,
# the name used when pushing it back to the Hub, and the Hub token.
editable_df: pd.DataFrame = pd.DataFrame()
dataset_name: str = ""
hub_token: str = ""
def load_hf_dataset(dataset_url, token):
    """Load a Hub dataset into the module-level editable DataFrame.

    Args:
        dataset_url: Repository id such as ``username/dataset_name``. A full
            ``https://huggingface.co/datasets/...`` URL is also accepted and
            reduced to its repository id. Surrounding whitespace and trailing
            slashes are ignored.
        token: Hugging Face Hub token. When blank, no login is attempted and
            the dataset is loaded with whatever access is already available.

    Returns:
        The DataFrame built from the ``train`` split, or from the first
        available split when the dataset has no ``train`` split.

    Raises:
        ValueError: If ``dataset_url`` is empty.
    """
    global editable_df, dataset_name, hub_token

    repo_id = (dataset_url or "").strip()
    # Accept a pasted Hub URL as well as a bare repository id.
    for prefix in ("https://huggingface.co/datasets/", "http://huggingface.co/datasets/"):
        if repo_id.startswith(prefix):
            repo_id = repo_id[len(prefix):]
            break
    # A trailing slash would otherwise make the derived dataset name empty.
    repo_id = repo_id.strip("/")
    if not repo_id:
        raise ValueError("Dataset URL must not be empty.")

    clean_token = (token or "").strip()

    # Authenticate (only when a token was supplied) and load the dataset.
    if clean_token:
        login(clean_token)
    dataset = load_dataset(repo_id)

    # Prefer the conventional "train" split; otherwise use the first split.
    split = "train" if "train" in dataset else next(iter(dataset))
    loaded_df = pd.DataFrame(dataset[split])

    # Commit the shared state only after the load succeeded, so a failed load
    # never pairs a new name/token with the previously loaded table.
    dataset_name = repo_id.split("/")[-1]
    hub_token = clean_token
    editable_df = loaded_df
    return editable_df
def update_row(row_index, column_name, new_value):
    """Update a specific cell in the shared DataFrame.

    Args:
        row_index: Zero-based row position of the cell to change.
        column_name: Name of the column to change.
        new_value: Replacement value; double quotes are removed before it is
            stored.

    Returns:
        The shared DataFrame. It is returned unchanged when the row position
        is missing, non-numeric or out of range, when the column does not
        exist, or when ``new_value`` is ``None``.
    """
    global editable_df

    try:
        position = int(row_index)
    except (TypeError, ValueError, OverflowError):
        return editable_df

    if new_value is None or column_name not in editable_df.columns:
        return editable_df

    # Reject negative positions too: assigning through ``.at`` with an unknown
    # label would silently append a new row instead of failing.
    if not 0 <= position < len(editable_df):
        return editable_df

    cleaned = str(new_value).replace('"', '')
    # Translate the row position into its index label so the right row is
    # written even when the DataFrame does not use a default 0..n-1 index.
    editable_df.at[editable_df.index[position], column_name] = cleaned
    print(cleaned)
    return editable_df
def save_and_upload():
    """Save the updated DataFrame back to the Hugging Face Hub.

    Reads the module-level ``editable_df``, ``dataset_name`` and ``hub_token``.

    Returns:
        A status message: either a notice that no dataset has been loaded, or
        confirmation of the repository name the dataset was pushed to.
    """
    if not dataset_name:
        # Nothing was loaded (or the load failed); pushing would target an
        # empty repository name.
        return "No dataset loaded; nothing to upload."

    # Convert DataFrame to Dataset without carrying the pandas index along as
    # an extra column.
    updated_dataset = Dataset.from_pandas(editable_df, preserve_index=False)

    # Only datasets that actually have an "audio" column can be cast.
    if "audio" in updated_dataset.column_names:
        updated_dataset = updated_dataset.cast_column("audio", Audio(sampling_rate=16000))

    # Push updated dataset to Hugging Face; a blank token is passed as None.
    updated_dataset.push_to_hub(dataset_name, token=hub_token or None)
    return f"Updated dataset successfully pushed to: {dataset_name}"
def handle_row_selection(selected_row, evt: gr.SelectData):
    """Return the selected row position and that row's transcription.

    Args:
        selected_row: DataFrame holding the table's current contents.
        evt: Selection event; the first element of ``evt.index`` is used as
            the row position.

    Returns:
        A ``(row_position, transcription)`` tuple. The transcription is an
        empty string when the table has no ``transcription`` column or the
        position is outside the table.
    """
    index = evt.index[0]
    has_column = "transcription" in selected_row.columns
    if not has_column or not 0 <= index < len(selected_row):
        return index, ""
    # Positional lookup: the event reports a row position, not an index label.
    return index, selected_row["transcription"].iloc[index]
# Gradio interface: build the layout, then wire the callbacks.
with gr.Blocks() as app:
    gr.Markdown("### Hugging Face Dataset Editor")

    # Dataset location, credentials and the load trigger.
    with gr.Row():
        dataset_url_input = gr.Textbox(
            label="Dataset URL",
            placeholder="username/dataset_name",
        )
        token_input = gr.Textbox(
            label="Hub Token",
            placeholder="Enter your Hugging Face Hub token",
            type="password",
        )
        load_btn = gr.Button("Load Dataset")

    # Table showing the dataset being edited.
    data_table = gr.DataFrame(value=editable_df)

    # Cell editor: the row index and column name are read-only fields; only
    # the new value can be typed in.
    with gr.Row():
        row_input = gr.Number(
            label="Row Index",
            value=0,
            precision=0,
            interactive=False,
        )
        col_input = gr.Text(
            label="Column Name",
            value="transcription",
            interactive=False,
        )
        new_value_input = gr.Text(
            label="New Value",
            value="new_value",
            interactive=True,
        )
        update_btn = gr.Button("Update Row")

    # Selecting a table cell fills in the row index and the new-value field.
    data_table.select(
        fn=handle_row_selection,
        inputs=data_table,
        outputs=[row_input, new_value_input],
    )

    save_btn = gr.Button("Save and Upload")
    status_output = gr.Textbox(label="Status", interactive=False)

    # Button actions.
    load_btn.click(
        fn=load_hf_dataset,
        inputs=[dataset_url_input, token_input],
        outputs=data_table,
    )
    update_btn.click(
        fn=update_row,
        inputs=[row_input, col_input, new_value_input],
        outputs=data_table,
    )
    save_btn.click(fn=save_and_upload, outputs=status_output)

app.launch(share=True)