File size: 2,929 Bytes
e1f36d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import gradio as gr
import pandas as pd
from datasets import load_dataset, Dataset, Audio
from huggingface_hub import login

# Global variables to store dataset and token.
# These are module-level state read and written by the callbacks below.
editable_df: pd.DataFrame = pd.DataFrame()  # working copy of the loaded data; starts empty
dataset_name: str = ""  # repo name passed to push_to_hub when saving
hub_token: str = ""  # token captured at load time and reused for the upload


def load_hf_dataset(dataset_url, token):
    """Load the ``train`` split of a Hub dataset into the editable DataFrame.

    Args:
        dataset_url: Dataset repo id, e.g. ``username/dataset_name``.
            Surrounding whitespace and a trailing slash are ignored.
        token: Hugging Face Hub token. When blank, no login is attempted and
            whatever credentials the environment already has are used.

    Returns:
        The loaded DataFrame, which is also stored in the module-level
        ``editable_df``.

    Raises:
        ValueError: If ``dataset_url`` is empty or only whitespace.
    """
    global editable_df, dataset_name, hub_token

    repo_id = (dataset_url or "").strip().rstrip("/")
    if not repo_id:
        raise ValueError(
            "Dataset URL must not be empty (expected 'username/dataset_name')."
        )
    access_token = (token or "").strip()

    # Authenticate only when a token was actually supplied.
    if access_token:
        login(access_token)
    dataset = load_dataset(repo_id)
    loaded_df = pd.DataFrame(dataset["train"])

    # Commit the shared state only after the load succeeded, so a failed load
    # cannot leave the old DataFrame paired with a new dataset name/token.
    editable_df = loaded_df
    dataset_name = repo_id.split("/")[-1]
    hub_token = access_token
    return editable_df

def update_row(row_index, column_name, new_value):
    """Update a specific cell in the editable DataFrame.

    Double-quote characters are removed from ``new_value`` before it is
    stored. The update is skipped (and the DataFrame returned unchanged) when
    the row index is missing, not a number, negative, past the last row, or
    when ``column_name`` is not a column of the DataFrame.

    Args:
        row_index: Zero-based row number of the cell to change.
        column_name: Name of the column to change.
        new_value: Replacement text; ``None`` is treated as an empty string.

    Returns:
        The module-level ``editable_df`` after the (possible) update.
    """
    global editable_df

    try:
        row = int(row_index)
    except (TypeError, ValueError):
        # Empty / non-numeric index: nothing sensible to update.
        return editable_df

    # The lower bound matters: ``.at`` with an unknown label such as -1 would
    # silently append a new row instead of failing.
    if 0 <= row < len(editable_df) and column_name in editable_df.columns:
        text = "" if new_value is None else str(new_value)
        editable_df.at[row, column_name] = text.replace('"', "")
    return editable_df

def save_and_upload():
    """Save the updated DataFrame back to the Hugging Face Hub.

    Converts the module-level ``editable_df`` to a ``Dataset``, casts its
    ``audio`` column (when present) to 16 kHz ``Audio``, and pushes it to the
    repo named by ``dataset_name``.

    Returns:
        A status message: either a confirmation containing the dataset name,
        or a notice that nothing has been loaded yet.
    """
    global editable_df, dataset_name, hub_token

    # Nothing to push until a dataset has been loaded successfully.
    if not dataset_name or editable_df.empty:
        return "No dataset loaded; load a dataset before uploading."

    # Convert DataFrame to Dataset
    updated_dataset = Dataset.from_pandas(editable_df)

    # Only datasets that actually carry an audio column need the cast.
    if "audio" in updated_dataset.column_names:
        updated_dataset = updated_dataset.cast_column("audio", Audio(sampling_rate=16000))

    # Push updated dataset to Hugging Face; a blank token falls back to the
    # credentials already stored in the environment.
    updated_dataset.push_to_hub(dataset_name, token=hub_token or None)
    return f"Updated dataset successfully pushed to: {dataset_name}"

def handle_row_selection(selected_row, evt: gr.SelectData):
    """Return the clicked row number and that row's ``transcription`` value.

    ``selected_row`` is the table data passed in by the select event and
    ``evt.index`` holds the position of the selected cell; only its first
    element (the row) is used.
    """
    row_position = evt.index[0]
    transcription_column = selected_row.transcription
    current_text = transcription_column[row_position]
    return row_position, current_text

# Gradio interface
# Layout, top to bottom: load controls, the data table, the single-cell
# editor, then the save button with its status box.
with gr.Blocks() as app:
    gr.Markdown("### Hugging Face Dataset Editor")
    
    # Inputs needed to fetch a dataset from the Hub.
    with gr.Row():
        dataset_url_input = gr.Textbox(label="Dataset URL", placeholder="username/dataset_name")
        token_input = gr.Textbox(label="Hub Token", placeholder="Enter your Hugging Face Hub token", type="password")
        load_btn = gr.Button("Load Dataset")

    # Table showing the current contents of the editable DataFrame.
    data_table = gr.DataFrame(value=editable_df)
    
    # Cell editor: the row index and column name are read-only here; the row
    # index is filled in by the table's select event wired up below, and the
    # column name is fixed to "transcription".
    with gr.Row():
        row_input = gr.Number(label="Row Index", value=0, precision=0, interactive=False)
        col_input = gr.Text(label="Column Name", value="transcription", interactive=False)
        new_value_input = gr.Text(label="New Value", value="new_value", interactive=True)
        update_btn = gr.Button("Update Row")

    # Register callback to handle row selection and update:
    # selecting a cell writes the row index and current transcription into
    # row_input and new_value_input.
    data_table.select(handle_row_selection, data_table,[row_input,new_value_input])#
    
    save_btn = gr.Button("Save and Upload")
    status_output = gr.Textbox(label="Status", interactive=False)

    # Button actions
    # Load: fetch the dataset and show it in the table.
    load_btn.click(load_hf_dataset, inputs=[dataset_url_input, token_input], outputs=data_table)
    # Update: write the new value into the selected cell and refresh the table.
    update_btn.click(update_row, inputs=[row_input, col_input, new_value_input], outputs=data_table)
    # Save: push the edited data to the Hub and report the result.
    save_btn.click(save_and_upload, outputs=status_output)

# Start the app at import/run time; share=True is passed through to Gradio's launch().
app.launch(share=True)