Nash-pAnDiTa's picture
upload app and req
e1f36d5 verified
raw
history blame
2.93 kB
import gradio as gr
import pandas as pd
from datasets import load_dataset, Dataset, Audio
from huggingface_hub import login
# Module-level state shared by the callbacks below: the working DataFrame,
# plus the dataset name and Hub token remembered when a dataset is loaded.
editable_df = pd.DataFrame()
dataset_name, hub_token = "", ""
def load_hf_dataset(dataset_url, token):
    """Log in to the Hub, load a dataset, and expose its "train" split for editing.

    Args:
        dataset_url: Dataset identifier handed to ``load_dataset``. Only the
            text after the last ``/`` is remembered as ``dataset_name``.
        token: Hugging Face Hub token; passed to ``login`` and stored in
            ``hub_token`` for the later upload.

    Returns:
        The module-level ``editable_df`` DataFrame built from the "train" split.
    """
    global editable_df, dataset_name, hub_token

    # Remember the repo name (last path segment) and the token;
    # save_and_upload() reads both.
    dataset_name = dataset_url.rsplit("/", 1)[-1]
    hub_token = token

    # Authenticate first, then fetch the dataset and keep its "train" split.
    login(token)
    train_split = load_dataset(dataset_url)["train"]
    editable_df = pd.DataFrame(train_split)
    return editable_df
def update_row(row_index, column_name, new_value):
    """Update a specific cell in the DataFrame.

    Double-quote characters are removed from ``new_value`` before it is
    stored. The edit is applied only when ``row_index`` lies in
    ``0 .. len(editable_df) - 1`` and ``column_name`` is an existing column;
    otherwise the table is returned unchanged.

    Args:
        row_index: Row label to edit (compared against the frame's length).
        column_name: Name of the column to edit.
        new_value: Replacement text for the cell.

    Returns:
        The module-level ``editable_df`` DataFrame (modified in place).
    """
    global editable_df
    # The lower bound matters: assigning through ``.at`` with a label that
    # does not exist (e.g. -1) enlarges the frame instead of failing, so a
    # negative index would silently append a bogus row.
    row_in_range = 0 <= row_index < len(editable_df)
    if row_in_range and column_name in editable_df.columns:
        cleaned_value = new_value.replace('"', '')
        editable_df.at[row_index, column_name] = cleaned_value
        print(cleaned_value)
    return editable_df
def save_and_upload():
    """Save the updated DataFrame back to the Hugging Face Hub.

    Converts the module-level ``editable_df`` to a ``Dataset``, casts the
    ``"audio"`` column to ``Audio(sampling_rate=16000)`` when that column is
    present, and pushes the result to ``dataset_name`` using ``hub_token``.

    Returns:
        A status message: either a notice that nothing is loaded, or a
        confirmation naming the repository that was pushed to.
    """
    global editable_df, dataset_name, hub_token

    # Nothing has been loaded yet (or the table is empty): do not attempt a
    # push with an empty repository name or an empty dataset.
    if not dataset_name or editable_df.empty:
        return "No dataset loaded; nothing to upload."

    # Convert DataFrame to Dataset
    updated_dataset = Dataset.from_pandas(editable_df)

    # Only cast when the column exists; datasets without an "audio" column
    # would otherwise fail here before anything is uploaded.
    if "audio" in editable_df.columns:
        updated_dataset = updated_dataset.cast_column("audio", Audio(sampling_rate=16000))

    # Push updated dataset to Hugging Face
    updated_dataset.push_to_hub(dataset_name, token=hub_token)
    return f"Updated dataset successfully pushed to: {dataset_name}"
def handle_row_selection(selected_row, evt: gr.SelectData):
    """Translate a table selection into a row index and that row's transcription.

    Args:
        selected_row: Table value supplied by the ``select`` listener; its
            ``transcription`` attribute is indexed with the selected row.
        evt: Selection event; ``evt.index[0]`` is taken as the row index.

    Returns:
        A ``(row_index, transcription)`` tuple.
    """
    clicked_row = evt.index[0]
    current_text = selected_row.transcription[clicked_row]
    return clicked_row, current_text
# Gradio interface
# NOTE(review): the nesting of the statements below under the `with` blocks is
# not visible in this view of the file — confirm the layout against the
# original indentation.
with gr.Blocks() as app:
gr.Markdown("### Hugging Face Dataset Editor")
# Loading controls: dataset identifier, Hub token (masked input), load button.
with gr.Row():
dataset_url_input = gr.Textbox(label="Dataset URL", placeholder="username/dataset_name")
token_input = gr.Textbox(label="Hub Token", placeholder="Enter your Hugging Face Hub token", type="password")
load_btn = gr.Button("Load Dataset")
# Table showing the working DataFrame (empty until a dataset is loaded).
data_table = gr.DataFrame(value=editable_df)
# Editing controls: row index and column name are read-only; only the new
# value is typed by the user.
with gr.Row():
row_input = gr.Number(label="Row Index", value=0, precision=0, interactive=False)
col_input = gr.Text(label="Column Name", value="transcription", interactive=False)
new_value_input = gr.Text(label="New Value", value="new_value", interactive=True)
update_btn = gr.Button("Update Row")
# Register callback to handle row selection and update
# (selecting in the table fills row_input and new_value_input).
data_table.select(handle_row_selection, data_table,[row_input,new_value_input])#
save_btn = gr.Button("Save and Upload")
status_output = gr.Textbox(label="Status", interactive=False)
# Button actions
# Load: fetch the dataset and show it in the table.
load_btn.click(load_hf_dataset, inputs=[dataset_url_input, token_input], outputs=data_table)
# Update: write the new value into the chosen cell and refresh the table.
update_btn.click(update_row, inputs=[row_input, col_input, new_value_input], outputs=data_table)
# Save: push the edited data to the Hub and report the result in the status box.
save_btn.click(save_and_upload, outputs=status_output)
# NOTE(review): share=True asks Gradio for a public share link; this UI accepts
# a Hub token, so confirm that public exposure is intended.
app.launch(share=True)