import gradio as gr
import os
from datasets import load_dataset, Dataset
import pandas as pd
from huggingface_hub import login

def load_huggingface_dataset(dataset_link, token):
    """
    Load the "train" split of a Hugging Face dataset for editing.

    Args:
        dataset_link: Dataset identifier, passed unchanged to ``load_dataset``.
        token: Hugging Face access token used to read the dataset. An empty
            value is treated as "no token supplied".

    Returns:
        A ``(df, dataset)`` tuple: ``df`` is a DataFrame holding only the
        "index" and "transcription" columns, and ``dataset`` is the loaded
        dataset object itself.

    Raises:
        KeyError: If the dataset has no "transcription" column.
    """
    # Forward the token so private/gated datasets can be read. The UI sends
    # "" when the field is left blank; map that to None instead of sending an
    # empty credential.
    dataset = load_dataset(dataset_link, split="train", token=token or None)

    # reset_index() turns the row position into an explicit "index" column,
    # which the editor uses to match edited rows back to the dataset.
    df = dataset.to_pandas().reset_index()
    return df[["index", "transcription"]], dataset
    

def _collect_transcription_edits(df):
    """Return ``{row position: new text}`` for every non-blank edited row of *df*."""
    texts = df["transcription"].tolist()
    if "index" in df.columns:
        positions = df["index"].tolist()
    else:
        # No explicit index column: fall back to the row order of the table.
        positions = range(len(texts))

    edits = {}
    for position, text in zip(positions, texts):
        # Blank or missing cells are not edits; the stored text must survive.
        if not isinstance(text, str) or not text.strip():
            continue
        try:
            # The table may hand numbers back as floats (e.g. 3.0).
            edits[int(position)] = text
        except (TypeError, ValueError):
            continue  # Row without a usable index value.
    return edits


def update_transcriptions(df, dataset, token,dataset_link):
    """
    Apply edited transcriptions to the dataset and push it to the Hugging Face Hub.

    Args:
        df: DataFrame with a "transcription" column and, normally, an "index"
            column giving each row's position in ``dataset``.
        dataset: The loaded dataset whose "transcription" column is rewritten.
        token: Hugging Face access token. When empty, no login is attempted
            before pushing.
        dataset_link: Repository identifier the updated dataset is pushed to.

    Returns:
        A confirmation message string.

    Rows whose edited transcription is blank or missing keep the value already
    stored in ``dataset``.
    """
    # Build the lookup once instead of re-reading the whole column per row.
    edits = _collect_transcription_edits(df)

    def apply_edit(example, idx):
        # Always return a dict so every row gets the same kind of result.
        return {"transcription": edits.get(idx, example["transcription"])}

    dataset = dataset.map(apply_edit, with_indices=True)

    if token:
        login(token)

    dataset.push_to_hub(dataset_link)

    return "Dataset updated and changes submitted to the Hugging Face Hub!"

# Gradio Interface
def main():
    """
    Build and launch the Gradio app for editing dataset transcriptions.

    The app loads a dataset, shows its "index" and "transcription" columns in
    an editable table, supports filtering by a search term, and pushes the
    edited transcriptions back to the Hugging Face Hub.
    """
    # NOTE(review): both variables live in this closure, so they are shared by
    # every session served by this process — confirm that is acceptable given
    # the app is launched with share=True.
    dataset = None  # Loaded dataset object.
    original_df = None  # Full, unfiltered table; search results are views of it.

    def load_dataset_and_show_table(dataset_link, token):
        """
        Load the dataset and return the DataFrame to display in Gradio.
        """
        nonlocal dataset, original_df
        original_df, dataset = load_huggingface_dataset(dataset_link, token)
        return original_df

    def search_transcriptions(search_term):
        """
        Return the rows whose transcription contains the search term.

        The term is matched literally and case-insensitively; an empty table
        is returned when no dataset has been loaded yet.
        """
        if original_df is None:
            return pd.DataFrame(columns=["index", "transcription"])
        # regex=False: user input such as "(" or "?" must be searched for as
        # plain text rather than being compiled as a regular expression.
        matches = original_df["transcription"].str.contains(
            search_term or "", case=False, na=False, regex=False
        )
        return original_df[matches]

    def update_original(df):
        """
        Copy the transcriptions of the displayed rows into the full table.

        Rows are matched on the "index" column; when an index value appears
        more than once in ``df``, the last occurrence wins.
        """
        if original_df is None:
            return "No dataset loaded to update."

        edits = dict(zip(df["index"], df["transcription"]))
        row_ids = original_df["index"]
        touched = row_ids.isin(list(edits))
        if touched.any():
            # One assignment for all edited rows instead of a full scan of
            # the table per edited row.
            original_df.loc[touched, "transcription"] = [
                edits[row_id] for row_id in row_ids[touched]
            ]

        return "update Successful"

    def submit_changes(df, token,dataset_link):
        """
        Submit updated changes to the Hugging Face Hub.
        """
        if dataset is None:
            return "No dataset loaded to update."

        if len(df) < len(dataset):
            # The table shows a filtered subset: merge it into the full table
            # and submit that, so rows outside the filter are not lost.
            update_original(df)
            return update_transcriptions(original_df, dataset, token, dataset_link)

        return update_transcriptions(df, dataset, token, dataset_link)

    # The second rule renders the table cells right-to-left; it is scoped to
    # the table through its elem_id.
    css = (
        ".dataframe-row { height: 200px; }"
        " #transcription-table td { direction: rtl; }"
    )

    with gr.Blocks(css=css) as interface:
        gr.Markdown("## Hugging Face Audio Dataset Editor")

        # Input fields for dataset link and token
        dataset_link = gr.Textbox(label="Hugging Face Dataset Link")
        hf_token = gr.Textbox(label="Hugging Face Token", type="password")

        # Button to load dataset
        load_button = gr.Button("Load Dataset")

        # Search bar
        search_box = gr.Textbox(label="Search Transcriptions", placeholder="Enter a search term...")

        # Table to display and edit dataset
        table = gr.Dataframe(
            headers=["index", "transcription"],
            datatype=["number", "str"],
            interactive=True,
            label="Edit Dataset (Transcriptions are RTL)",
            elem_id="transcription-table",
        )

        update_button = gr.Button("Update Table")
        # Button to submit changes
        submit_button = gr.Button("Submit Changes")
        update_message = gr.Textbox(label="update message")
        output_message = gr.Textbox(label="Message")

        # Button functionality
        load_button.click(load_dataset_and_show_table, [dataset_link, hf_token], table)
        search_box.change(search_transcriptions, search_box, table)
        update_button.click(update_original, [table], update_message)
        submit_button.click(submit_changes, [table, hf_token, dataset_link], output_message)

    # Launch Gradio Interface
    interface.launch(share=True)
    
    

# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()