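"""Gradio voting app for comparing SeamlessM4T-V2-large and OpenAI Whisper transcriptions.

Serves a random audio chunk alongside both models' transcriptions, records
listener votes to ./results/votes.json, and can plot the running vote totals.
"""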
import gradio as gr
import json
import os
import random
import matplotlib.pyplot as plt

# Load transcription results
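# Each results file is assumed to be a list of {"chunk": <audio filename>, "text": <transcription>} entries.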
with open("./results/gradio_results.json", "r", encoding='utf-8') as file:
    gradio_transcriptions = json.load(file)

with open("./results/openai_results.json", "r", encoding='utf-8') as file:
    openai_transcriptions = json.load(file)

audio_files_directory = "./chunks"

def get_random_audio_and_transcriptions():
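    """Pick a random audio chunk and return its path with both models' transcriptions."""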
    random_choice = random.choice(os.listdir(audio_files_directory))
    audio_path = os.path.join(audio_files_directory, random_choice)
    base_name = os.path.splitext(random_choice)[0]
    gradio_transcription = next((t for t in gradio_transcriptions if t['chunk'].startswith(base_name)), {'text': ''})['text']
    openai_transcription = next((t for t in openai_transcriptions if t['chunk'].startswith(base_name)), {'text': ''})['text']
    return audio_path, gradio_transcription, openai_transcription

def handle_vote(vote, audio_path):
    """Record a vote for the given audio chunk in ./results/votes.json."""
    votes_file = "./results/votes.json"

    # Lowercase the vote so it matches the dictionary keys below
    vote = vote.lower()

    if os.path.exists(votes_file):
        with open(votes_file, "r", encoding='utf-8') as file:
            votes = json.load(file)
    else:
        votes = {}

    key = os.path.basename(audio_path)
    if key not in votes:
        votes[key] = {"seamlessm4t": 0, "whisper": 0, "tie": 0, "out of context": 0, "inappropriate": 0}

    if vote in votes[key]:
        votes[key][vote] += 1
    else:
        print(f"Invalid vote option: {vote}. Valid options are 'seamlessm4t', 'whisper', 'tie', 'out of context', and 'inappropriate'.")

    with open(votes_file, "w", encoding='utf-8') as file:
        json.dump(votes, file, indent=4)

def calculate_vote_totals():
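    """Sum the per-chunk vote counts in ./results/votes.json into overall totals."""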
    votes_file = "./results/votes.json"
    if os.path.exists(votes_file):
        with open(votes_file, "r", encoding='utf-8') as file:
            votes_data = json.load(file)
    else:
        print("No votes have been recorded yet.")
        return None

    # Initialize totals
    totals = {"seamlessm4t": 0, "whisper": 0, "tie": 0,"out of context": 0, "innapropriate": 0}

    # Aggregate votes
    for _, vote_counts in votes_data.items():
        for key in totals:
            totals[key] += vote_counts.get(key, 0)

    return totals

def show_results():
    """Plot the aggregated vote totals as a bar chart."""
    totals = calculate_vote_totals()
    fig = plt.figure(figsize=(8, 6))
    if totals:
        labels = ["SeamlessM4T", "Whisper", "Tie", "Out of context", "Inappropriate"]
        values = list(totals.values())

        plt.bar(labels, values, color=['cornflowerblue', 'lavender', 'green', 'red', 'yellow'])
        plt.xlabel('Models')
        plt.ylabel('Votes')
        plt.title('Vote Distribution')
        plt.ylim(0, max(values) + 1)  # Leave some headroom above the tallest bar

    # Return the figure (empty if no votes have been recorded yet) for gr.Plot
    return fig
    
def display_votes():
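    """Return the raw contents of ./results/votes.json as pretty-printed text."""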
    votes_file = "./results/votes.json"
    if os.path.exists(votes_file):
        with open(votes_file, "r", encoding='utf-8') as file:
            votes = json.load(file)
        formatted_votes = json.dumps(votes, indent=4)
    else:
        formatted_votes = "No votes file found."
    return formatted_votes

def setup_interface():
    """Build the Gradio Blocks UI: audio player, both transcriptions, voting, and results."""
    with gr.Blocks() as demo:
        # Track which audio chunk is currently displayed so each vote is
        # recorded against the clip the listener actually judged.
        current_audio = gr.State()

        vote_options = gr.Radio(choices=["SeamlessM4T", "Whisper", "Tie", "Out of context", "Inappropriate"], label="Vote")
        submit_button = gr.Button("Submit Vote")
        gradio_transcription = gr.Textbox(label="SeamlessM4T-V2-large Transcription", interactive=False)
        openai_transcription = gr.Textbox(label="OpenAI Whisper Transcription", interactive=False)
        audio_player = gr.Audio(label="Listen to the Audio", interactive=False)

        def load_random_sample():
            # Fetch a fresh clip and its transcriptions for the next round
            audio_path, gr_transcription, oa_transcription = get_random_audio_and_transcriptions()
            return gr_transcription, oa_transcription, audio_path, audio_path

        def submit_vote(vote, audio_path):
            if vote and audio_path:  # Ensure a vote was made for a loaded clip
                handle_vote(vote, audio_path)
            # Return new data to update the UI components
            return load_random_sample()

        submit_button.click(
            submit_vote,
            inputs=[vote_options, current_audio],
            outputs=[gradio_transcription, openai_transcription, audio_player, current_audio]
        )

        show_results_button = gr.Button("Show Results")
        results_plot = gr.Plot()

        show_votes_button = gr.Button("Show Votes")
        votes_display = gr.Textbox(label="Votes", placeholder="Click the button to load votes...", lines=20, interactive=False)

        show_votes_button.click(fn=display_votes, inputs=[], outputs=votes_display)
        show_results_button.click(show_results, inputs=[], outputs=results_plot)

        # Populate the UI with an initial sample when the app loads
        demo.load(
            load_random_sample,
            inputs=[],
            outputs=[gradio_transcription, openai_transcription, audio_player, current_audio]
        )

    return demo

if __name__ == "__main__":
    demo = setup_interface()
    demo.launch()