updated ui and file saving logic
Browse files- .gitignore +2 -1
- app.py +413 -498
- requirements.txt +2 -0
.gitignore
CHANGED
@@ -4,4 +4,5 @@ mapping
|
|
4 |
*.ipynb
|
5 |
test.py
|
6 |
.notebook/
|
7 |
-
__pycache__/
|
|
|
|
4 |
*.ipynb
|
5 |
test.py
|
6 |
.notebook/
|
7 |
+
__pycache__/
|
8 |
+
/results
|
app.py
CHANGED
@@ -1,69 +1,50 @@
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import io
|
3 |
import base64
|
4 |
-
import librosa
|
5 |
-
import tempfile
|
6 |
-
import os
|
7 |
import random
|
8 |
-
|
9 |
-
import
|
10 |
-
import csv
|
11 |
-
from audio_recorder_streamlit import audio_recorder
|
12 |
import pandas as pd
|
13 |
import plotly.express as px
|
14 |
import plotly.graph_objects as go
|
15 |
-
import
|
16 |
-
import time
|
17 |
-
import re
|
18 |
-
import requests
|
19 |
-
|
20 |
-
|
21 |
-
SAVE_PATH = "results/results.csv"
|
22 |
-
TEMP_DIR = "results/audios"
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
f.write("""email,path,Ori Apex_score,Ori Apex XT_score,deepgram_score,Ori Swift_score,Ori Prime_score,Ori Apex_appearance,Ori Apex XT_appearance,deepgram_appearance,Ori Swift_appearance,Ori Prime_appearance,Ori Apex_duration,Ori Apex XT_duration,deepgram_duration,Ori Swift_duration,Ori Prime_duration,azure_score,azure_appearance,azure_duration\n""")
|
30 |
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
33 |
|
|
|
|
|
|
|
34 |
CREATE_TASK_URL = os.getenv("CREATE_TASK_URL")
|
35 |
|
36 |
-
def decode_audio_array(base64_string):
|
37 |
-
bytes_data = base64.b64decode(base64_string)
|
38 |
-
|
39 |
-
buffer = io.BytesIO(bytes_data)
|
40 |
-
audio_array = np.load(buffer)
|
41 |
-
|
42 |
-
return audio_array
|
43 |
-
|
44 |
-
def send_task(payload):
|
45 |
-
header = {
|
46 |
-
"Authorization": f"Bearer {os.getenv('CREATE_TASK_API_KEY')}"
|
47 |
-
}
|
48 |
-
response = requests.post(CREATE_TASK_URL,json=payload,headers=header)
|
49 |
-
response = response.json()
|
50 |
-
|
51 |
-
if payload["task"] == "transcribe_with_fastapi":
|
52 |
-
return response["text"]
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
|
61 |
-
|
62 |
-
time_delta = timedelta(seconds=seconds)
|
63 |
-
return str(time_delta).split('.')[0]
|
64 |
|
65 |
-
|
66 |
-
|
67 |
|
68 |
class ResultWriter:
|
69 |
def __init__(self, save_path):
|
@@ -76,10 +57,11 @@ class ResultWriter:
|
|
76 |
'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration'
|
77 |
]
|
78 |
|
79 |
-
if not
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
83 |
|
84 |
def write_result(self,user_email ,audio_path,option_1_duration_info,option_2_duration_info ,winner_model=None, loser_model=None, both_preferred=False, none_preferred=False):
|
85 |
result = {
|
@@ -105,64 +87,55 @@ class ResultWriter:
|
|
105 |
result[f'{winner_model}_score'] = 1
|
106 |
|
107 |
if option_1_duration_info and option_1_duration_info[0]:
|
108 |
-
duration_key, duration_value = option_1_duration_info[0]
|
109 |
if duration_key in self.headers:
|
110 |
result[duration_key] = float(duration_value)
|
111 |
|
112 |
if option_2_duration_info and option_2_duration_info[0]:
|
113 |
-
duration_key, duration_value = option_2_duration_info[0]
|
114 |
if duration_key in self.headers:
|
115 |
result[duration_key] = float(duration_value)
|
116 |
|
117 |
-
|
118 |
-
writer = csv.DictWriter(f, fieldnames=self.headers)
|
119 |
-
writer.writerow(result)
|
120 |
|
121 |
-
|
|
|
|
|
122 |
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
st.session_state.option_selected = False
|
129 |
-
st.session_state.current_audio_path = None
|
130 |
-
st.session_state.option_1_model_name = None
|
131 |
-
st.session_state.option_2_model_name = None
|
132 |
-
st.session_state.option_1_model_name_state = None
|
133 |
-
st.session_state.option_2_model_name_state = None
|
134 |
-
st.session_state.option_2_response_time = None
|
135 |
-
st.session_state.option_1_response_time = None
|
136 |
-
st.session_state.audio_tab = None
|
137 |
-
|
138 |
-
|
139 |
-
def process_random_file(audio_file):
|
140 |
-
models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure"]
|
141 |
-
option_1_model_name, option_2_model_name = random.sample(models_list, 2)
|
142 |
|
143 |
-
st.session_state.current_audio_path = audio_file
|
144 |
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
return process_normal_audio(audio_file,option_1_model_name,option_2_model_name,"loaded_models")
|
149 |
|
150 |
-
|
151 |
-
|
152 |
-
tmp_file.write(audio_file.getvalue())
|
153 |
-
permanent_path = os.path.join(TEMP_DIR, os.path.basename(tmp_file.name))
|
154 |
-
os.makedirs(TEMP_DIR, exist_ok=True)
|
155 |
-
shutil.move(tmp_file.name, permanent_path)
|
156 |
|
157 |
-
|
158 |
|
159 |
-
|
160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
-
|
163 |
-
|
164 |
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
def encode_audio_array(audio_array):
|
168 |
buffer = io.BytesIO()
|
@@ -174,9 +147,9 @@ def encode_audio_array(audio_array):
|
|
174 |
|
175 |
return base64_string
|
176 |
|
177 |
-
def call_function(model_name
|
178 |
-
if st.session_state.
|
179 |
-
y,_ = librosa.load(audio_path,sr=22050,mono=True)
|
180 |
encoded_array = encode_audio_array(y)
|
181 |
payload = {
|
182 |
"task":"transcribe_with_fastapi",
|
@@ -189,7 +162,7 @@ def call_function(model_name,audio_path):
|
|
189 |
payload = {
|
190 |
"task":"transcribe_with_fastapi",
|
191 |
"payload":{
|
192 |
-
"file_path":audio_path,
|
193 |
"model_name":model_name,
|
194 |
"audio_b64":False
|
195 |
}}
|
@@ -197,38 +170,126 @@ def call_function(model_name,audio_path):
|
|
197 |
transcript = send_task(payload)
|
198 |
return transcript
|
199 |
|
|
|
|
|
|
|
|
|
200 |
|
|
|
|
|
201 |
|
202 |
-
def process_normal_audio(audio_path, model1_name, model2_name, loaded_models):
|
203 |
time_1 = time.time()
|
204 |
-
transcript1 = call_function(model1_name
|
205 |
time_2 = time.time()
|
206 |
-
transcript2 = call_function(model2_name
|
207 |
time_3 = time.time()
|
208 |
|
209 |
st.session_state.option_2_response_time = round(time_3 - time_2,3)
|
210 |
st.session_state.option_1_response_time = round(time_2 - time_1,3)
|
211 |
|
|
|
212 |
return transcript1, transcript2
|
213 |
|
214 |
-
def
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
|
221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
|
223 |
-
|
224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
|
226 |
-
|
227 |
-
st.session_state.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
228 |
|
229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
|
231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
|
233 |
def get_model_abbreviation(model_name):
|
234 |
abbrev_map = {
|
@@ -350,203 +411,228 @@ def create_metric_container(label, value, full_name=None):
|
|
350 |
else:
|
351 |
st.markdown(f"<h3 style='margin-top: 0;'>{value}</h3>", unsafe_allow_html=True)
|
352 |
|
353 |
-
def
|
354 |
-
|
355 |
-
st.session_state.
|
356 |
-
st.session_state.option_2_model_name_state = f"👎 {st.session_state.option_2_model_name} 👎"
|
357 |
-
st.session_state.choice = f"You chose Option 1. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
|
358 |
-
result_writer.write_result(
|
359 |
-
st.session_state.user_email,
|
360 |
-
st.session_state.current_audio_path,
|
361 |
-
winner_model=st.session_state.option_1_model_name,
|
362 |
-
loser_model=st.session_state.option_2_model_name,
|
363 |
-
option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
|
364 |
-
option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)]
|
365 |
-
)
|
366 |
-
st.session_state.option_selected = True
|
367 |
|
368 |
-
def
|
369 |
-
|
370 |
-
st.session_state.option_2_model_name_state = f"👑 {st.session_state.option_2_model_name} 👑"
|
371 |
-
st.session_state.option_1_model_name_state = f"👎 {st.session_state.option_1_model_name} 👎"
|
372 |
-
st.session_state.choice = f"You chose Option 2. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
|
373 |
-
result_writer.write_result(
|
374 |
-
st.session_state.user_email,
|
375 |
-
st.session_state.current_audio_path,
|
376 |
-
winner_model=st.session_state.option_2_model_name,
|
377 |
-
loser_model=st.session_state.option_1_model_name,
|
378 |
-
option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
|
379 |
-
option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)]
|
380 |
-
)
|
381 |
-
st.session_state.option_selected = True
|
382 |
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
st.session_state.option_1_model_name_state = f"👑 {st.session_state.option_1_model_name} 👑"
|
387 |
-
st.session_state.choice = f"You chose Prefer both. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
|
388 |
-
result_writer.write_result(
|
389 |
-
st.session_state.user_email,
|
390 |
-
st.session_state.current_audio_path,
|
391 |
-
winner_model=st.session_state.option_1_model_name,
|
392 |
-
loser_model=st.session_state.option_2_model_name,
|
393 |
-
option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
|
394 |
-
option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)],
|
395 |
-
both_preferred=True
|
396 |
-
)
|
397 |
-
st.session_state.option_selected = True
|
398 |
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
)
|
413 |
-
st.session_state.option_selected = True
|
414 |
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
419 |
|
420 |
-
def arena():
|
421 |
if 'logged_in' not in st.session_state:
|
422 |
st.session_state.logged_in = False
|
423 |
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
st.
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
st.session_state.option_1_response_time = None
|
457 |
-
if "option_2_response_time" not in st.session_state:
|
458 |
-
st.session_state.option_2_response_time = None
|
459 |
-
if "audio_tab" not in st.session_state:
|
460 |
-
st.session_state.audio_tab = None
|
461 |
-
|
462 |
-
tab2, tab3,tab4 = st.tabs(["Upload Audio", "Record Audio","Random Audio Example"])
|
463 |
-
|
464 |
-
with tab2:
|
465 |
-
normal_audio = st.file_uploader("Upload Normal Audio File", type=['wav', 'mp3'], key='normal_audio')
|
466 |
-
if normal_audio:
|
467 |
-
if st.session_state.get('last_normal_file') != normal_audio.name:
|
468 |
-
reset_state()
|
469 |
-
st.session_state.last_normal_file = normal_audio.name
|
470 |
-
st.session_state.current_file_id = normal_audio.name
|
471 |
-
|
472 |
-
st.audio(normal_audio)
|
473 |
-
|
474 |
-
if st.button("Transcribe File"):
|
475 |
-
reset_state()
|
476 |
-
st.session_state.choice = ""
|
477 |
-
st.session_state.option_selected = False
|
478 |
-
st.session_state.audio_tab = "Upload"
|
479 |
-
option_1_text, option_2_text = process_audio_file(normal_audio)
|
480 |
-
st.session_state.option_1 = option_1_text
|
481 |
-
st.session_state.option_2 = option_2_text
|
482 |
-
st.session_state.transcribed = True
|
483 |
-
|
484 |
-
with tab3:
|
485 |
-
audio_bytes = audio_recorder(text="Click 🎙️ to record ((Recording active when icon is red))",pause_threshold=3,icon_size="2x")
|
486 |
-
|
487 |
-
if audio_bytes and audio_bytes != st.session_state.last_recorded_audio:
|
488 |
reset_state()
|
489 |
st.session_state.last_recorded_audio = audio_bytes
|
490 |
-
st.session_state.
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
|
509 |
-
|
510 |
-
|
511 |
-
st.session_state.current_random_audio = filepath
|
512 |
-
if "current_array" not in st.session_state:
|
513 |
-
st.session_state.current_array = array
|
514 |
-
if "current_sampling_rate" not in st.session_state:
|
515 |
-
st.session_state.current_sampling_rate = sampling_rate
|
516 |
-
|
517 |
-
if "current_random_audio" not in st.session_state:
|
518 |
-
st.session_state.current_random_audio = filepath
|
519 |
-
|
520 |
-
if st.button("Next File"):
|
521 |
-
reset_state()
|
522 |
-
fetch_audio_payload = {
|
523 |
-
"task":"fetch_audio"
|
524 |
-
}
|
525 |
-
array,sampling_rate,filepath = send_task(fetch_audio_payload)
|
526 |
-
st.session_state.current_random_audio = filepath
|
527 |
-
st.session_state.current_array = array
|
528 |
-
st.session_state.current_sampling_rate = sampling_rate
|
529 |
-
st.session_state.last_random_audio = None
|
530 |
-
|
531 |
-
audio = st.session_state.current_random_audio
|
532 |
-
|
533 |
-
if audio and audio != st.session_state.last_random_audio:
|
534 |
-
st.session_state.choice = ""
|
535 |
-
st.session_state.option_selected = False
|
536 |
-
st.session_state.last_random_audio = audio
|
537 |
-
st.session_state.current_file_id = audio
|
538 |
-
|
539 |
-
st.audio(data=st.session_state.current_array,
|
540 |
-
sample_rate=st.session_state.current_sampling_rate,
|
541 |
-
format="audio/wav")
|
542 |
-
|
543 |
-
if st.button("Transcribe Random Audio"):
|
544 |
-
if audio:
|
545 |
-
st.session_state.option_selected = False
|
546 |
-
option_1_text, option_2_text = process_random_file(audio)
|
547 |
-
st.session_state.option_1 = option_1_text
|
548 |
-
st.session_state.option_2 = option_2_text
|
549 |
-
st.session_state.transcribed = True
|
550 |
|
551 |
text_containers = st.columns([1, 1])
|
552 |
name_containers = st.columns([1, 1])
|
@@ -579,203 +665,32 @@ def arena():
|
|
579 |
with c4:
|
580 |
st.button("Prefer None",on_click=on_option_none_click)
|
581 |
|
582 |
-
|
583 |
-
st.
|
584 |
-
|
585 |
-
else:
|
586 |
-
st.write('You have not entered your email and name yet')
|
587 |
-
st.write('Please Navigate to login page in the dropdown menu')
|
588 |
-
|
589 |
-
|
590 |
-
def dashboard():
|
591 |
-
if 'logged_in' not in st.session_state:
|
592 |
-
st.session_state.logged_in = False
|
593 |
-
|
594 |
-
if st.session_state.logged_in:
|
595 |
-
st.title('Model Arena Scoreboard')
|
596 |
-
|
597 |
-
df = pd.read_csv(SAVE_PATH)
|
598 |
-
if len(df) != 0:
|
599 |
-
metrics = calculate_metrics(df)
|
600 |
-
|
601 |
-
MODEL_DESCRIPTIONS = {
|
602 |
-
"Ori Prime": "Foundational, large, and stable.",
|
603 |
-
"Ori Swift": "Lighter and faster than Ori Prime.",
|
604 |
-
"Ori Apex": "The top-performing model, fast and stable.",
|
605 |
-
"Ori Apex XT": "Enhanced with more training, though slightly less stable than Ori Apex.",
|
606 |
-
"DG" : "Deepgram Nova-2 API",
|
607 |
-
"Azure" : "Azure Speech Services API"
|
608 |
-
}
|
609 |
-
|
610 |
-
st.header('Model Descriptions')
|
611 |
-
|
612 |
-
cols = st.columns(2)
|
613 |
-
for idx, (model, description) in enumerate(MODEL_DESCRIPTIONS.items()):
|
614 |
-
with cols[idx % 2]:
|
615 |
-
st.markdown(f"""
|
616 |
-
<div style='padding: 1rem; border: 1px solid #e1e4e8; border-radius: 6px; margin-bottom: 1rem;'>
|
617 |
-
<h3 style='margin: 0; margin-bottom: 0.5rem;'>{model}</h3>
|
618 |
-
<p style='margin: 0; color: #6e7681;'>{description}</p>
|
619 |
-
</div>
|
620 |
-
""", unsafe_allow_html=True)
|
621 |
-
|
622 |
-
st.header('Overall Performance')
|
623 |
-
|
624 |
-
col1, col2, col3= st.columns(3)
|
625 |
-
|
626 |
-
with col1:
|
627 |
-
create_metric_container("Total Matches", len(df))
|
628 |
-
|
629 |
-
best_model = max(metrics.items(), key=lambda x: x[1]['win_rate'])[0]
|
630 |
-
with col2:
|
631 |
-
create_metric_container(
|
632 |
-
"Best Model",
|
633 |
-
get_model_abbreviation(best_model),
|
634 |
-
full_name=best_model
|
635 |
-
)
|
636 |
-
|
637 |
-
most_appearances = max(metrics.items(), key=lambda x: x[1]['appearances'])[0]
|
638 |
-
with col3:
|
639 |
-
create_metric_container(
|
640 |
-
"Most Used",
|
641 |
-
get_model_abbreviation(most_appearances),
|
642 |
-
full_name=most_appearances
|
643 |
-
)
|
644 |
-
|
645 |
-
st.header('Win Rates')
|
646 |
-
win_rate_chart = create_win_rate_chart(metrics)
|
647 |
-
st.plotly_chart(win_rate_chart, use_container_width=True)
|
648 |
-
|
649 |
-
st.header('Appearance Distribution')
|
650 |
-
appearance_chart = create_appearance_chart(metrics)
|
651 |
-
st.plotly_chart(appearance_chart, use_container_width=True)
|
652 |
-
|
653 |
-
st.header('Head-to-Head Analysis')
|
654 |
-
matrix_chart = create_head_to_head_matrix(df)
|
655 |
-
st.plotly_chart(matrix_chart, use_container_width=True)
|
656 |
-
|
657 |
-
st.header('Detailed Metrics')
|
658 |
-
metrics_df = pd.DataFrame.from_dict(metrics, orient='index')
|
659 |
-
metrics_df['win_rate'] = metrics_df['win_rate'].round(2)
|
660 |
-
metrics_df.drop(["avg_response_time","response_time_std"],axis=1,inplace=True)
|
661 |
-
# metrics_df['avg_response_time'] = metrics_df['avg_response_time'].round(3)
|
662 |
-
metrics_df.index = [get_model_abbreviation(model) for model in metrics_df.index]
|
663 |
-
st.dataframe(metrics_df)
|
664 |
-
|
665 |
-
st.header('Full Dataframe')
|
666 |
-
df = df.drop('path', axis=1)
|
667 |
-
df = df.drop(['Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_duration','email'],axis=1)
|
668 |
-
st.dataframe(df)
|
669 |
else:
|
670 |
-
st.
|
671 |
-
|
672 |
-
st.write('You have not entered your email and name yet')
|
673 |
-
st.write('Please Navigate to login page in the dropdown menu')
|
674 |
-
|
675 |
-
def help():
|
676 |
-
st.title("Help")
|
677 |
|
678 |
-
|
679 |
-
"""
|
680 |
-
# Ori Speech-To-Text Arena
|
681 |
-
|
682 |
-
## Introduction
|
683 |
-
|
684 |
-
Below are the general instructions for participating in the Ori Speech-To-Text Arena.
|
685 |
-
|
686 |
-
## Options:
|
687 |
-
There are three options for participating in the Ori Speech-To-Text Arena:
|
688 |
-
|
689 |
-
1. Compare different model by uploading your own audio file and submit it to the Arena
|
690 |
-
2. Compare different model by recording your own audio file and submit it to the Arena
|
691 |
-
3. Choose and compare from one of our randomly selected audio files
|
692 |
-
|
693 |
-
### 1. Compare different model by uploading your own audio file and submit it to the Arena
|
694 |
-
|
695 |
-
Steps:
|
696 |
-
1. Select the upload audio file option
|
697 |
-
""")
|
698 |
-
|
699 |
-
st.image("./images/1.png")
|
700 |
-
st.image("./images/2.png")
|
701 |
-
st.image("./images/3.png")
|
702 |
-
st.image("./images/4.png")
|
703 |
-
|
704 |
-
st.markdown("""
|
705 |
-
### 2. Compare different model by recording your own audio file and submit it to the Arena
|
706 |
-
|
707 |
-
Steps:
|
708 |
-
1. Select the record audio file option
|
709 |
-
""")
|
710 |
-
|
711 |
-
st.image("./images/5.png")
|
712 |
-
st.image("./images/6.png")
|
713 |
-
st.image("./images/7.png")
|
714 |
-
|
715 |
-
st.markdown("""
|
716 |
-
4. Rest of the steps remain same as above
|
717 |
-
|
718 |
-
### 3. Choose and compare from one of our randomly selected audio files
|
719 |
-
|
720 |
-
Steps:
|
721 |
-
1. Select the random audio file option
|
722 |
-
""")
|
723 |
-
|
724 |
-
st.image("./images/8.png")
|
725 |
-
st.image("./images/9.png")
|
726 |
-
|
727 |
-
st.markdown("""
|
728 |
-
4. Rest of the steps remain same as above
|
729 |
-
""")
|
730 |
|
731 |
-
|
732 |
|
733 |
-
|
734 |
-
|
735 |
-
|
736 |
-
|
737 |
-
def validate_name(name):
|
738 |
-
pattern = r'^[a-zA-Z\s-]{2,}$'
|
739 |
-
return re.match(pattern, name) is not None
|
740 |
-
|
741 |
-
def create_login_page():
|
742 |
-
st.title("Welcome to the Speech-To-Text Arena")
|
743 |
-
|
744 |
-
if 'logged_in' not in st.session_state:
|
745 |
-
st.session_state.logged_in = False
|
746 |
-
|
747 |
-
if not st.session_state.logged_in:
|
748 |
-
with st.form("login_form"):
|
749 |
-
st.subheader("Please Login")
|
750 |
-
|
751 |
-
email = st.text_input("Email")
|
752 |
-
name = st.text_input("Name")
|
753 |
-
|
754 |
-
submit_button = st.form_submit_button("Login")
|
755 |
-
|
756 |
-
if submit_button:
|
757 |
-
if not email or not name:
|
758 |
-
st.error("Please fill in all fields")
|
759 |
-
else:
|
760 |
-
if not validate_email(email):
|
761 |
-
st.error("Please enter a valid email address")
|
762 |
-
elif not validate_name(name):
|
763 |
-
st.error("Please enter a valid name (letters, spaces, and hyphens only)")
|
764 |
else:
|
765 |
-
|
766 |
-
|
767 |
-
|
768 |
-
|
769 |
-
|
770 |
-
|
771 |
-
|
772 |
-
|
773 |
-
|
774 |
-
|
775 |
-
|
776 |
-
|
777 |
-
|
778 |
-
|
779 |
-
|
780 |
-
demo_name = st.sidebar.selectbox("Choose a View\nTo view the help page choose the help view", page_names_to_funcs.keys())
|
781 |
-
page_names_to_funcs[demo_name]()
|
|
|
1 |
import streamlit as st
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
import tempfile
|
5 |
+
from audio_recorder_streamlit import audio_recorder
|
6 |
+
import numpy as np
|
7 |
+
import time
|
8 |
+
import requests
|
9 |
import io
|
10 |
import base64
|
|
|
|
|
|
|
11 |
import random
|
12 |
+
import librosa
|
13 |
+
import fsspec
|
|
|
|
|
14 |
import pandas as pd
|
15 |
import plotly.express as px
|
16 |
import plotly.graph_objects as go
|
17 |
+
import boto3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
fs = fsspec.filesystem(
|
20 |
+
's3',
|
21 |
+
key=os.getenv("AWS_ACCESS_KEY"),
|
22 |
+
secret=os.getenv("AWS_SECRET_KEY")
|
23 |
+
)
|
|
|
24 |
|
25 |
+
s3_client = boto3.client(
|
26 |
+
's3',
|
27 |
+
aws_access_key_id=os.getenv("AWS_ACCESS_KEY"),
|
28 |
+
aws_secret_access_key=os.getenv("AWS_SECRET_KEY")
|
29 |
+
)
|
30 |
|
31 |
+
SAVE_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('RESULTS_KEY')}"
|
32 |
+
EMAIL_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('EMAILS_KEY')}"
|
33 |
+
TEMP_DIR = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('AUDIOS_KEY')}"
|
34 |
CREATE_TASK_URL = os.getenv("CREATE_TASK_URL")
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
+
def write_email(email):
|
38 |
+
if fs.exists(EMAIL_PATH):
|
39 |
+
with fs.open(EMAIL_PATH, 'rb') as f:
|
40 |
+
existing_content = f.read().decode('utf-8')
|
41 |
+
else:
|
42 |
+
existing_content = ''
|
43 |
|
44 |
+
new_content = existing_content + email + '\n'
|
|
|
|
|
45 |
|
46 |
+
with fs.open(EMAIL_PATH, 'wb') as f:
|
47 |
+
f.write(new_content.encode('utf-8'))
|
48 |
|
49 |
class ResultWriter:
|
50 |
def __init__(self, save_path):
|
|
|
57 |
'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration'
|
58 |
]
|
59 |
|
60 |
+
if not fs.exists(save_path):
|
61 |
+
print("CSV File not found in s3 bucket creating a new one",save_path)
|
62 |
+
with fs.open(save_path, 'wb') as f:
|
63 |
+
df = pd.DataFrame(columns=self.headers)
|
64 |
+
df.to_csv(f, index=False)
|
65 |
|
66 |
def write_result(self,user_email ,audio_path,option_1_duration_info,option_2_duration_info ,winner_model=None, loser_model=None, both_preferred=False, none_preferred=False):
|
67 |
result = {
|
|
|
87 |
result[f'{winner_model}_score'] = 1
|
88 |
|
89 |
if option_1_duration_info and option_1_duration_info[0]:
|
90 |
+
duration_key, duration_value = option_1_duration_info[0]
|
91 |
if duration_key in self.headers:
|
92 |
result[duration_key] = float(duration_value)
|
93 |
|
94 |
if option_2_duration_info and option_2_duration_info[0]:
|
95 |
+
duration_key, duration_value = option_2_duration_info[0]
|
96 |
if duration_key in self.headers:
|
97 |
result[duration_key] = float(duration_value)
|
98 |
|
99 |
+
self.write_to_s3(result)
|
|
|
|
|
100 |
|
101 |
+
def write_to_s3(self,result):
|
102 |
+
with fs.open(self.save_path, 'rb') as f:
|
103 |
+
df = pd.read_csv(f)
|
104 |
|
105 |
+
records = df.to_dict('records')
|
106 |
+
records.append(result)
|
107 |
+
df = pd.DataFrame(records)
|
108 |
+
with fs.open(self.save_path, 'wb') as f:
|
109 |
+
df.to_csv(f, index=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
|
|
111 |
|
112 |
+
def decode_audio_array(base64_string):
|
113 |
+
bytes_data = base64.b64decode(base64_string)
|
|
|
|
|
114 |
|
115 |
+
buffer = io.BytesIO(bytes_data)
|
116 |
+
audio_array = np.load(buffer)
|
|
|
|
|
|
|
|
|
117 |
|
118 |
+
return audio_array
|
119 |
|
120 |
+
def send_task(payload):
|
121 |
+
header = {
|
122 |
+
"Authorization": f"Bearer {os.getenv('CREATE_TASK_API_KEY')}"
|
123 |
+
}
|
124 |
+
response = requests.post(CREATE_TASK_URL,json=payload,headers=header)
|
125 |
+
try:
|
126 |
+
response = response.json()
|
127 |
+
except Exception:
|
128 |
+
return "error please try again"
|
129 |
|
130 |
+
if payload["task"] == "transcribe_with_fastapi":
|
131 |
+
return response["text"]
|
132 |
|
133 |
+
elif payload["task"] == "fetch_audio":
|
134 |
+
array = response["array"]
|
135 |
+
array = decode_audio_array(array)
|
136 |
+
sampling_rate = response["sample_rate"]
|
137 |
+
filepath = response["filepath"]
|
138 |
+
return array,sampling_rate,filepath
|
139 |
|
140 |
def encode_audio_array(audio_array):
|
141 |
buffer = io.BytesIO()
|
|
|
147 |
|
148 |
return base64_string
|
149 |
|
150 |
+
def call_function(model_name):
|
151 |
+
if st.session_state.current_audio_type == "recorded":
|
152 |
+
y,_ = librosa.load(st.session_state.audio_path,sr=22050,mono=True)
|
153 |
encoded_array = encode_audio_array(y)
|
154 |
payload = {
|
155 |
"task":"transcribe_with_fastapi",
|
|
|
162 |
payload = {
|
163 |
"task":"transcribe_with_fastapi",
|
164 |
"payload":{
|
165 |
+
"file_path":st.session_state.audio_path,
|
166 |
"model_name":model_name,
|
167 |
"audio_b64":False
|
168 |
}}
|
|
|
170 |
transcript = send_task(payload)
|
171 |
return transcript
|
172 |
|
173 |
+
def transcribe_audio():
|
174 |
+
|
175 |
+
models_list = ["Ori Apex", "Ori Apex XT", "deepgram", "Ori Swift", "Ori Prime","azure"]
|
176 |
+
model1_name, model2_name = random.sample(models_list, 2)
|
177 |
|
178 |
+
st.session_state.option_1_model_name = model1_name
|
179 |
+
st.session_state.option_2_model_name = model2_name
|
180 |
|
|
|
181 |
time_1 = time.time()
|
182 |
+
transcript1 = call_function(model1_name)
|
183 |
time_2 = time.time()
|
184 |
+
transcript2 = call_function(model2_name)
|
185 |
time_3 = time.time()
|
186 |
|
187 |
st.session_state.option_2_response_time = round(time_3 - time_2,3)
|
188 |
st.session_state.option_1_response_time = round(time_2 - time_1,3)
|
189 |
|
190 |
+
|
191 |
return transcript1, transcript2
|
192 |
|
193 |
+
def reset_state():
|
194 |
+
st.session_state.audio = None
|
195 |
+
st.session_state.current_audio_type = None
|
196 |
+
st.session_state.audio_path = ""
|
197 |
+
st.session_state.option_selected = False
|
198 |
+
st.session_state.transcribed = False
|
199 |
+
st.session_state.option_2_model_name = ""
|
200 |
+
st.session_state.option_1_model_name = ""
|
201 |
+
st.session_state.option_1 = ""
|
202 |
+
st.session_state.option_2 = ""
|
203 |
+
st.session_state.option_1_model_name_state = ""
|
204 |
+
st.session_state.option_2_model_name_state = ""
|
205 |
|
206 |
+
def on_option_1_click():
|
207 |
+
if st.session_state.transcribed and not st.session_state.option_selected:
|
208 |
+
st.session_state.option_1_model_name_state = f"👑 {st.session_state.option_1_model_name} 👑"
|
209 |
+
st.session_state.option_2_model_name_state = f"👎 {st.session_state.option_2_model_name} 👎"
|
210 |
+
st.session_state.choice = f"You chose Option 1. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
|
211 |
+
result_writer.write_result(
|
212 |
+
st.session_state.user_email,
|
213 |
+
st.session_state.audio_path,
|
214 |
+
winner_model=st.session_state.option_1_model_name,
|
215 |
+
loser_model=st.session_state.option_2_model_name,
|
216 |
+
option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
|
217 |
+
option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)]
|
218 |
+
)
|
219 |
+
st.session_state.option_selected = True
|
220 |
|
221 |
+
def on_option_2_click():
|
222 |
+
if st.session_state.transcribed and not st.session_state.option_selected:
|
223 |
+
st.session_state.option_2_model_name_state = f"👑 {st.session_state.option_2_model_name} 👑"
|
224 |
+
st.session_state.option_1_model_name_state = f"👎 {st.session_state.option_1_model_name} 👎"
|
225 |
+
st.session_state.choice = f"You chose Option 2. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
|
226 |
+
result_writer.write_result(
|
227 |
+
st.session_state.user_email,
|
228 |
+
st.session_state.audio_path,
|
229 |
+
winner_model=st.session_state.option_2_model_name,
|
230 |
+
loser_model=st.session_state.option_1_model_name,
|
231 |
+
option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
|
232 |
+
option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)]
|
233 |
+
)
|
234 |
+
st.session_state.option_selected = True
|
235 |
+
|
236 |
+
def on_option_both_click():
|
237 |
+
if st.session_state.transcribed and not st.session_state.option_selected:
|
238 |
+
st.session_state.option_2_model_name_state = f"👑 {st.session_state.option_2_model_name} 👑"
|
239 |
+
st.session_state.option_1_model_name_state = f"👑 {st.session_state.option_1_model_name} 👑"
|
240 |
+
st.session_state.choice = f"You chose Prefer both. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
|
241 |
+
result_writer.write_result(
|
242 |
+
st.session_state.user_email,
|
243 |
+
st.session_state.audio_path,
|
244 |
+
winner_model=st.session_state.option_1_model_name,
|
245 |
+
loser_model=st.session_state.option_2_model_name,
|
246 |
+
option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
|
247 |
+
option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)],
|
248 |
+
both_preferred=True
|
249 |
+
)
|
250 |
+
st.session_state.option_selected = True
|
251 |
|
252 |
+
def on_option_none_click():
|
253 |
+
if st.session_state.transcribed and not st.session_state.option_selected:
|
254 |
+
st.session_state.option_1_model_name_state = f"👎 {st.session_state.option_1_model_name} 👎"
|
255 |
+
st.session_state.option_2_model_name_state = f"👎 {st.session_state.option_2_model_name} 👎"
|
256 |
+
st.session_state.choice = f"You chose none option. Option 1 was {st.session_state.option_1_model_name} Option 2 was {st.session_state.option_2_model_name}"
|
257 |
+
result_writer.write_result(
|
258 |
+
st.session_state.user_email,
|
259 |
+
st.session_state.audio_path,
|
260 |
+
winner_model=st.session_state.option_1_model_name,
|
261 |
+
loser_model=st.session_state.option_2_model_name,
|
262 |
+
option_1_duration_info=[(f"{st.session_state.option_1_model_name}_duration",st.session_state.option_1_response_time)],
|
263 |
+
option_2_duration_info=[(f"{st.session_state.option_2_model_name}_duration",st.session_state.option_2_response_time)],
|
264 |
+
none_preferred=True
|
265 |
+
)
|
266 |
+
st.session_state.option_selected = True
|
267 |
|
268 |
+
def on_click_transcribe():
    """Transcribe the current audio with both candidate models and store the
    two transcripts in session state. Does nothing when no audio is loaded."""
    if not st.session_state.has_audio:
        return
    first_text, second_text = transcribe_audio()
    st.session_state.option_1 = first_text
    st.session_state.option_2 = second_text
    st.session_state.transcribed = True
    # Clear any leftover model-reveal labels from a previous vote.
    st.session_state.option_1_model_name_state = ""
    st.session_state.option_2_model_name_state = ""
|
277 |
|
278 |
+
def on_random_click():
    """Fetch a random sample audio from the backend task API and load it into
    session state, replacing whatever audio was active before."""
    reset_state()
    array, sampling_rate, filepath = send_task({"task": "fetch_audio"})
    st.session_state.audio = {
        "data": array,
        "sample_rate": sampling_rate,
        "format": "audio/wav",
    }
    st.session_state.has_audio = True
    st.session_state.current_audio_type = "random"
    st.session_state.audio_path = filepath
    # Re-enable voting for the freshly loaded audio.
    st.session_state.option_selected = None
|
287 |
+
|
288 |
+
# Module-level writer that appends each vote outcome as a row to the results
# CSV at SAVE_PATH; shared by all click handlers above.
result_writer = ResultWriter(SAVE_PATH)
|
289 |
+
|
290 |
+
def validate_email(email):
    """Return True when *email* looks like a valid ``local@domain.tld`` address.

    Uses ``re.fullmatch`` instead of ``re.match`` with a ``$`` anchor: ``$``
    also matches just before a trailing newline, so the old check accepted
    inputs like ``"a@b.com\\n"``. ``fullmatch`` rejects them.
    """
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return re.fullmatch(pattern, email) is not None
|
293 |
|
294 |
def get_model_abbreviation(model_name):
|
295 |
abbrev_map = {
|
|
|
411 |
else:
|
412 |
st.markdown(f"<h3 style='margin-top: 0;'>{value}</h3>", unsafe_allow_html=True)
|
413 |
|
414 |
+
def on_refresh_click():
    """Re-read the results CSV from remote storage (via the module-level
    ``fs`` filesystem handle) into ``st.session_state.df``."""
    with fs.open(SAVE_PATH, "rb") as handle:
        refreshed = pd.read_csv(handle)
    st.session_state.df = refreshed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
417 |
|
418 |
+
def dashboard():
    """Render the Scoreboard page: model descriptions, aggregate metrics,
    win-rate / appearance / head-to-head charts, and the raw results table."""
    st.title('Model Arena Scoreboard')

    # Lazily load the results CSV (from remote storage via fsspec) once per
    # session; the Refresh button below re-reads it on demand.
    if "df" not in st.session_state:
        with fs.open(SAVE_PATH, 'rb') as f:
            st.session_state.df = pd.read_csv(f)

    st.button("Refresh",on_click=on_refresh_click)

    if len(st.session_state.df) != 0:
        # Per-model aggregates (win rate, appearances, timings) from raw rows.
        metrics = calculate_metrics(st.session_state.df)

        # Display names/blurbs; "DG" and "Azure" are third-party baselines.
        MODEL_DESCRIPTIONS = {
            "Ori Prime": "Foundational, large, and stable.",
            "Ori Swift": "Lighter and faster than Ori Prime.",
            "Ori Apex": "The top-performing model, fast and stable.",
            "Ori Apex XT": "Enhanced with more training, though slightly less stable than Ori Apex.",
            "DG" : "Deepgram Nova-2 API",
            "Azure" : "Azure Speech Services API"
        }

        st.header('Model Descriptions')

        # Two-column grid of description cards, filled left-to-right.
        cols = st.columns(2)
        for idx, (model, description) in enumerate(MODEL_DESCRIPTIONS.items()):
            with cols[idx % 2]:
                st.markdown(f"""
<div style='padding: 1rem; border: 1px solid #e1e4e8; border-radius: 6px; margin-bottom: 1rem;'>
<h3 style='margin: 0; margin-bottom: 0.5rem;'>{model}</h3>
<p style='margin: 0; color: #6e7681;'>{description}</p>
</div>
""", unsafe_allow_html=True)

        st.header('Overall Performance')

        col1, col2, col3= st.columns(3)

        with col1:
            create_metric_container("Total Matches", len(st.session_state.df))

        # Model with the highest win rate across all recorded matches.
        best_model = max(metrics.items(), key=lambda x: x[1]['win_rate'])[0]
        with col2:
            create_metric_container(
                "Best Model",
                get_model_abbreviation(best_model),
                full_name=best_model
            )

        # Model that took part in the most matches.
        most_appearances = max(metrics.items(), key=lambda x: x[1]['appearances'])[0]
        with col3:
            create_metric_container(
                "Most Used",
                get_model_abbreviation(most_appearances),
                full_name=most_appearances
            )

        # Tabular metrics view; raw response-time columns are dropped for
        # display and model names abbreviated to keep the table compact.
        metrics_df = pd.DataFrame.from_dict(metrics, orient='index')
        metrics_df['win_rate'] = metrics_df['win_rate'].round(2)
        metrics_df.drop(["avg_response_time","response_time_std"],axis=1,inplace=True)
        metrics_df.index = [get_model_abbreviation(model) for model in metrics_df.index]
        st.dataframe(metrics_df,use_container_width=True)

        st.header('Win Rates')
        win_rate_chart = create_win_rate_chart(metrics)
        st.plotly_chart(win_rate_chart, use_container_width=True)

        st.header('Appearance Distribution')
        appearance_chart = create_appearance_chart(metrics)
        st.plotly_chart(appearance_chart, use_container_width=True)

        st.header('Head-to-Head Analysis')
        matrix_chart = create_head_to_head_matrix(st.session_state.df)
        st.plotly_chart(matrix_chart, use_container_width=True)

        st.header('Full Dataframe')
        # Hide file paths, per-model durations, and user emails from the
        # public table.
        st.dataframe(st.session_state.df.drop(['path','Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_duration','email'],axis=1),use_container_width=True)
    else:
        st.write("No Data to show")
|
496 |
+
|
497 |
+
def about():
    """Render the About page: describes the Arena and Scoreboard, then shows
    a contact form that records the visitor's email and logs them in.

    Fix: the form subheader read "Please Enter you Email" — corrected the
    user-facing typo to "your".
    """
    st.title("About")

    st.markdown(
        """
# Ori Speech-To-Text Arena
"""
    )

    st.markdown(
        """## Arena
"""
    )

    st.markdown(
        """
* The Arena allows a user to record their audios, in which speech will be recognized by two randomly selected models. After listening to the audio, and evaluating the output from both the models, the user can vote on which transcription they prefer. Due to the risks of human bias and abuse, model names are revealed only after a vote is submitted."""
    )

    st.markdown(
        "## Scoreboard"
    )

    st.markdown(
        """ * The Scoreboard shows the performance of the models in the Arena. The user can see the overall performance of the models, the model with the highest win rate, and the model with the most appearances. The user can also see the win rates of each model, as well as the appearance distribution of each model."""
    )

    st.markdown(
        "## Contact Us"
    )

    st.markdown(
        "To inquire about our speech-to-text models and APIs, you can submit your email using the form below."
    )

    with st.form("login_form"):
        st.subheader("Please Enter your Email")

        email = st.text_input("Email")

        submit_button = st.form_submit_button("Submit")

        if submit_button:
            if not email:
                st.error("Please fill in all fields")
            else:
                if not validate_email(email):
                    st.error("Please enter a valid email address")
                else:
                    # Valid email: mark the session as logged in, persist the
                    # address, and confirm to the user.
                    st.session_state.logged_in = True
                    st.session_state.user_email = email
                    write_email(st.session_state.user_email)
                    st.success("Thanks for submitting your email, our team will be in touch with you shortly!")
|
550 |
+
|
551 |
+
def main():
|
552 |
+
|
553 |
+
st.title("⚔️ Ori Speech-To-Text Arena ⚔️")
|
554 |
+
|
555 |
+
if "has_audio" not in st.session_state:
|
556 |
+
st.session_state.has_audio = False
|
557 |
+
if "audio" not in st.session_state:
|
558 |
+
st.session_state.audio = None
|
559 |
+
if "audio_path" not in st.session_state:
|
560 |
+
st.session_state.audio_path = ""
|
561 |
+
if "option_1" not in st.session_state:
|
562 |
+
st.session_state.option_1 = ""
|
563 |
+
if "option_2" not in st.session_state:
|
564 |
+
st.session_state.option_2 = ""
|
565 |
+
if "transcribed" not in st.session_state:
|
566 |
+
st.session_state.transcribed = False
|
567 |
+
if "option_1_model_name_state" not in st.session_state:
|
568 |
+
st.session_state.option_1_model_name_state = ""
|
569 |
+
if "option_1_model_name" not in st.session_state:
|
570 |
+
st.session_state.option_1_model_name = ""
|
571 |
+
if "option_2_model_name" not in st.session_state:
|
572 |
+
st.session_state.option_2_model_name = ""
|
573 |
+
if "option_2_model_name_state" not in st.session_state:
|
574 |
+
st.session_state.option_2_model_name_state = ""
|
575 |
+
if "user_email" not in st.session_state:
|
576 |
+
st.session_state.user_email = ""
|
577 |
|
|
|
578 |
if 'logged_in' not in st.session_state:
|
579 |
st.session_state.logged_in = False
|
580 |
|
581 |
+
arena, scoreboard,about_tab = st.tabs(["Arena", "Scoreboard","About"])
|
582 |
+
|
583 |
+
with arena:
|
584 |
+
INSTR = """
|
585 |
+
## Instructions:
|
586 |
+
* Record audio to recognise speech (or press 🎲 for random Audio).
|
587 |
+
* Click on transcribe audio button to commence the transcription process.
|
588 |
+
* Read the two options one after the other while listening to the audio.
|
589 |
+
* Vote on which transcript you prefer.
|
590 |
+
* Note:
|
591 |
+
* Model names are revealed after the vote is cast.
|
592 |
+
* Currently only Indian Hindi language is supported, and
|
593 |
+
the results will be in Hinglish (Hindi in Latin script)
|
594 |
+
* Random audios are only in hindi
|
595 |
+
* It may take up to 30 seconds for speech recognition in some cases.
|
596 |
+
""".strip()
|
597 |
+
|
598 |
+
st.markdown(INSTR)
|
599 |
+
|
600 |
+
col1, col2 = st.columns([1, 1])
|
601 |
+
|
602 |
+
with col1:
|
603 |
+
st.markdown("### Record Audio")
|
604 |
+
with st.container():
|
605 |
+
audio_bytes = audio_recorder(
|
606 |
+
text="🎙️ Click to Record",
|
607 |
+
pause_threshold=3,
|
608 |
+
icon_size="2x",
|
609 |
+
key="audio_recorder",
|
610 |
+
sample_rate=16_000
|
611 |
+
)
|
612 |
+
if audio_bytes and audio_bytes != st.session_state.get('last_recorded_audio'):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
613 |
reset_state()
|
614 |
st.session_state.last_recorded_audio = audio_bytes
|
615 |
+
st.session_state.audio = {"data":audio_bytes,"format":"audio/wav"}
|
616 |
+
st.session_state.current_audio_type = "recorded"
|
617 |
+
st.session_state.has_audio = True
|
618 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
|
619 |
+
tmp_file.write(audio_bytes)
|
620 |
+
os.makedirs(TEMP_DIR, exist_ok=True)
|
621 |
+
s3_client.put_object(Bucket=os.getenv('AWS_BUCKET_NAME'), Key=f"{os.getenv('AUDIOS_KEY')}/{tmp_file.name.split('/')[-1]}", Body=audio_bytes)
|
622 |
+
st.session_state.audio_path = tmp_file.name
|
623 |
+
st.session_state.option_selected = None
|
624 |
+
|
625 |
+
with col2:
|
626 |
+
st.markdown("### Random Audio Example")
|
627 |
+
with st.container():
|
628 |
+
st.button("🎲 Random Audio",on_click=on_random_click)
|
629 |
+
|
630 |
+
if st.session_state.has_audio:
|
631 |
+
st.audio(**st.session_state.audio)
|
632 |
+
|
633 |
+
|
634 |
+
with st.container():
|
635 |
+
st.button("📝 Transcribe Audio",on_click=on_click_transcribe,use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
636 |
|
637 |
text_containers = st.columns([1, 1])
|
638 |
name_containers = st.columns([1, 1])
|
|
|
665 |
with c4:
|
666 |
st.button("Prefer None",on_click=on_option_none_click)
|
667 |
|
668 |
+
with scoreboard:
|
669 |
+
if st.session_state.logged_in:
|
670 |
+
dashboard()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
671 |
else:
|
672 |
+
with st.form("contact_us_form"):
|
673 |
+
st.subheader("Please Enter you Email")
|
|
|
|
|
|
|
|
|
|
|
674 |
|
675 |
+
email = st.text_input("Email")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
676 |
|
677 |
+
submit_button = st.form_submit_button("Submit")
|
678 |
|
679 |
+
if submit_button:
|
680 |
+
if not email:
|
681 |
+
st.error("Please fill in all fields")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
682 |
else:
|
683 |
+
if not validate_email(email):
|
684 |
+
st.error("Please enter a valid email address")
|
685 |
+
else:
|
686 |
+
st.session_state.logged_in = True
|
687 |
+
st.session_state.user_email = email
|
688 |
+
write_email(st.session_state.user_email)
|
689 |
+
st.success("Thanks for submitting your email")
|
690 |
+
if st.session_state.logged_in:
|
691 |
+
dashboard()
|
692 |
+
|
693 |
+
with about_tab:
|
694 |
+
about()
|
695 |
+
|
696 |
+
# Streamlit entry point: render the app on every script (re)run.
main()
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -6,3 +6,5 @@ plotly==5.24.1
|
|
6 |
requests==2.32.3
|
7 |
scipy
|
8 |
streamlit==1.40.2
|
|
|
|
|
|
6 |
requests==2.32.3
|
7 |
scipy
|
8 |
streamlit==1.40.2
|
9 |
+
fsspec==2024.10.0
|
10 |
+
boto3
|