Moved writing results to CSV into the tasks API
Browse files
app.py
CHANGED
@@ -15,6 +15,8 @@ import pandas as pd
|
|
15 |
import plotly.express as px
|
16 |
import plotly.graph_objects as go
|
17 |
import boto3
|
|
|
|
|
18 |
|
19 |
fs = fsspec.filesystem(
|
20 |
's3',
|
@@ -29,6 +31,8 @@ s3_client = boto3.client(
|
|
29 |
)
|
30 |
|
31 |
SAVE_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('RESULTS_KEY')}"
|
|
|
|
|
32 |
EMAIL_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('EMAILS_KEY')}"
|
33 |
TEMP_DIR = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('AUDIOS_KEY')}"
|
34 |
CREATE_TASK_URL = os.getenv("CREATE_TASK_URL")
|
@@ -57,87 +61,41 @@ class ResultWriter:
|
|
57 |
'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration'
|
58 |
]
|
59 |
|
|
|
|
|
60 |
if not fs.exists(save_path):
|
61 |
print("CSV File not found in s3 bucket creating a new one",save_path)
|
62 |
with fs.open(save_path, 'wb') as f:
|
63 |
df = pd.DataFrame(columns=self.headers)
|
64 |
df.to_csv(f, index=False)
|
65 |
|
66 |
-
def
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
if outcome == 'win':
|
83 |
-
scoreA = 1
|
84 |
-
scoreB = 0
|
85 |
-
elif outcome == 'tie':
|
86 |
-
scoreA = 0.5
|
87 |
-
scoreB = 0.5
|
88 |
-
else: # no_result
|
89 |
-
# No change in ratings for no result
|
90 |
-
return rating_a, rating_b
|
91 |
-
|
92 |
-
newRatingA = rating_a + kFactor * (scoreA - expectedA)
|
93 |
-
newRatingB = rating_b + kFactor * (scoreB - expectedB)
|
94 |
-
|
95 |
-
return round(newRatingA, 3), round(newRatingB, 3)
|
96 |
-
|
97 |
-
def write_result(self,user_email ,audio_path,option_1_duration_info,option_2_duration_info ,winner_model=None, loser_model=None, both_preferred=False, none_preferred=False):
|
98 |
-
result = {
|
99 |
-
'email': user_email,
|
100 |
-
'path': audio_path,
|
101 |
-
'Ori Apex_score': 0, 'Ori Apex XT_score': 0, 'deepgram_score': 0, 'Ori Swift_score': 0, 'Ori Prime_score': 0,
|
102 |
-
'Ori Apex_appearance': 0, 'Ori Apex XT_appearance': 0, 'deepgram_appearance': 0, 'Ori Swift_appearance': 0, 'Ori Prime_appearance': 0,
|
103 |
-
'Ori Apex_duration':0, 'Ori Apex XT_duration':0, 'deepgram_duration':0, 'Ori Swift_duration':0, 'Ori Prime_duration':0,'azure_score':0,'azure_appearance':0,'azure_duration':0
|
104 |
}
|
105 |
|
106 |
-
|
107 |
-
result[f'{winner_model}_appearance'] = 1
|
108 |
-
|
109 |
-
if loser_model:
|
110 |
-
result[f'{loser_model}_appearance'] = 1
|
111 |
-
|
112 |
-
if both_preferred:
|
113 |
-
if winner_model:
|
114 |
-
result[f'{winner_model}_score'] = 1
|
115 |
-
if loser_model:
|
116 |
-
result[f'{loser_model}_score'] = 1
|
117 |
-
elif not none_preferred and winner_model:
|
118 |
-
result[f'{winner_model}_score'] = 1
|
119 |
|
120 |
-
if option_1_duration_info and option_1_duration_info[0]:
|
121 |
-
duration_key, duration_value = option_1_duration_info[0]
|
122 |
-
if duration_key in self.headers:
|
123 |
-
result[duration_key] = float(duration_value)
|
124 |
-
|
125 |
-
if option_2_duration_info and option_2_duration_info[0]:
|
126 |
-
duration_key, duration_value = option_2_duration_info[0]
|
127 |
-
if duration_key in self.headers:
|
128 |
-
result[duration_key] = float(duration_value)
|
129 |
-
|
130 |
-
self.write_to_s3(result)
|
131 |
-
|
132 |
-
def write_to_s3(self,result):
|
133 |
-
with fs.open(self.save_path, 'rb') as f:
|
134 |
-
df = pd.read_csv(f)
|
135 |
-
|
136 |
-
records = df.to_dict('records')
|
137 |
-
records.append(result)
|
138 |
-
df = pd.DataFrame(records)
|
139 |
-
with fs.open(self.save_path, 'wb') as f:
|
140 |
-
df.to_csv(f, index=False)
|
141 |
|
142 |
|
143 |
def decode_audio_array(base64_string):
|
@@ -433,6 +391,33 @@ def create_head_to_head_matrix(df):
|
|
433 |
|
434 |
return fig
|
435 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
436 |
def create_metric_container(label, value, full_name=None):
|
437 |
container = st.container()
|
438 |
with container:
|
@@ -447,12 +432,36 @@ def on_refresh_click():
|
|
447 |
with fs.open(SAVE_PATH, 'rb') as f:
|
448 |
st.session_state.df = pd.read_csv(f)
|
449 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
450 |
def dashboard():
|
451 |
st.title('Model Arena Scoreboard')
|
452 |
|
453 |
if "df" not in st.session_state:
|
454 |
with fs.open(SAVE_PATH, 'rb') as f:
|
455 |
st.session_state.df = pd.read_csv(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
456 |
|
457 |
st.button("Refresh",on_click=on_refresh_click)
|
458 |
|
@@ -487,7 +496,8 @@ def dashboard():
|
|
487 |
with col1:
|
488 |
create_metric_container("Total Matches", len(st.session_state.df))
|
489 |
|
490 |
-
best_model = max(metrics.items(), key=lambda x: x[1]['win_rate'])[0]
|
|
|
491 |
with col2:
|
492 |
create_metric_container(
|
493 |
"Best Model",
|
@@ -517,6 +527,12 @@ def dashboard():
|
|
517 |
appearance_chart = create_appearance_chart(metrics)
|
518 |
st.plotly_chart(appearance_chart, use_container_width=True)
|
519 |
|
|
|
|
|
|
|
|
|
|
|
|
|
520 |
st.header('Head-to-Head Analysis')
|
521 |
matrix_chart = create_head_to_head_matrix(st.session_state.df)
|
522 |
st.plotly_chart(matrix_chart, use_container_width=True)
|
|
|
15 |
import plotly.express as px
|
16 |
import plotly.graph_objects as go
|
17 |
import boto3
|
18 |
+
import json
|
19 |
+
from plotly.subplots import make_subplots
|
20 |
|
21 |
fs = fsspec.filesystem(
|
22 |
's3',
|
|
|
31 |
)
|
32 |
|
33 |
SAVE_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('RESULTS_KEY')}"
|
34 |
+
ELO_JSON_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('ELO_JSON_PATH')}"
|
35 |
+
ELO_CSV_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('ELO_CSV_KEY')}"
|
36 |
EMAIL_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('EMAILS_KEY')}"
|
37 |
TEMP_DIR = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('AUDIOS_KEY')}"
|
38 |
CREATE_TASK_URL = os.getenv("CREATE_TASK_URL")
|
|
|
61 |
'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration'
|
62 |
]
|
63 |
|
64 |
+
self.models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
|
65 |
+
|
66 |
if not fs.exists(save_path):
|
67 |
print("CSV File not found in s3 bucket creating a new one",save_path)
|
68 |
with fs.open(save_path, 'wb') as f:
|
69 |
df = pd.DataFrame(columns=self.headers)
|
70 |
df.to_csv(f, index=False)
|
71 |
|
72 |
+
def write_result(self, user_email, audio_path, option_1_duration_info,
                 option_2_duration_info, winner_model=None, loser_model=None,
                 both_preferred=False, none_preferred=False):
    """Submit one comparison result as a ``write_result`` task.

    The arguments are forwarded unchanged inside a task envelope and handed
    to ``send_task`` (defined elsewhere in this file); nothing is written
    from this method directly.

    Args:
        user_email: Forwarded as ``user_email``.
        audio_path: Forwarded as ``audio_path``.
        option_1_duration_info: Forwarded as ``option_1_duration_info``.
        option_2_duration_info: Forwarded as ``option_2_duration_info``.
        winner_model: Forwarded as ``winner_model`` (default ``None``).
        loser_model: Forwarded as ``loser_model`` (default ``None``).
        both_preferred: Forwarded as ``both_preferred`` (default ``False``).
        none_preferred: Forwarded as ``none_preferred`` (default ``False``).
    """
    # Key order is kept as-is in case the receiving side serializes or logs
    # the payload in insertion order.
    result_fields = dict(
        winner_model=winner_model,
        loser_model=loser_model,
        both_preferred=both_preferred,
        none_preferred=none_preferred,
        user_email=user_email,
        audio_path=audio_path,
        option_1_duration_info=option_1_duration_info,
        option_2_duration_info=option_2_duration_info,
    )

    send_task({"task": "write_result", "payload": result_fields})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
|
101 |
def decode_audio_array(base64_string):
|
|
|
391 |
|
392 |
return fig
|
393 |
|
394 |
+
def create_elo_chart(df):
    """Build a line chart with one trace per column of ``df``.

    Each column is plotted against the row position (0 .. len(df) - 1),
    labeled "Match Number" on the x-axis.

    Args:
        df: DataFrame to plot. Presumably one column per model and one row
            per match -- confirm against whatever produces the ELO CSV.

    Returns:
        The plotly figure containing all traces.
    """
    fig = make_subplots(
        rows=1,
        cols=1,
        # Must be a sequence of titles: a bare string is indexed character by
        # character, which titles the subplot "E" instead of the full text.
        subplot_titles=('ELO Rating Progression',),
        row_heights=[0.7],
    )

    # The x values are the same for every trace, so build them once.
    match_numbers = list(range(len(df)))

    for column in df.columns:
        fig.add_trace(
            go.Scatter(
                x=match_numbers,
                y=df[column],
                name=column,
                mode='lines+markers',
            ),
            row=1, col=1,
        )

    fig.update_layout(
        title='Model ELO Ratings Analysis',
        showlegend=True,
        hovermode='x unified',
    )

    # The grid has a single row, so only the row-1 x-axis exists to label;
    # the former row=2 update addressed a subplot that is never created.
    fig.update_xaxes(title_text='Match Number', row=1, col=1)

    return fig
|
420 |
+
|
421 |
def create_metric_container(label, value, full_name=None):
|
422 |
container = st.container()
|
423 |
with container:
|
|
|
432 |
with fs.open(SAVE_PATH, 'rb') as f:
|
433 |
st.session_state.df = pd.read_csv(f)
|
434 |
|
435 |
+
try:
|
436 |
+
with fs.open(ELO_JSON_PATH,'r') as f:
|
437 |
+
st.session_state.elo_json = json.load(f)
|
438 |
+
except Exception:
|
439 |
+
st.session_state.elo_json = None
|
440 |
+
|
441 |
+
try:
|
442 |
+
with fs.open(ELO_CSV_PATH,'rb') as f:
|
443 |
+
st.session_state.elo_df = pd.read_csv(f)
|
444 |
+
except Exception:
|
445 |
+
st.session_state.elo_df = None
|
446 |
+
|
447 |
def dashboard():
|
448 |
st.title('Model Arena Scoreboard')
|
449 |
|
450 |
if "df" not in st.session_state:
|
451 |
with fs.open(SAVE_PATH, 'rb') as f:
|
452 |
st.session_state.df = pd.read_csv(f)
|
453 |
+
if "elo_json" not in st.session_state:
|
454 |
+
try:
|
455 |
+
with fs.open(ELO_JSON_PATH,'r') as f:
|
456 |
+
st.session_state.elo_json = json.load(f)
|
457 |
+
except Exception:
|
458 |
+
st.session_state.elo_json = None
|
459 |
+
if "elo_df" not in st.session_state:
|
460 |
+
try:
|
461 |
+
with fs.open(ELO_CSV_PATH,'rb') as f:
|
462 |
+
st.session_state.elo_df = pd.read_csv(f)
|
463 |
+
except Exception:
|
464 |
+
st.session_state.elo_df = None
|
465 |
|
466 |
st.button("Refresh",on_click=on_refresh_click)
|
467 |
|
|
|
496 |
with col1:
|
497 |
create_metric_container("Total Matches", len(st.session_state.df))
|
498 |
|
499 |
+
# best_model = max(metrics.items(), key=lambda x: x[1]['win_rate'])[0]
|
500 |
+
best_model = max(st.session_state.elo_json.items(), key=lambda x: x[1])[0] if st.session_state.elo_json else max(metrics.items(), key=lambda x: x[1]['win_rate'])[0]
|
501 |
with col2:
|
502 |
create_metric_container(
|
503 |
"Best Model",
|
|
|
527 |
appearance_chart = create_appearance_chart(metrics)
|
528 |
st.plotly_chart(appearance_chart, use_container_width=True)
|
529 |
|
530 |
+
# Show the Elo section only when both the ratings JSON and the progression
# CSV were loaded. A DataFrame has no unambiguous truth value (bool(df)
# raises ValueError), so it is checked explicitly for None / emptiness
# instead of being used directly in the boolean expression.
if (
    st.session_state.elo_json
    and st.session_state.elo_df is not None
    and not st.session_state.elo_df.empty
):
    st.header('Elo Ratings')
    # elo_json is rendered as a single-row table; presumably it maps
    # model name -> current rating -- confirm against the JSON producer.
    st.dataframe(pd.DataFrame(st.session_state.elo_json, index=[0]), use_container_width=True)
    elo_progression_chart = create_elo_chart(st.session_state.elo_df)
    st.plotly_chart(elo_progression_chart, use_container_width=True)
|
535 |
+
|
536 |
st.header('Head-to-Head Analysis')
|
537 |
matrix_chart = create_head_to_head_matrix(st.session_state.df)
|
538 |
st.plotly_chart(matrix_chart, use_container_width=True)
|