chinmayc3 committed on
Commit
b67d31e
·
1 Parent(s): 5abd25c

moved writing results to csv to tasks api

Browse files
Files changed (1) hide show
  1. app.py +88 -72
app.py CHANGED
@@ -15,6 +15,8 @@ import pandas as pd
15
  import plotly.express as px
16
  import plotly.graph_objects as go
17
  import boto3
 
 
18
 
19
  fs = fsspec.filesystem(
20
  's3',
@@ -29,6 +31,8 @@ s3_client = boto3.client(
29
  )
30
 
31
  SAVE_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('RESULTS_KEY')}"
 
 
32
  EMAIL_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('EMAILS_KEY')}"
33
  TEMP_DIR = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('AUDIOS_KEY')}"
34
  CREATE_TASK_URL = os.getenv("CREATE_TASK_URL")
@@ -57,87 +61,41 @@ class ResultWriter:
57
  'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration'
58
  ]
59
 
 
 
60
  if not fs.exists(save_path):
61
  print("CSV File not found in s3 bucket creating a new one",save_path)
62
  with fs.open(save_path, 'wb') as f:
63
  df = pd.DataFrame(columns=self.headers)
64
  df.to_csv(f, index=False)
65
 
66
- def expected_rating(self, rating_a, rating_b):
67
- return 1 / (1 + 10 ** ((rating_a - rating_b) / 400))
68
-
 
 
 
 
 
 
 
69
 
70
- def updateElo(self,rating_a, rating_b, outcome, baseKFactor):
71
- # Calculate the rating difference
72
- ratingDiff = abs(rating_a - rating_b)
73
-
74
- if ratingDiff > 25:
75
- kFactor = baseKFactor + 10
76
- else:
77
- kFactor = baseKFactor
78
-
79
- expectedA = self.expected_rating(rating_a, rating_b)
80
- expectedB = self.expected_rating(rating_b, rating_a)
81
-
82
- if outcome == 'win':
83
- scoreA = 1
84
- scoreB = 0
85
- elif outcome == 'tie':
86
- scoreA = 0.5
87
- scoreB = 0.5
88
- else: # no_result
89
- # No change in ratings for no result
90
- return rating_a, rating_b
91
-
92
- newRatingA = rating_a + kFactor * (scoreA - expectedA)
93
- newRatingB = rating_b + kFactor * (scoreB - expectedB)
94
-
95
- return round(newRatingA, 3), round(newRatingB, 3)
96
-
97
- def write_result(self,user_email ,audio_path,option_1_duration_info,option_2_duration_info ,winner_model=None, loser_model=None, both_preferred=False, none_preferred=False):
98
- result = {
99
- 'email': user_email,
100
- 'path': audio_path,
101
- 'Ori Apex_score': 0, 'Ori Apex XT_score': 0, 'deepgram_score': 0, 'Ori Swift_score': 0, 'Ori Prime_score': 0,
102
- 'Ori Apex_appearance': 0, 'Ori Apex XT_appearance': 0, 'deepgram_appearance': 0, 'Ori Swift_appearance': 0, 'Ori Prime_appearance': 0,
103
- 'Ori Apex_duration':0, 'Ori Apex XT_duration':0, 'deepgram_duration':0, 'Ori Swift_duration':0, 'Ori Prime_duration':0,'azure_score':0,'azure_appearance':0,'azure_duration':0
104
  }
105
 
106
- if winner_model:
107
- result[f'{winner_model}_appearance'] = 1
108
-
109
- if loser_model:
110
- result[f'{loser_model}_appearance'] = 1
111
-
112
- if both_preferred:
113
- if winner_model:
114
- result[f'{winner_model}_score'] = 1
115
- if loser_model:
116
- result[f'{loser_model}_score'] = 1
117
- elif not none_preferred and winner_model:
118
- result[f'{winner_model}_score'] = 1
119
 
120
- if option_1_duration_info and option_1_duration_info[0]:
121
- duration_key, duration_value = option_1_duration_info[0]
122
- if duration_key in self.headers:
123
- result[duration_key] = float(duration_value)
124
-
125
- if option_2_duration_info and option_2_duration_info[0]:
126
- duration_key, duration_value = option_2_duration_info[0]
127
- if duration_key in self.headers:
128
- result[duration_key] = float(duration_value)
129
-
130
- self.write_to_s3(result)
131
-
132
- def write_to_s3(self,result):
133
- with fs.open(self.save_path, 'rb') as f:
134
- df = pd.read_csv(f)
135
-
136
- records = df.to_dict('records')
137
- records.append(result)
138
- df = pd.DataFrame(records)
139
- with fs.open(self.save_path, 'wb') as f:
140
- df.to_csv(f, index=False)
141
 
142
 
143
  def decode_audio_array(base64_string):
@@ -433,6 +391,33 @@ def create_head_to_head_matrix(df):
433
 
434
  return fig
435
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
  def create_metric_container(label, value, full_name=None):
437
  container = st.container()
438
  with container:
@@ -447,12 +432,36 @@ def on_refresh_click():
447
  with fs.open(SAVE_PATH, 'rb') as f:
448
  st.session_state.df = pd.read_csv(f)
449
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  def dashboard():
451
  st.title('Model Arena Scoreboard')
452
 
453
  if "df" not in st.session_state:
454
  with fs.open(SAVE_PATH, 'rb') as f:
455
  st.session_state.df = pd.read_csv(f)
 
 
 
 
 
 
 
 
 
 
 
 
456
 
457
  st.button("Refresh",on_click=on_refresh_click)
458
 
@@ -487,7 +496,8 @@ def dashboard():
487
  with col1:
488
  create_metric_container("Total Matches", len(st.session_state.df))
489
 
490
- best_model = max(metrics.items(), key=lambda x: x[1]['win_rate'])[0]
 
491
  with col2:
492
  create_metric_container(
493
  "Best Model",
@@ -517,6 +527,12 @@ def dashboard():
517
  appearance_chart = create_appearance_chart(metrics)
518
  st.plotly_chart(appearance_chart, use_container_width=True)
519
 
 
 
 
 
 
 
520
  st.header('Head-to-Head Analysis')
521
  matrix_chart = create_head_to_head_matrix(st.session_state.df)
522
  st.plotly_chart(matrix_chart, use_container_width=True)
 
15
  import plotly.express as px
16
  import plotly.graph_objects as go
17
  import boto3
18
+ import json
19
+ from plotly.subplots import make_subplots
20
 
21
  fs = fsspec.filesystem(
22
  's3',
 
31
  )
32
 
33
  SAVE_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('RESULTS_KEY')}"
34
+ ELO_JSON_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('ELO_JSON_PATH')}"
35
+ ELO_CSV_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('ELO_CSV_KEY')}"
36
  EMAIL_PATH = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('EMAILS_KEY')}"
37
  TEMP_DIR = f"s3://{os.getenv('AWS_BUCKET_NAME')}/{os.getenv('AUDIOS_KEY')}"
38
  CREATE_TASK_URL = os.getenv("CREATE_TASK_URL")
 
61
  'Ori Apex_duration', 'Ori Apex XT_duration', 'deepgram_duration', 'Ori Swift_duration', 'Ori Prime_duration','azure_score','azure_appearance','azure_duration'
62
  ]
63
 
64
+ self.models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
65
+
66
  if not fs.exists(save_path):
67
  print("CSV File not found in s3 bucket creating a new one",save_path)
68
  with fs.open(save_path, 'wb') as f:
69
  df = pd.DataFrame(columns=self.headers)
70
  df.to_csv(f, index=False)
71
 
72
def write_result(self,
                 user_email,
                 audio_path,
                 option_1_duration_info,
                 option_2_duration_info,
                 winner_model=None,
                 loser_model=None,
                 both_preferred=False,
                 none_preferred=False):
    """Hand one comparison result to the tasks API instead of writing it here.

    The arguments are forwarded unchanged inside a ``write_result`` task
    envelope and passed to ``send_task``. Nothing is computed or persisted
    in this method itself.

    Args:
        user_email: Forwarded as ``payload["user_email"]``.
        audio_path: Forwarded as ``payload["audio_path"]``.
        option_1_duration_info: Forwarded as-is for the first option.
        option_2_duration_info: Forwarded as-is for the second option.
        winner_model: Forwarded as ``payload["winner_model"]``; may be None.
        loser_model: Forwarded as ``payload["loser_model"]``; may be None.
        both_preferred: Forwarded flag.
        none_preferred: Forwarded flag.
    """
    # Key order mirrors the original literal so a serialised body is unchanged.
    result_fields = {}
    result_fields["winner_model"] = winner_model
    result_fields["loser_model"] = loser_model
    result_fields["both_preferred"] = both_preferred
    result_fields["none_preferred"] = none_preferred
    result_fields["user_email"] = user_email
    result_fields["audio_path"] = audio_path
    result_fields["option_1_duration_info"] = option_1_duration_info
    result_fields["option_2_duration_info"] = option_2_duration_info

    # NOTE(review): send_task is defined elsewhere in app.py; presumably it
    # posts the envelope to CREATE_TASK_URL - confirm against its definition.
    send_task({"task": "write_result", "payload": result_fields})
 
 
 
 
 
 
 
 
 
 
 
 
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
 
101
  def decode_audio_array(base64_string):
 
391
 
392
  return fig
393
 
394
def create_elo_chart(df):
    """Build a line chart with one trace per column of ``df``.

    Each column is drawn as a 'lines+markers' trace named after the column,
    plotted against the row position (0 .. len(df) - 1), which the x-axis
    labels as the match number.

    Args:
        df: DataFrame of ratings - presumably one column per model and one
            row per recorded match (the ELO history CSV); confirm against
            the producer in the tasks API.

    Returns:
        The plotly figure containing the rating traces.
    """
    # subplot_titles expects a sequence of titles. The original passed
    # ('ELO Rating Progression'), which is a plain str (no trailing comma),
    # so the titles were indexed character by character.
    fig = make_subplots(
        rows=1,
        cols=1,
        subplot_titles=('ELO Rating Progression',),
        row_heights=[0.7],
    )

    # The x values are the same for every trace; build them once.
    match_numbers = list(range(len(df)))

    for column in df.columns:
        fig.add_trace(
            go.Scatter(
                x=match_numbers,
                y=df[column],
                name=column,
                mode='lines+markers',
            ),
            row=1, col=1,
        )

    fig.update_layout(
        title='Model ELO Ratings Analysis',
        showlegend=True,
        hovermode='x unified',
    )

    # The original also set an x-axis title ('Models') for row=2, but the
    # grid has a single row, so that call had no subplot to target; it is
    # dropped here.
    fig.update_xaxes(title_text='Match Number', row=1, col=1)

    return fig
420
+
421
  def create_metric_container(label, value, full_name=None):
422
  container = st.container()
423
  with container:
 
432
  with fs.open(SAVE_PATH, 'rb') as f:
433
  st.session_state.df = pd.read_csv(f)
434
 
435
+ try:
436
+ with fs.open(ELO_JSON_PATH,'r') as f:
437
+ st.session_state.elo_json = json.load(f)
438
+ except Exception:
439
+ st.session_state.elo_json = None
440
+
441
+ try:
442
+ with fs.open(ELO_CSV_PATH,'rb') as f:
443
+ st.session_state.elo_df = pd.read_csv(f)
444
+ except Exception:
445
+ st.session_state.elo_df = None
446
+
447
  def dashboard():
448
  st.title('Model Arena Scoreboard')
449
 
450
  if "df" not in st.session_state:
451
  with fs.open(SAVE_PATH, 'rb') as f:
452
  st.session_state.df = pd.read_csv(f)
453
+ if "elo_json" not in st.session_state:
454
+ try:
455
+ with fs.open(ELO_JSON_PATH,'r') as f:
456
+ st.session_state.elo_json = json.load(f)
457
+ except Exception:
458
+ st.session_state.elo_json = None
459
+ if "elo_df" not in st.session_state:
460
+ try:
461
+ with fs.open(ELO_CSV_PATH,'rb') as f:
462
+ st.session_state.elo_df = pd.read_csv(f)
463
+ except Exception:
464
+ st.session_state.elo_df = None
465
 
466
  st.button("Refresh",on_click=on_refresh_click)
467
 
 
496
  with col1:
497
  create_metric_container("Total Matches", len(st.session_state.df))
498
 
499
+ # best_model = max(metrics.items(), key=lambda x: x[1]['win_rate'])[0]
500
+ best_model = max(st.session_state.elo_json.items(), key=lambda x: x[1])[0] if st.session_state.elo_json else max(metrics.items(), key=lambda x: x[1]['win_rate'])[0]
501
  with col2:
502
  create_metric_container(
503
  "Best Model",
 
527
  appearance_chart = create_appearance_chart(metrics)
528
  st.plotly_chart(appearance_chart, use_container_width=True)
529
 
530
+ if st.session_state.elo_json and st.session_state.elo_df:
531
+ st.header('Elo Ratings')
532
+ st.dataframe(pd.DataFrame(st.session_state.elo_json,index=[0]),use_container_width=True)
533
+ elo_progression_chart = create_elo_chart(st.session_state.elo_df)
534
+ st.plotly_chart(elo_progression_chart, use_container_width=True)
535
+
536
  st.header('Head-to-Head Analysis')
537
  matrix_chart = create_head_to_head_matrix(st.session_state.df)
538
  st.plotly_chart(matrix_chart, use_container_width=True)