"""Model Arena Scoreboard.

A Streamlit dashboard that compares speech-to-text models on win rate,
appearance counts, head-to-head results, and Elo ratings, gated behind a
simple email form.
"""

import json

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from plotly.subplots import make_subplots

from logger import logger
from utils import fs, validate_email
from enums import SAVE_PATH, ELO_JSON_PATH, ELO_CSV_PATH, EMAIL_PATH


def write_email(email):
    # Append the submitted email to the email log, creating the file if absent.
    if fs.exists(EMAIL_PATH):
        with fs.open(EMAIL_PATH, 'rb') as f:
            existing_content = f.read().decode('utf-8')
    else:
        existing_content = ''
    new_content = existing_content + email + '\n'
    with fs.open(EMAIL_PATH, 'wb') as f:
        f.write(new_content.encode('utf-8'))


def get_model_abbreviation(model_name):
    # Map internal model identifiers to display names; unknown names pass through.
    abbrev_map = {
        'Ori Apex': 'Ori Apex',
        'Ori Apex XT': 'Ori Apex XT',
        'deepgram': 'Deepgram',
        'Ori Swift': 'Ori Swift',
        'Ori Prime': 'Ori Prime',
        'azure': 'Azure'
    }
    return abbrev_map.get(model_name, model_name)


def calculate_metrics(df):
    # Expects one column triple per model in the results DataFrame:
    # {model}_appearance, {model}_score, and {model}_duration.
    models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
    metrics = {}
    for model in models:
        appearances = df[f'{model}_appearance'].sum()
        wins = df[f'{model}_score'].sum()
        durations = df[df[f'{model}_appearance'] == 1][f'{model}_duration']
        if appearances > 0:
            win_rate = (wins / appearances) * 100
            avg_duration = durations.mean()
            duration_std = durations.std()
        else:
            win_rate = 0
            avg_duration = 0
            duration_std = 0
        metrics[model] = {
            'appearances': appearances,
            'wins': wins,
            'win_rate': win_rate,
            'avg_response_time': avg_duration,
            'response_time_std': duration_std
        }
    return metrics


def create_win_rate_chart(metrics):
    models = list(metrics.keys())
    win_rates = [metrics[model]['win_rate'] for model in models]

    fig = go.Figure(data=[
        go.Bar(
            x=[get_model_abbreviation(model) for model in models],
            y=win_rates,
            text=[f'{rate:.1f}%' for rate in win_rates],
            textposition='auto',
            hovertext=models
        )
    ])
    fig.update_layout(
        title='Win Rate by Model',
        xaxis_title='Model',
        yaxis_title='Win Rate (%)',
        yaxis_range=[0, 100]
    )
    return fig


def create_appearance_chart(metrics):
    models = list(metrics.keys())
    appearances = [metrics[model]['appearances'] for model in models]

    fig = px.pie(
        values=appearances,
        names=[get_model_abbreviation(model) for model in models],
        title='Model Appearances Distribution',
        # hover_data=[models]
    )
    return fig


def create_head_to_head_matrix(df):
    models = ['Ori Apex', 'Ori Apex XT', 'deepgram', 'Ori Swift', 'Ori Prime', 'azure']
    matrix = np.zeros((len(models), len(models)))

    # matrix[i][j] is model i's win rate over matches where both i and j appeared.
    for i, model1 in enumerate(models):
        for j, model2 in enumerate(models):
            if i != j:
                matches = df[
                    (df[f'{model1}_appearance'] == 1) &
                    (df[f'{model2}_appearance'] == 1)
                ]
                if len(matches) > 0:
                    win_rate = (matches[f'{model1}_score'].sum() / len(matches)) * 100
                    matrix[i][j] = win_rate

    fig = go.Figure(data=go.Heatmap(
        z=matrix,
        x=[get_model_abbreviation(model) for model in models],
        y=[get_model_abbreviation(model) for model in models],
        text=[[f'{val:.1f}%' if val > 0 else '' for val in row] for row in matrix],
        texttemplate='%{text}',
        colorscale='RdYlBu',
        zmin=0,
        zmax=100
    ))
    fig.update_layout(
        title='Head-to-Head Win Rates',
        xaxis_title='Opponent Model',
        yaxis_title='Model'
    )
    return fig


def create_elo_chart(df):
    # One trace per model, plotting its Elo rating after each match.
    fig = make_subplots(rows=1, cols=1, row_heights=[0.7])
    for column in df.columns:
        fig.add_trace(
            go.Scatter(
                x=list(range(len(df))),
                y=df[column],
                name=column,
                mode='lines+markers'
            ),
            row=1, col=1
        )
    fig.update_layout(
        title='Model ELO Ratings Analysis',
        showlegend=True,
        hovermode='x unified'
    )
    fig.update_xaxes(title_text='Match Number', row=1, col=1)
    return fig
def create_metric_container(label, value, full_name=None):
    container = st.container()
    with container:
        st.markdown(f"**{label}**")
        # NOTE: the original inline HTML was lost in extraction; this simple
        # styled <div> is an assumed reconstruction (unsafe_allow_html=True
        # implies the value was wrapped in HTML markup).
        if full_name:
            st.markdown(
                f"<div style='font-size: 1.5rem; font-weight: 600;'>{value}</div>",
                unsafe_allow_html=True
            )
            st.caption(f"Full name: {full_name}")
        else:
            st.markdown(
                f"<div style='font-size: 1.5rem; font-weight: 600;'>{value}</div>",
                unsafe_allow_html=True
            )


def on_refresh_click():
    st.toast("Refreshing data... please wait", icon="🔄")
    with fs.open(SAVE_PATH, 'rb') as f:
        st.session_state.df = pd.read_csv(f)
    try:
        with fs.open(ELO_JSON_PATH, 'r') as f:
            st.session_state.elo_json = json.load(f)
    except Exception as e:
        logger.error("Error while reading elo json file %s", e)
        st.session_state.elo_json = None
    try:
        with fs.open(ELO_CSV_PATH, 'rb') as f:
            st.session_state.elo_df = pd.read_csv(f)
    except Exception as e:
        logger.error("Error while reading elo csv file %s", e)
        st.session_state.elo_df = None


def dashboard():
    st.title('Model Arena Scoreboard')

    if "df" not in st.session_state:
        with fs.open(SAVE_PATH, 'rb') as f:
            st.session_state.df = pd.read_csv(f)
    # The Elo data is optional (the render path below checks for None), so
    # mirror on_refresh_click's error handling instead of crashing on a
    # missing file during the first render.
    if "elo_json" not in st.session_state:
        try:
            with fs.open(ELO_JSON_PATH, 'r') as f:
                st.session_state.elo_json = json.load(f)
        except Exception as e:
            logger.error("Error while reading elo json file %s", e)
            st.session_state.elo_json = None
    if "elo_df" not in st.session_state:
        try:
            with fs.open(ELO_CSV_PATH, 'rb') as f:
                st.session_state.elo_df = pd.read_csv(f)
        except Exception as e:
            logger.error("Error while reading elo csv file %s", e)
            st.session_state.elo_df = None

    st.button("🔄 Refresh", on_click=on_refresh_click, key="refresh_btn")

    if len(st.session_state.df) != 0:
        metrics = calculate_metrics(st.session_state.df)

        MODEL_DESCRIPTIONS = {
            "Ori Prime": "Foundational, large, and stable.",
            "Ori Swift": "Lighter and faster than Ori Prime.",
            "Ori Apex": "The top-performing model, fast and stable.",
            "Ori Apex XT": "Enhanced with more training, though slightly less stable than Ori Apex.",
            "Deepgram": "Deepgram Nova-2 API",
            "Azure": "Azure Speech Services API"
        }

        st.header('Model Descriptions')
        cols = st.columns(2)
        for idx, (model, description) in enumerate(MODEL_DESCRIPTIONS.items()):
            with cols[idx % 2]:
                # NOTE: the original card markup was lost in extraction; this
                # bordered <div> is an assumed reconstruction.
                st.markdown(
                    f"""
                    <div style='border: 1px solid #ddd; border-radius: 8px;
                                padding: 12px; margin-bottom: 12px;'>
                        <strong>{model}</strong><br>
                        {description}
                    </div>
                    """,
                    unsafe_allow_html=True
                )

        st.header('Overall Performance')
        col1, col2, col3 = st.columns(3)

        with col1:
            create_metric_container("Total Matches", len(st.session_state.df))

        # Prefer the Elo leader when ratings are available; otherwise fall back
        # to the model with the highest raw win rate.
        if st.session_state.elo_json:
            best_model = max(st.session_state.elo_json.items(), key=lambda x: x[1])[0]
        else:
            best_model = max(metrics.items(), key=lambda x: x[1]['win_rate'])[0]
        with col2:
            create_metric_container(
                "Best Model",
                get_model_abbreviation(best_model),
                full_name=best_model
            )

        most_appearances = max(metrics.items(), key=lambda x: x[1]['appearances'])[0]
        with col3:
            create_metric_container(
                "Most Used",
                get_model_abbreviation(most_appearances),
                full_name=most_appearances
            )

        metrics_df = pd.DataFrame.from_dict(metrics, orient='index')
        metrics_df['win_rate'] = metrics_df['win_rate'].round(2)
        metrics_df.drop(["avg_response_time", "response_time_std"], axis=1, inplace=True)
        metrics_df.index = [get_model_abbreviation(model) for model in metrics_df.index]
        st.dataframe(metrics_df, use_container_width=True)

        st.header('Win Rates')
        win_rate_chart = create_win_rate_chart(metrics)
        st.plotly_chart(win_rate_chart, use_container_width=True)

        st.header('Appearance Distribution')
        appearance_chart = create_appearance_chart(metrics)
        st.plotly_chart(appearance_chart, use_container_width=True)

        if st.session_state.elo_json is not None and st.session_state.elo_df is not None:
            st.header('Elo Ratings')
            st.dataframe(
                pd.DataFrame(st.session_state.elo_json, index=[0]),
                use_container_width=True
            )
            elo_progression_chart = create_elo_chart(st.session_state.elo_df)
            st.plotly_chart(elo_progression_chart, use_container_width=True)

        st.header('Head-to-Head Analysis')
        matrix_chart = create_head_to_head_matrix(st.session_state.df)
        st.plotly_chart(matrix_chart, use_container_width=True)
    else:
        st.write("No Data to show")


if __name__ == "__main__":
    if 'logged_in' not in st.session_state:
        st.session_state.logged_in = False

    if st.session_state.logged_in:
        dashboard()
    else:
        # Gate the dashboard behind a one-time email form.
        with st.form("contact_us_form"):
            st.subheader("Please enter your email to view the scoreboard")
            email = st.text_input("Email")
            submit_button = st.form_submit_button("Submit")

        if submit_button:
            if not email:
                st.error("Please enter your email")
            elif not validate_email(email):
                st.error("Please enter a valid email address")
            else:
                st.session_state.logged_in = True
                st.session_state.user_email = email
                write_email(st.session_state.user_email)
                st.success("Thanks for submitting your email")
                dashboard()