# Page header captured together with the source: "Spaces: Sleeping".
import requests | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
from PIL import Image | |
from io import BytesIO | |
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
from pycaret.classification import load_model,predict_model,blend_models | |
import shap | |
import streamlit.components.v1 as components | |
from sklearn.ensemble import VotingClassifier | |
def load_data(path="fighters_20_03_2021.csv"):
    """Load the per-fighter statistics table from a CSV file.

    Args:
        path: Location of the CSV file to read. Defaults to the snapshot
            file name the application has always used, so existing
            zero-argument calls keep working.

    Returns:
        A ``pandas.DataFrame`` holding the parsed CSV contents.
    """
    return pd.read_csv(path)
def st_shap(plot, height=None):
    """Embed a SHAP plot in the Streamlit page as raw HTML.

    Args:
        plot: Object exposing an ``html()`` method that returns the plot
            markup (the caller passes the result of ``shap.force_plot``).
        height: Height forwarded to ``components.html``; ``None`` leaves
            the component's default in place.
    """
    # The SHAP JavaScript goes in <head> so the plot markup in <body> can use it.
    head_section = f"<head>{shap.getjs()}</head>"
    body_section = f"<body>{plot.html()}</body>"
    components.html(head_section + body_section, height=height)
def _home_status(fighter_country, event_country):
    """Label whether a fighter competes in the country hosting the event.

    Returns an array holding ``"dunno"`` where either value is the literal
    string ``"unknown"``, ``"yes"`` where both values are equal, and
    ``"no"`` otherwise.
    """
    is_unknown = (fighter_country == "unknown") | (event_country == "unknown")
    is_same = fighter_country == event_country
    return np.where(is_unknown, "dunno", np.where(is_same, "yes", "no"))


def preprocess(dataframe):
    """Derive the engineered bout features from raw red/blue corner stats.

    The input is not modified; all work happens on a copy.

    Args:
        dataframe: One row per bout. It must provide ``weight_class``,
            ``country_location`` and, for both the ``R_`` and ``B_`` prefix,
            the columns ``total_time_fought(seconds)``, ``wins``, ``draw``,
            ``losses``, ``fighter_country``, ``Height_cms``, ``Reach_cms``,
            ``current_win_streak``, ``Weight_lbs`` and ``age``. Every
            int64/float64 ``B_`` column needs an ``R_`` counterpart.

    Returns:
        A ``(data, numerical_columns)`` tuple: the feature frame without the
        raw ``R_age``/``B_age`` columns, and the list of numerical column
        names (the int64/float64 columns present before the ``*_diff`` step,
        plus the generated ``*_diff`` and ``*_over_fights`` names, minus the
        two age columns).

    Raises:
        ValueError: If ``R_age`` or ``B_age`` is not an int64/float64 column.
    """
    data = dataframe.copy()
    data['Men_or_women'] = data.weight_class.str.lower().str.contains('women').astype(int)
    data['R_total_time_fought(mins)'] = data['R_total_time_fought(seconds)'] / 60
    data['B_total_time_fought(mins)'] = data['B_total_time_fought(seconds)'] / 60
    data['R_total_fights'] = data['R_wins'] + data['R_draw'] + data['R_losses']
    data['B_total_fights'] = data['B_wins'] + data['B_draw'] + data['B_losses']

    # Vectorized comparisons replace the former row-wise apply(); they give
    # the same labels and also work on an empty frame.
    data['R_fighter_home'] = _home_status(data['R_fighter_country'], data['country_location'])
    data['B_fighter_home'] = _home_status(data['B_fighter_country'], data['country_location'])
    data['fighter_taller_but_not_rangier'] = (
        (data['R_Height_cms'] > data['B_Height_cms'])
        & (data['R_Reach_cms'] <= data['B_Reach_cms'])
    )

    # Per-fight ratios divide by (total fights + 1); the +1 keeps the
    # denominator non-zero for a fighter with no recorded fights.
    red_bouts = data['R_total_fights'] + 1
    blue_bouts = data['B_total_fights'] + 1

    # time
    data['B_avg_time_fought(mins)'] = data['B_total_time_fought(mins)'] / blue_bouts
    data['R_avg_time_fought(mins)'] = data['R_total_time_fought(mins)'] / red_bouts

    # over fights (wins and losses)
    data['R_ratio_win_over_fights_exp'] = (data['R_wins'] / red_bouts) * np.exp(data['R_total_fights'] / 4)
    data['B_ratio_win_over_fights_exp'] = (data['B_wins'] / blue_bouts) * np.exp(data['B_total_fights'] / 4)
    data['R_ratio_win'] = data['R_wins'] / red_bouts
    data['B_ratio_win'] = data['B_wins'] / blue_bouts
    data['R_ratio_losses'] = data['R_losses'] / red_bouts * np.exp(data['R_total_fights'] / 4)
    data['B_ratio_losses'] = data['B_losses'] / blue_bouts * np.exp(data['B_total_fights'] / 4)
    data['Underdog'] = (
        (data['R_current_win_streak'] >= 2) & ~(data['B_current_win_streak'] >= 2)
    ).astype(int)

    numerical_columns = list(data.select_dtypes(include=['int64', 'float64']).columns.values)
    # Prefix-less names of the B_ win/lose counters ("ratio" columns excluded).
    win_columns = [
        col[2:]
        for col in numerical_columns
        if ('win' in col.lower() or 'lose' in col.lower())
        and col.startswith('B_')
        and 'ratio' not in col
    ]
    numerical_columns_fighter = [col[2:] for col in numerical_columns if col.startswith('B_')]

    # Walk the stats in column order instead of iterating a set: set order of
    # strings changes between interpreter runs, which made the output column
    # order non-deterministic.
    skipped = set(win_columns) | {'age'}
    for col in numerical_columns_fighter:
        if col in skipped:
            continue
        data[col + '_diff'] = (data['R_' + col] / red_bouts) - (data['B_' + col] / blue_bouts)
        numerical_columns.append(col + '_diff')

    for prefix, bouts in (('R_', red_bouts), ('B_', blue_bouts)):
        for col in win_columns:
            data[prefix + col + '_over_fights'] = data[prefix + col] / bouts
            numerical_columns.append(prefix + col + '_over_fights')

    data['Weight_lbs_diff2'] = data['B_Weight_lbs'] - data['R_Weight_lbs']
    data['Weight_lbs_diff2_ratio'] = (
        data['Weight_lbs_diff2'] / data[['R_Weight_lbs', 'B_Weight_lbs']].max(axis=1)
    )
    # Offset of 17 so that an 18-year-old maps to log(1) == 0.
    data['age_diff2'] = np.log(data['R_age'] - 17) - np.log(data['B_age'] - 17)
    data['age_diff_my_ratio'] = data['age_diff2'] / data[['B_age', 'R_age']].max(axis=1)

    numerical_columns.remove('R_age')
    numerical_columns.remove('B_age')
    return data.drop(columns=['R_age', 'B_age']), numerical_columns
# --- Page header and fighter selection --------------------------------------
# Scrapes the UFC athlete listing, lets the user pick a red and a blue
# fighter, and shows each fighter's name and profile picture in its column.
# Defines `athlete1` (red) and `athlete2` (blue) for the code further down.
st.title('UFC FIGHTERS MACHINE LEARNING PREDICTION')
st.subheader("Junior N.")

UFC_BASE_URL = "https://www.ufc.com"
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled request blocks the app


def _fetch(url):
    """Return the body of *url* as bytes, or halt the app with an error message."""
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        st.error(f"Could not download {url}: {err}")
        st.stop()
    return response.content


col1, col2 = st.columns(2)

soup = BeautifulSoup(_fetch(f"{UFC_BASE_URL}/athletes"), features="lxml")
athletes = soup.find_all(class_='ath-n__name ath-lf-fl')

# Displayed athlete name -> relative profile URL. Entries without a usable
# link are skipped instead of raising AttributeError.
liste_athletes = {}
for entry in athletes:
    link = entry.find('a')
    if link is not None and link.get('href'):
        liste_athletes[link.text.strip()] = link.get('href')

if not liste_athletes:
    st.error("No athletes could be read from the UFC athletes page.")
    st.stop()

athlete_names = tuple(liste_athletes)
athlete1 = col1.selectbox('Choose Red fighter?', athlete_names)
athlete2 = col2.selectbox('Choose Blue fighter?', athlete_names)

selected_ = [(col1, athlete1, 'Red'), (col2, athlete2, 'Blue')]
for col, athlete, color in selected_:
    soup = BeautifulSoup(_fetch(f"{UFC_BASE_URL}{liste_athletes[athlete]}"), features="lxml")

    name = " ".join(part.capitalize() for part in athlete.split(" "))
    new_title = f"<p style=\"font-family:sans-serif; color:{color}; font-size: 30px;\">{name}</p>"
    col.markdown(new_title, unsafe_allow_html=True)

    img = soup.find(class_='hero-profile__image')
    img_url = img.get('src') if img is not None else None
    if not img_url:
        # A profile page without a hero image should not crash the app.
        col.warning("No profile picture available.")
        continue
    col.image(Image.open(BytesIO(_fetch(img_url))))
# --- Load fighter statistics and predict the bout ---------------------------
# Scores the selected pair twice with the saved model, once with athlete1 in
# the red corner and once with the corners swapped, then shows a SHAP force
# plot built from both passes.
fighters_dataset = load_data()
fighters_dataset = fighters_dataset.set_index('fighter')
fighters_dataset.index = [i.lower() for i in fighters_dataset.index]

# The selectable names come from the UFC site while the statistics come from
# a CSV snapshot, so a selected fighter may be absent from the dataset.
missing_fighters = [
    name for name in (athlete1, athlete2) if name.lower() not in fighters_dataset.index
]
if missing_fighters:
    st.error(f"No statistics available for: {', '.join(missing_fighters)}")
    st.stop()

st.dataframe(fighters_dataset.loc[[athlete1.lower(), athlete2.lower()]])


def _build_matchup(red_name, blue_name):
    """Return a one-row frame with ``R_`` stats of *red_name* and ``B_`` stats of *blue_name*."""
    corners = []
    for prefix, name in (('R_', red_name), ('B_', blue_name)):
        # .iloc[[0]] keeps a single row even if the name occurs more than once.
        stats = fighters_dataset.loc[[name.lower()]].iloc[[0]]
        stats.columns = [prefix + col for col in stats.columns]
        # Drop the name index so both corners share row label 0. Concatenating
        # frames indexed by different names outer-aligns them into two rows
        # with NaN for the opposite corner.
        corners.append(stats.reset_index(drop=True))
    matchup = pd.concat(corners, axis=1)
    # Bout context is fixed; the UI offers no way to choose it.
    matchup['title_bout'] = False
    matchup['weight_class'] = 'Heavyweight'
    matchup['country_location'] = 'usa'
    return matchup


mylgbm = load_model('mylgbm_normal')
explainer = shap.TreeExplainer(mylgbm.named_steps["trained_model"])


def _score_matchup(red_name, blue_name):
    """Display the prediction for one corner assignment.

    Returns the pipeline-transformed features and their SHAP values.
    """
    st.text(f'Red fighter : {red_name}, Blue fighter :{blue_name}')
    features, _ = preprocess(_build_matchup(red_name, blue_name))
    # Every pipeline step except the final estimator.
    transformed = mylgbm[:-1].transform(features)
    st.dataframe(predict_model(mylgbm, features)[['Score', 'Label']])
    return transformed, explainer.shap_values(transformed)


# Pass 1: athlete1 in the red corner. Pass 2: corners swapped.
test_transformed, shap_values1 = _score_matchup(athlete1, athlete2)
test_transformed2, shap_values = _score_matchup(athlete2, athlete1)

fight_idx = 0
# NOTE(review): the two SHAP vectors come from mirrored corner assignments,
# so the same feature position describes a different fighter in each pass.
# Confirm that averaging them position-wise, and plotting against the pass-1
# feature values, is what is intended.
st_shap(shap.force_plot(
    explainer.expected_value[1],
    (shap_values[1][fight_idx, :] + shap_values1[1][fight_idx, :]) / 2,
    test_transformed.loc[fight_idx, :],
    link='logit',
))