updating week format starting on Monday, new staking contracts and new weekly data
285f2a6
import pandas as pd | |
from typing import List | |
from utils import TMP_DIR, INC_TOOLS, DATA_DIR | |
def get_error_data_by_market(
    tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Count requests per error flag and compute the error percentage.

    Rows whose ``tool`` is listed in ``inc_tools`` are grouped by ``tool``,
    ``request_month_year_week`` and ``market_creator``; the ``error`` flag is
    pivoted into one count column per flag value.

    Args:
        tools_df: Requests table with the columns ``tool``,
            ``request_month_year_week``, ``market_creator`` and ``error``.
            Rows with ``error == 1`` are counted as errors, rows with
            ``error == 0`` as non-errors.
        inc_tools: Names of the tools to keep.

    Returns:
        One row per (tool, week, market_creator) holding the count columns
        ``0`` and ``1``, plus ``error_perc`` (share of flag ``1`` in percent)
        and ``total_requests`` (sum of both counts).
    """
    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
    error = (
        tools_inc.groupby(
            ["tool", "request_month_year_week", "market_creator", "error"], sort=False
        )
        .size()
        .unstack()
        .fillna(0)
    )
    # unstack() only creates a column for flag values that actually occur, so
    # data without any error (or with errors only) would lack one of the two
    # columns and the arithmetic below would raise a KeyError.
    for flag in (0, 1):
        if flag not in error.columns:
            error[flag] = 0
    error = error.reset_index()

    total = error[0] + error[1]
    error["error_perc"] = (error[1] / total) * 100
    error["total_requests"] = total
    return error
def get_tool_winning_rate_by_market(
    tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Count wins and losses per tool, week and market, and compute the win rate.

    Only requests of the tools in ``inc_tools`` whose ``error`` flag is not 1
    are considered. ``currentAnswer`` values ``"yes"``/``"no"`` are normalised
    to ``"Yes"``/``"No"``; rows whose ``currentAnswer`` or ``vote`` is not one
    of ``"Yes"``/``"No"`` are dropped. A request is a win when ``vote`` equals
    ``currentAnswer``.

    Args:
        tools_df: Requests table with the columns ``tool``,
            ``request_month_year_week``, ``market_creator``, ``error``,
            ``currentAnswer`` and ``vote``. It is not modified.
        inc_tools: Names of the tools to keep.

    Returns:
        One row per (tool, week, market_creator) with the loss/win counts in
        the columns ``"0"`` and ``"1"``, plus ``win_perc`` (wins in percent)
        and ``total_request`` (wins + losses). All column labels are strings.
    """
    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
    # Work on an explicit copy: the frame is modified below, and writing into
    # a filtered slice triggers pandas' SettingWithCopyWarning.
    tools_non_error = tools_inc[tools_inc["error"] != 1].copy()
    tools_non_error["currentAnswer"] = tools_non_error["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )
    valid_answers = ["Yes", "No"]
    tools_non_error = tools_non_error[
        tools_non_error["currentAnswer"].isin(valid_answers)
        & tools_non_error["vote"].isin(valid_answers)
    ].copy()
    tools_non_error["win"] = (
        tools_non_error["currentAnswer"] == tools_non_error["vote"]
    ).astype(int)
    tools_non_error.columns = tools_non_error.columns.astype(str)

    wins = (
        tools_non_error.groupby(
            ["tool", "request_month_year_week", "market_creator", "win"], sort=False
        )
        .size()
        .unstack()
        .fillna(0)
    )
    # unstack() only creates a column for outcomes that actually occur; when
    # every request is a win (or every one a loss) the other column would be
    # missing and the arithmetic below would raise a KeyError.
    for outcome in (0, 1):
        if outcome not in wins.columns:
            wins[outcome] = 0

    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
    wins.reset_index(inplace=True)
    wins["total_request"] = wins[0] + wins[1]
    # The count columns are labelled with the integers 0 and 1; convert every
    # label to a string so the frame has uniform column names.
    wins.columns = wins.columns.astype(str)
    return wins
def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
    """Add the weekly bucket column and an aggregated ``"all"`` market copy.

    Args:
        tools: Requests table with a ``request_time`` column that
            ``pd.to_datetime`` can parse. The passed frame is not modified.

    Returns:
        A new frame, sorted by ``request_time``, that contains every input row
        twice: once unchanged and once with ``market_creator`` set to
        ``"all"``. Each row carries ``request_month_year_week``, the start of
        its weekly period formatted as ``"%b-%d-%Y"``.
    """
    # assign() builds a new frame, so the caller's DataFrame keeps its
    # original ``request_time`` column instead of being overwritten in place.
    tools = tools.assign(request_time=pd.to_datetime(tools["request_time"]))
    tools = tools.sort_values(by="request_time", ascending=True)

    # "W" periods end on Sunday, so their start_time is the Monday of the week.
    week_start = tools["request_time"].dt.to_period("W").dt.start_time
    tools["request_month_year_week"] = week_start.dt.strftime("%b-%d-%Y")

    # Duplicate every request under the synthetic market creator "all" so the
    # totals across markets can be grouped like any other market.
    tools_all = tools.copy(deep=True)
    tools_all["market_creator"] = "all"

    tools = pd.concat([tools, tools_all], ignore_index=True)
    return tools.sort_values(by="request_time", ascending=True)
def compute_tools_based_datasets() -> None:
    """Build the error and winning-rate datasets and write them as parquet.

    Reads ``tools.parquet`` from ``TMP_DIR``, prepares it with
    ``prepare_tools`` and writes ``error_by_markets.parquet`` and
    ``winning_df.parquet`` into ``DATA_DIR``. If reading or preparing the
    input fails, the error is printed and nothing is written.
    """
    # Load and prepare the input once; neither aggregation below modifies the
    # frame, so both can share it instead of re-reading the file.
    try:
        tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
        tools_df = prepare_tools(tools_df)
    except Exception as e:
        print(f"Error reading old tools parquet file {e}")
        return None

    # error by markets
    error_by_markets = get_error_data_by_market(tools_df=tools_df, inc_tools=INC_TOOLS)
    error_by_markets.to_parquet(DATA_DIR / "error_by_markets.parquet", index=False)

    # winning rate by markets
    winning_df = get_tool_winning_rate_by_market(tools_df, inc_tools=INC_TOOLS)
    winning_df.to_parquet(DATA_DIR / "winning_df.parquet", index=False)