rosacastillo's picture
updating week format starting on Monday, new staking contracts and new weekly data
285f2a6
raw
history blame
3.59 kB
import pandas as pd
from typing import List
from utils import TMP_DIR, INC_TOOLS, DATA_DIR
def get_error_data_by_market(
    tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Count failed and successful requests per tool, week and market creator.

    Args:
        tools_df: Requests table. Must contain the columns ``tool``,
            ``request_month_year_week``, ``market_creator`` and ``error``
            (``error`` is read as a 0/1 flag).
        inc_tools: Names of the tools to keep; rows of other tools are dropped.

    Returns:
        One row per (tool, week, market creator) group with the columns
        ``0`` (number of requests with ``error == 0``), ``1`` (number of
        requests with ``error == 1``), ``error_perc`` (share of ``1`` in
        percent) and ``total_requests`` (``0`` + ``1``).
    """
    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
    error = (
        tools_inc.groupby(
            ["tool", "request_month_year_week", "market_creator", "error"],
            sort=False,
        )
        .size()
        .unstack()
        .fillna(0)
    )
    # unstack() only creates a column for flag values that actually occur.
    # Data with no errors at all (or nothing but errors) would otherwise
    # raise a KeyError when error[0] / error[1] are read below.
    for flag in (0, 1):
        if flag not in error.columns:
            error[flag] = 0
    error = error.reset_index()
    total = error[0] + error[1]
    error["error_perc"] = (error[1] / total) * 100
    error["total_requests"] = total
    return error
def get_tool_winning_rate_by_market(
    tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute how often each tool's vote matched the market's current answer.

    Only requests without error (``error != 1``) whose ``currentAnswer`` and
    ``vote`` are both ``"Yes"`` or ``"No"`` are counted; lowercase ``"yes"`` /
    ``"no"`` answers are normalised first.

    Args:
        tools_df: Requests table. Must contain the columns ``tool``,
            ``request_month_year_week``, ``market_creator``, ``error``,
            ``currentAnswer`` and ``vote``. It is not modified.
        inc_tools: Names of the tools to keep; rows of other tools are dropped.

    Returns:
        One row per (tool, week, market creator) group with the columns
        ``"0"`` (votes that differ from the answer), ``"1"`` (votes equal to
        the answer), ``"win_perc"`` (share of ``"1"`` in percent) and
        ``"total_request"``. All column names are strings.
    """
    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
    # Explicit copy: columns are written below, and writing into a filtered
    # slice triggers pandas' SettingWithCopy warning.
    tools_non_error = tools_inc[tools_inc["error"] != 1].copy()
    tools_non_error["currentAnswer"] = tools_non_error["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )
    binary = ["Yes", "No"]
    decided = tools_non_error["currentAnswer"].isin(binary) & tools_non_error[
        "vote"
    ].isin(binary)
    tools_non_error = tools_non_error[decided].copy()
    tools_non_error["win"] = (
        tools_non_error["currentAnswer"] == tools_non_error["vote"]
    ).astype(int)
    tools_non_error.columns = tools_non_error.columns.astype(str)
    wins = (
        tools_non_error.groupby(
            ["tool", "request_month_year_week", "market_creator", "win"],
            sort=False,
        )
        .size()
        .unstack()
        .fillna(0)
    )
    # unstack() only creates a column for outcomes that actually occur.
    # If every vote was a win (or every vote a loss), the other column is
    # missing and wins[0] / wins[1] would raise a KeyError.
    for outcome in (0, 1):
        if outcome not in wins.columns:
            wins[outcome] = 0
    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
    wins.reset_index(inplace=True)
    wins["total_request"] = wins[0] + wins[1]
    # The 0/1 count columns become "0"/"1" so every column name is a string.
    wins.columns = wins.columns.astype(str)
    return wins
def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
    """Add the weekly label and an aggregated ``"all"`` market creator.

    Args:
        tools: Requests table with at least a ``request_time`` column that
            ``pd.to_datetime`` can parse. The input frame is not modified.

    Returns:
        A new DataFrame, sorted by ``request_time``, in which

        * ``request_time`` is converted to datetime,
        * ``request_month_year_week`` holds the start date of the request's
          weekly period formatted as ``"%b-%d-%Y"`` (with pandas' default
          ``"W"`` frequency, weeks end on Sunday, so the label is the Monday),
        * every row appears twice: once as given and once with
          ``market_creator`` set to ``"all"``, so per-creator and overall
          figures can be grouped in one pass.
    """
    # assign() builds a new frame, so the caller's DataFrame keeps its
    # original request_time column instead of being overwritten in place.
    tools = tools.assign(request_time=pd.to_datetime(tools["request_time"]))
    tools = tools.sort_values(by="request_time", ascending=True)
    tools["request_month_year_week"] = (
        tools["request_time"]
        .dt.to_period("W")
        .dt.start_time.dt.strftime("%b-%d-%Y")
    )
    # Duplicate every request under the synthetic "all" market creator.
    tools_all = tools.copy(deep=True)
    tools_all["market_creator"] = "all"
    tools = pd.concat([tools, tools_all], ignore_index=True)
    tools = tools.sort_values(by="request_time", ascending=True)
    return tools
def compute_tools_based_datasets():
    """Build the error and winning-rate datasets and write them as parquet.

    Reads ``tools.parquet`` from ``TMP_DIR``, prepares it with
    ``prepare_tools`` and writes ``error_by_markets.parquet`` and
    ``winning_df.parquet`` into ``DATA_DIR``.

    Returns:
        None. If the tools file cannot be read or prepared, the error is
        printed and nothing is written.
    """
    # Load and prepare once: neither aggregation below modifies tools_df, so
    # the same prepared frame serves both (it was previously read twice).
    try:
        tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
        tools_df = prepare_tools(tools_df)
    except Exception as e:
        print(f"Error reading old tools parquet file {e}")
        return None

    # error by markets
    error_by_markets = get_error_data_by_market(tools_df=tools_df, inc_tools=INC_TOOLS)
    error_by_markets.to_parquet(DATA_DIR / "error_by_markets.parquet", index=False)

    # winning rate by markets
    winning_df = get_tool_winning_rate_by_market(tools_df, inc_tools=INC_TOOLS)
    winning_df.to_parquet(DATA_DIR / "winning_df.parquet", index=False)