Spaces:

valory
/

olas-prediction-live-dashboard

Running

App Files Files Community

olas-prediction-live-dashboard / scripts /tools_metrics.py

rosacastillo

removed dependency with tools.parquet and new mech calls computation timestamps based

278fab8 about 1 month ago

raw

history blame

3.55 kB

	import pandas as pd
	from typing import List
	from utils import TMP_DIR, INC_TOOLS, DATA_DIR


	def get_error_data_by_market(
	    tools_df: pd.DataFrame, inc_tools: List[str]
	) -> pd.DataFrame:
	    """Count requests per error flag and compute the error percentage.

	    Only rows whose ``tool`` is listed in ``inc_tools`` are considered. They
	    are grouped by ``tool``, ``request_month_year_week``, ``market_creator``
	    and ``error``; the group sizes are then pivoted so that every ``error``
	    value becomes its own column.

	    Args:
	        tools_df: Requests table with at least the columns ``tool``,
	            ``request_month_year_week``, ``market_creator`` and ``error``.
	            NOTE(review): ``error`` is assumed to hold 0 (ok) / 1 (failed);
	            confirm against the producer of the tools data.
	        inc_tools: Names of the tools to keep.

	    Returns:
	        One row per (tool, week, market creator) with the count columns ``0``
	        and ``1``, plus ``error_perc`` (share of rows with ``error == 1``, in
	        percent) and ``total_requests`` (sum of both counts).
	    """
	    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
	    error = (
	        tools_inc.groupby(
	            ["tool", "request_month_year_week", "market_creator", "error"],
	            sort=False,
	        )
	        .size()
	        .unstack()
	        .fillna(0)
	    )
	    # unstack() only creates a column for error values that actually occur.
	    # Without this guard a data set with no failures at all (or no successes)
	    # lacks the 1 (or 0) column and the arithmetic below raises KeyError.
	    for flag in (0, 1):
	        if flag not in error.columns:
	            error[flag] = 0
	    error = error.reset_index()

	    total = error[0] + error[1]
	    error["error_perc"] = (error[1] / total) * 100
	    error["total_requests"] = total
	    return error


	def get_tool_winning_rate_by_market(
	    tools_df: pd.DataFrame, inc_tools: List[str]
	) -> pd.DataFrame:
	    """Compute the winning percentage per tool, week and market creator.

	    A row is taken into account when its ``tool`` is listed in ``inc_tools``,
	    its ``error`` is not 1, and both ``currentAnswer`` (after mapping the
	    lowercase spellings "yes"/"no" to "Yes"/"No") and ``vote`` are "Yes" or
	    "No". Such a row counts as a win when ``currentAnswer`` equals ``vote``.

	    Args:
	        tools_df: Requests table with at least the columns ``tool``,
	            ``error``, ``currentAnswer``, ``vote``,
	            ``request_month_year_week`` and ``market_creator``. It is not
	            modified.
	        inc_tools: Names of the tools to keep.

	    Returns:
	        One row per (tool, week, market creator) with string column labels:
	        the group keys, ``"0"`` (losses), ``"1"`` (wins), ``win_perc``
	        (wins / (wins + losses) * 100) and ``total_request``. Note that the
	        last label is singular, unlike ``total_requests`` produced by
	        ``get_error_data_by_market``; it is kept because consumers of the
	        returned frame may depend on it.
	    """
	    binary_answers = ["Yes", "No"]
	    group_keys = ["tool", "request_month_year_week", "market_creator"]

	    candidates = tools_df[
	        tools_df["tool"].isin(inc_tools) & (tools_df["error"] != 1)
	    ]
	    # Work on standalone Series and build the scored frame with assign()
	    # instead of writing into a filtered slice, which triggers pandas'
	    # SettingWithCopyWarning.
	    answers = candidates["currentAnswer"].replace({"no": "No", "yes": "Yes"})
	    votes = candidates["vote"]
	    resolved = answers.isin(binary_answers) & votes.isin(binary_answers)
	    scored = candidates.loc[resolved, group_keys].assign(
	        win=(answers[resolved] == votes[resolved]).astype(int)
	    )

	    wins = (
	        scored.groupby(group_keys + ["win"], sort=False)
	        .size()
	        .unstack()
	        .fillna(0)
	    )
	    # unstack() only creates a column for outcomes that actually occur; when
	    # every vote is a win (or every vote a loss) the other column is missing
	    # and the arithmetic below would raise KeyError.
	    for outcome in (0, 1):
	        if outcome not in wins.columns:
	            wins[outcome] = 0

	    wins["win_perc"] = (wins[1] / (wins[0] + wins[1])) * 100
	    wins = wins.reset_index()
	    wins["total_request"] = wins[0] + wins[1]
	    # The outcome columns are labelled with the integers 0 and 1; expose
	    # every label as a string.
	    wins.columns = wins.columns.astype(str)
	    return wins


	def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
	    """Add the weekly label and the aggregated "all" market creator rows.

	    The returned frame contains every input row twice: once unchanged and
	    once with ``market_creator`` set to ``"all"``, so that grouping by
	    ``market_creator`` also yields totals across creators. The input frame
	    is left untouched; previously its ``request_time`` column was overwritten
	    in place.

	    Args:
	        tools: Requests table with a ``request_time`` column that
	            ``pd.to_datetime`` can parse.

	    Returns:
	        A new frame sorted by ``request_time`` (ascending) with
	        ``request_time`` converted to datetime and an additional
	        ``request_month_year_week`` column: the weekly period of
	        ``request_time`` formatted with ``"%b-%d"``.
	    """
	    # assign() returns a new frame, so the caller's data is not modified.
	    prepared = tools.assign(request_time=pd.to_datetime(tools["request_time"]))
	    prepared = prepared.sort_values(by="request_time", ascending=True)
	    prepared["request_month_year_week"] = (
	        prepared["request_time"].dt.to_period("W").dt.strftime("%b-%d")
	    )

	    # Duplicate every row under the synthetic creator "all" to get the totals.
	    totals = prepared.assign(market_creator="all")
	    combined = pd.concat([prepared, totals], ignore_index=True)
	    return combined.sort_values(by="request_time", ascending=True)


	def compute_tools_based_datasets() -> None:
	    """Build the tool metrics datasets and write them as parquet files.

	    Reads ``tools.parquet`` from ``TMP_DIR``, prepares it with
	    ``prepare_tools`` and writes two files to ``DATA_DIR``:
	    ``error_by_markets.parquet`` and ``winning_df.parquet``, both computed
	    for the tools listed in ``INC_TOOLS``.

	    If loading or preparing the data fails, the error is printed and the
	    function returns without writing anything.
	    """
	    try:
	        tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
	        tools_df = prepare_tools(tools_df)
	    except Exception as e:
	        print(f"Error reading old tools parquet file {e}")
	        return None

	    # error by markets
	    error_by_markets = get_error_data_by_market(tools_df=tools_df, inc_tools=INC_TOOLS)
	    error_by_markets.to_parquet(DATA_DIR / "error_by_markets.parquet", index=False)

	    # winning rate by markets: neither metric function modifies tools_df
	    # (both filter it into new frames), so the prepared data is reused
	    # instead of reading and preparing the same file a second time.
	    winning_df = get_tool_winning_rate_by_market(tools_df, inc_tools=INC_TOOLS)
	    winning_df.to_parquet(DATA_DIR / "winning_df.parquet", index=False)