import re
from collections import defaultdict
from typing import Any, Dict, Tuple

import pandas as pd
from tqdm import tqdm

from tools import IRRELEVANT_TOOLS
from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR, transform_to_datetime


def update_roi(row: pd.Series) -> float:
    """Return the ROI of one trade row.

    The ROI is ``net_earnings`` divided by the total cost of the trade, i.e.
    ``collateral_amount + trade_fee_amount + num_mech_calls * DEFAULT_MECH_FEE``.

    Args:
        row: a row exposing ``net_earnings``, ``collateral_amount``,
            ``trade_fee_amount`` and ``num_mech_calls`` as attributes.
    """
    total_cost = (
        row.collateral_amount
        + row.trade_fee_amount
        + row.num_mech_calls * DEFAULT_MECH_FEE
    )
    return row.net_earnings / total_cost


def get_mech_statistics(mech_requests: Dict[str, Any]) -> Dict[str, Dict[str, int]]:
    """Outputs a table with Mech statistics.

    For every question the result holds ``"count"`` (number of requests) and
    ``"fees"`` (sum of the requests' ``"fee"`` values).

    Requests are skipped when they have no ``"ipfs_contents"``, when the
    contents lack a ``"tool"`` or ``"prompt"``, or when the tool is listed in
    ``IRRELEVANT_TOOLS``.

    Args:
        mech_requests: mapping whose values are the mech request dicts.

    Returns:
        A nested ``defaultdict`` keyed by question text.
    """
    mech_statistics: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))

    for mech_request in mech_requests.values():
        if (
            "ipfs_contents" not in mech_request
            or "tool" not in mech_request["ipfs_contents"]
            or "prompt" not in mech_request["ipfs_contents"]
        ):
            continue

        if mech_request["ipfs_contents"]["tool"] in IRRELEVANT_TOOLS:
            continue

        # Normalise the prompt to a single line with single spaces.
        prompt = mech_request["ipfs_contents"]["prompt"]
        prompt = prompt.replace("\n", " ")
        prompt = prompt.strip()
        prompt = re.sub(r"\s+", " ", prompt)

        # The question is the text between the first and the last double
        # quote (greedy match); without quotes the whole prompt is used.
        prompt_match = re.search(r"\"(.*)\"", prompt)
        question = prompt_match.group(1) if prompt_match else prompt

        mech_statistics[question]["count"] += 1
        mech_statistics[question]["fees"] += mech_request["fee"]

    return mech_statistics


def create_unknown_traders_df(
    trades_df: pd.DataFrame,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Filter trades coming from non-Olas traders that are placing no mech calls.

    Args:
        trades_df: trades with ``staking`` and ``num_mech_calls`` columns.

    Returns:
        A tuple ``(no_mech_calls_df, remaining_trades_df)``: the rows whose
        ``staking`` is ``"non_Olas"`` and whose ``num_mech_calls`` is 0, and
        all the other rows.
    """
    no_mech_calls_mask = (trades_df["staking"] == "non_Olas") & (
        trades_df["num_mech_calls"] == 0
    )
    no_mech_calls_df = trades_df.loc[no_mech_calls_mask]
    remaining_trades_df = trades_df.loc[~no_mech_calls_mask]
    return no_mech_calls_df, remaining_trades_df


def update_trade_nr_mech_calls(non_agents: bool = False) -> None:
    """Zero the mech calls of traders without tools usage and refresh the ROI.

    Reads ``all_trades_profitability.parquet`` and ``tools.parquet`` from
    ``DATA_DIR``, sets ``num_mech_calls`` to 0 for every selected trader that
    has no row in the tools file, recomputes the ``roi`` column for all trades
    and overwrites ``all_trades_profitability.parquet``.

    Args:
        non_agents: when True only traders whose ``staking`` value is
            ``"non_agent"`` are checked; otherwise all traders are checked.
    """
    try:
        all_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
        tools = pd.read_parquet(DATA_DIR / "tools.parquet")
    except Exception as e:
        # Without the input files there is nothing to update; continuing
        # would only fail later with a NameError.
        print(f"Error reading the profitability and tools parquet files: {e}")
        return

    non_agent_mask = all_trades_df["staking"] == "non_agent"
    if non_agents:
        traders = list(all_trades_df.loc[non_agent_mask].trader_address.unique())
    else:
        traders = list(all_trades_df.trader_address.unique())

    print("before updating")
    print(all_trades_df.loc[non_agent_mask].num_mech_calls.describe())

    # Built once so the loop does not rescan the tools dataframe per trader.
    traders_with_tools = set(tools["trader_address"].unique())
    for trader in tqdm(traders, desc="Updating Traders mech calls", unit="traders"):
        if trader in traders_with_tools:
            continue
        tqdm.write(f"trader with no tools usage found {trader}")
        # The column must be "num_mech_calls": it is the one read by
        # update_roi and by the before/after summaries printed here.
        all_trades_df.loc[
            all_trades_df["trader_address"] == trader, "num_mech_calls"
        ] = 0

    # update roi
    all_trades_df["roi"] = all_trades_df.apply(update_roi, axis=1)

    print("after updating")
    print(all_trades_df.loc[non_agent_mask].num_mech_calls.describe())

    # saving
    all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
    # print("Summarising trades...")
    # summary_df = summary_analyse(all_trades_df)
    # summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)


def get_daily_mech_calls_estimation(
    daily_trades: pd.DataFrame, daily_tools: pd.DataFrame
) -> list:
    """Estimate the mech calls per trade for each market traded in one day.

    The trader address and the trading day are taken from the first row of
    ``daily_trades``.

    Args:
        daily_trades: trades with ``title``, ``trader_address`` and
            ``creation_date`` columns.
        daily_tools: mech requests with a ``title`` column.

    Returns:
        One dict per distinct market title in ``daily_trades`` with the keys
        ``trader_address``, ``trading_day``, ``total_trades``,
        ``total_mech_requests``, ``market`` and ``mech_calls_per_trade``
        (requests divided by trades, or 0 when the market has no requests).
        An empty list when ``daily_trades`` has no rows.
    """
    if daily_trades.empty:
        return []

    first_trade = daily_trades.iloc[0]
    trader = first_trade.trader_address
    day = first_trade.creation_date

    estimations = []
    # for each market
    for market in daily_trades.title.unique():
        # trades done on this market; never 0 because the market title
        # comes from daily_trades itself
        total_trades = len(daily_trades[daily_trades["title"] == market])
        # tools usage of this market
        total_requests = len(daily_tools.loc[daily_tools["title"] == market])

        mech_calls_estimation = 0
        if total_requests > 0:
            mech_calls_estimation = total_requests / total_trades

        estimations.append(
            {
                "trader_address": trader,
                "trading_day": day,
                "total_trades": total_trades,
                "total_mech_requests": total_requests,
                "market": market,
                "mech_calls_per_trade": mech_calls_estimation,
            }
        )
    return estimations


def _prepare_trades_and_tools(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame, sort_by_time: bool
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Add the datetime/date helper columns and optionally sort both frames.

    The columns ``creation_timestamp``/``creation_date`` (trades) and
    ``request_time``/``request_date`` (tools) are written on the dataframes
    that were passed in. Sorting returns new dataframes.
    """
    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date
    if sort_by_time:
        fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
        tools = tools.sort_values(by="request_time", ascending=True)
    return fpmmTrades, tools


def compute_daily_mech_calls(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Function to compute the daily mech calls at the trader and market level.

    Args:
        fpmmTrades: trades with ``trader_address`` and ``creationTimestamp``
            columns. Helper date columns are added to it in place.
        tools: mech requests with ``trader_address`` and ``request_time``
            columns. Helper date columns are added to it in place.

    Returns:
        One row per trader and trading day with the columns
        ``trader_address``, ``total_trades``, ``trading_day`` and
        ``total_mech_calls``.
    """
    fpmmTrades, tools = _prepare_trades_and_tools(fpmmTrades, tools, sort_by_time=True)

    traders = fpmmTrades["trader_address"].unique()
    all_mech_calls = []
    for trader in tqdm(
        traders,
        total=len(traders),
        desc="creating daily mech calls computation",
    ):
        # compute the mech calls estimations for each trader
        all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
        all_tools = tools[tools["trader_address"] == trader]
        for trading_day in all_trades.creation_date.unique():
            daily_trades = all_trades.loc[all_trades["creation_date"] == trading_day]
            daily_tools = all_tools.loc[all_tools["request_date"] == trading_day]
            all_mech_calls.append(
                {
                    "trader_address": trader,
                    "total_trades": len(daily_trades),
                    "trading_day": trading_day,
                    "total_mech_calls": len(daily_tools),
                }
            )
    return pd.DataFrame(all_mech_calls)


def compute_mech_call_estimations(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Function to compute the estimated mech calls needed per trade at the trader and market level.

    Args:
        fpmmTrades: trades with ``trader_address``, ``title`` and
            ``creationTimestamp`` columns. Helper date columns are added to
            it in place.
        tools: mech requests with ``trader_address``, ``title`` and
            ``request_time`` columns. Helper date columns are added to it in
            place.

    Returns:
        The records produced by ``get_daily_mech_calls_estimation`` for every
        trader and trading day, as a dataframe.
    """
    fpmmTrades, tools = _prepare_trades_and_tools(
        fpmmTrades, tools, sort_by_time=False
    )

    traders = fpmmTrades["trader_address"].unique()
    all_estimations = []
    for trader in tqdm(
        traders,
        total=len(traders),
        desc="creating mech calls estimation dataframe",
    ):
        # compute the mech calls estimations for each trader
        all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
        all_tools = tools[tools["trader_address"] == trader]
        for trading_day in all_trades.creation_date.unique():
            daily_trades = all_trades.loc[all_trades["creation_date"] == trading_day]
            daily_tools = all_tools.loc[all_tools["request_date"] == trading_day]
            all_estimations.extend(
                get_daily_mech_calls_estimation(
                    daily_trades=daily_trades, daily_tools=daily_tools
                )
            )
    return pd.DataFrame(all_estimations)


def compute_timestamp_mech_calls(
    all_trades: pd.DataFrame, all_tools: pd.DataFrame
) -> list:
    """Function to compute the mech calls based on timestamps but without repeating mech calls.

    For every trade, the mech requests of the same trader and market whose
    ``request_time`` is strictly before the trade's ``creation_timestamp``
    are counted. A request timestamp is counted at most once per market: once
    used by a trade it is ignored by the following ones.

    Args:
        all_trades: trades with ``trader_address``, ``id``, ``title`` and
            ``creation_timestamp`` columns.
        all_tools: mech requests with ``trader_address``, ``title`` and
            ``request_time`` columns.

    Returns:
        One dict per trade with the keys ``trader_address``, ``market``,
        ``trade_id`` and ``total_mech_calls``.
    """
    # initialize the dict with all markets; sets give O(1) membership checks
    request_timestamps_used = {market: set() for market in all_trades.title.unique()}

    mech_calls_contents = []
    for _, trade in all_trades.iterrows():
        trader = trade["trader_address"]
        market = trade["title"]
        trade_ts = trade["creation_timestamp"]
        market_requests = all_tools.loc[
            (all_tools["trader_address"] == trader) & (all_tools["title"] == market)
        ]
        used_timestamps = request_timestamps_used[market]

        # traverse market requests
        total_mech_calls = 0
        for request_ts in market_requests["request_time"]:
            # only requests placed before the trade and not yet used by a
            # previous trade are counted
            if request_ts < trade_ts and request_ts not in used_timestamps:
                used_timestamps.add(request_ts)
                total_mech_calls += 1

        # create entry for the dataframe
        mech_calls_contents.append(
            {
                "trader_address": trader,
                "market": market,
                "trade_id": trade["id"],
                "total_mech_calls": total_mech_calls,
            }
        )
    return mech_calls_contents


def compute_mech_calls_based_on_timestamps(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Function to compute the mech calls needed per trade at the trader and market level using timestamps.

    Args:
        fpmmTrades: trades with ``trader_address``, ``id``, ``title`` and
            ``creationTimestamp`` columns. Helper date columns are added to
            it in place.
        tools: mech requests with ``trader_address``, ``title`` and
            ``request_time`` columns. Helper date columns are added to it in
            place.

    Returns:
        The records produced by ``compute_timestamp_mech_calls`` for every
        trader, as a dataframe.
    """
    # Both frames are sorted by time so that earlier trades claim the
    # earlier requests first.
    fpmmTrades, tools = _prepare_trades_and_tools(fpmmTrades, tools, sort_by_time=True)

    traders = fpmmTrades["trader_address"].unique()
    all_mech_calls = []
    for trader in tqdm(
        traders,
        total=len(traders),
        desc="creating mech calls count based on timestamps",
    ):
        # compute the mech calls for each trader
        all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
        all_tools = tools[tools["trader_address"] == trader]
        all_mech_calls.extend(compute_timestamp_mech_calls(all_trades, all_tools))
    return pd.DataFrame(all_mech_calls)


if __name__ == "__main__":
    # update_trade_nr_mech_calls(non_agents=True)
    tools = pd.read_parquet(TMP_DIR / "tools.parquet")
    fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
    fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
        transform_to_datetime
    )
    result = compute_mech_calls_based_on_timestamps(fpmmTrades=fpmmTrades, tools=tools)
    result.to_parquet(TMP_DIR / "result_df.parquet", index=False)