|
import pandas as pd |
|
from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR, transform_to_datetime |
|
from tqdm import tqdm |
|
|
|
from typing import Dict, Any |
|
from collections import defaultdict |
|
from tools import IRRELEVANT_TOOLS |
|
import re |
|
|
|
|
|
def update_roi(row: pd.DataFrame) -> float:
    """Recompute the return on investment for one trade record.

    The invested amount is the collateral plus the trade fee plus the cost of
    the mech calls, where every call is priced at ``DEFAULT_MECH_FEE``.

    Args:
        row: Record exposing ``net_earnings``, ``collateral_amount``,
            ``trade_fee_amount`` and ``num_mech_calls`` as attributes.

    Returns:
        ``net_earnings`` divided by the total invested amount.
    """
    mech_costs = row.num_mech_calls * DEFAULT_MECH_FEE
    total_investment = row.collateral_amount + row.trade_fee_amount + mech_costs
    return row.net_earnings / total_investment
|
|
|
|
|
def get_mech_statistics(mech_requests: Dict[str, Any]) -> Dict[str, Dict[str, int]]:
    """Aggregate the number of mech requests and their fees per question.

    Requests without ``ipfs_contents``, or whose contents lack a ``tool`` or a
    ``prompt``, are skipped, as are requests whose tool is listed in
    ``IRRELEVANT_TOOLS``.

    Args:
        mech_requests: Mapping whose values are mech request records; each
            counted record must also provide a ``fee`` entry.

    Returns:
        A nested mapping ``question -> {"count": ..., "fees": ...}``.
    """
    stats_by_question: Dict[str, Dict[str, int]] = defaultdict(
        lambda: defaultdict(int)
    )

    for request in mech_requests.values():
        has_payload = (
            "ipfs_contents" in request
            and "tool" in request["ipfs_contents"]
            and "prompt" in request["ipfs_contents"]
        )
        if not has_payload or request["ipfs_contents"]["tool"] in IRRELEVANT_TOOLS:
            continue

        # Flatten the prompt to a single line with single spaces.
        flattened = request["ipfs_contents"]["prompt"].replace("\n", " ").strip()
        flattened = re.sub(r"\s+", " ", flattened)

        # The question is the text between the first and the last double
        # quote; fall back to the whole prompt when nothing is quoted.
        quoted = re.search(r"\"(.*)\"", flattened)
        question = quoted.group(1) if quoted else flattened

        entry = stats_by_question[question]
        entry["count"] += 1
        entry["fees"] += request["fee"]

    return stats_by_question
|
|
|
|
|
def create_unknown_traders_df(
    trades_df: pd.DataFrame,
) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Split off the trades of non-Olas traders that placed no mech calls.

    Args:
        trades_df: Trades table; must contain the ``staking`` and
            ``num_mech_calls`` columns.

    Returns:
        A ``(no_mech_calls_df, remaining_trades_df)`` pair. The first frame
        holds the rows whose ``staking`` is ``"non_Olas"`` and whose
        ``num_mech_calls`` is 0; the second holds every other row.
    """
    no_mech_calls_mask = (trades_df["staking"] == "non_Olas") & (
        trades_df["num_mech_calls"] == 0
    )
    no_mech_calls_df = trades_df.loc[no_mech_calls_mask]
    remaining_trades_df = trades_df.loc[~no_mech_calls_mask]
    return no_mech_calls_df, remaining_trades_df
|
|
|
|
|
def update_trade_nr_mech_calls(non_agents: bool = False):
    """Zero the mech calls of traders without tool usage and refresh the ROI.

    Reads ``all_trades_profitability.parquet`` and ``tools.parquet`` from
    ``DATA_DIR``, sets ``num_mech_calls`` to 0 for every inspected trader that
    has no row in the tools table, recomputes the ``roi`` column with
    ``update_roi`` and overwrites ``all_trades_profitability.parquet``.

    Args:
        non_agents: When True only traders whose ``staking`` value is
            ``"non_agent"`` are inspected; otherwise every trader is.

    Raises:
        Exception: Whatever ``pd.read_parquet`` raised; it is re-raised after
            the error message has been printed.
    """
    try:
        all_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
        tools = pd.read_parquet(DATA_DIR / "tools.parquet")
    except Exception as e:
        print(f"Error reading the profitability and tools parquet files: {e}")
        # Nothing below can work without both tables, so surface the real
        # error instead of failing later on an unbound variable.
        raise

    # The staking column is never modified here, so the mask stays valid for
    # both the "before" and the "after" report.
    non_agent_mask = all_trades_df["staking"] == "non_agent"
    if non_agents:
        traders = list(all_trades_df.loc[non_agent_mask].trader_address.unique())
    else:
        traders = list(all_trades_df.trader_address.unique())

    print("before updating")
    print(all_trades_df.loc[non_agent_mask].num_mech_calls.describe())

    # Build the lookup once instead of filtering the tools table per trader.
    traders_with_tools = set(tools["trader_address"].dropna().unique())
    for trader in tqdm(traders, desc="Updating Traders mech calls", unit="traders"):
        if trader in traders_with_tools:
            continue
        tqdm.write(f"trader with no tools usage found {trader}")
        # Must target ``num_mech_calls``: that is the column ``update_roi``
        # reads, so the ROI recomputation below picks up the change.
        all_trades_df.loc[
            all_trades_df["trader_address"] == trader, "num_mech_calls"
        ] = 0

    all_trades_df["roi"] = all_trades_df.apply(update_roi, axis=1)

    print("after updating")
    print(all_trades_df.loc[non_agent_mask].num_mech_calls.describe())

    all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
|
|
|
|
|
|
|
|
|
|
|
def get_daily_mech_calls_estimation(
    daily_trades: pd.DataFrame, daily_tools: pd.DataFrame
) -> list:
    """Estimate the mech calls per trade for every market in ``daily_trades``.

    The trader address and the trading day are taken from the first row of
    ``daily_trades`` and copied into every returned entry.

    Args:
        daily_trades: Trades with the ``title``, ``trader_address`` and
            ``creation_date`` columns.
        daily_tools: Mech requests with a ``title`` column.

    Returns:
        One dict per distinct market title with the keys ``trader_address``,
        ``trading_day``, ``total_trades``, ``total_mech_requests``,
        ``market`` and ``mech_calls_per_trade``. An empty list is returned
        when ``daily_trades`` has no rows.
    """
    # Without trades there is no first row to read the trader/day from.
    if daily_trades.empty:
        return []

    first_trade = daily_trades.iloc[0]
    trader = first_trade["trader_address"]
    day = first_trade["creation_date"]

    estimations = []
    for market in daily_trades["title"].unique():
        total_trades = int((daily_trades["title"] == market).sum())
        total_requests = int((daily_tools["title"] == market).sum())

        # Stays at 0 when the market received no mech request at all.
        mech_calls_estimation = 0
        if total_requests > 0:
            mech_calls_estimation = total_requests / total_trades

        estimations.append(
            {
                "trader_address": trader,
                "trading_day": day,
                "total_trades": total_trades,
                "total_mech_requests": total_requests,
                "market": market,
                "mech_calls_per_trade": mech_calls_estimation,
            }
        )
    return estimations
|
|
|
|
|
def compute_daily_mech_calls(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Count the trades and the mech calls of every trader per trading day.

    Note that the inputs are modified in place: ``creation_timestamp`` and
    ``creation_date`` are written to ``fpmmTrades``, and ``request_time`` and
    ``request_date`` are written to ``tools``.

    Args:
        fpmmTrades: Trades with ``trader_address`` and ``creationTimestamp``.
        tools: Mech requests with ``trader_address`` and ``request_time``.

    Returns:
        A frame with one row per (trader, trading day) holding
        ``trader_address``, ``total_trades``, ``trading_day`` and
        ``total_mech_calls``.
    """
    trader_count = len(fpmmTrades["trader_address"].unique())

    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
    sorted_trades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)

    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date
    sorted_tools = tools.sort_values(by="request_time", ascending=True)

    rows = []
    progress = tqdm(
        sorted_trades["trader_address"].unique(),
        total=trader_count,
        desc="creating daily mech calls computation",
    )
    for trader in progress:
        trader_trades = sorted_trades[sorted_trades["trader_address"] == trader]
        trader_tools = sorted_tools[sorted_tools["trader_address"] == trader]

        for trading_day in trader_trades.creation_date.unique():
            same_day_trades = trader_trades.loc[
                trader_trades["creation_date"] == trading_day
            ]
            # Requests are matched to trades by calendar date only.
            same_day_requests = trader_tools.loc[
                trader_tools["request_date"] == trading_day
            ]
            rows.append(
                {
                    "trader_address": trader,
                    "total_trades": len(same_day_trades),
                    "trading_day": trading_day,
                    "total_mech_calls": len(same_day_requests),
                }
            )
    return pd.DataFrame.from_dict(rows, orient="columns")
|
|
|
|
|
def compute_mech_call_estimations(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Estimate the mech calls needed per trade, per trader, day and market.

    Note that the inputs are modified in place: ``creation_timestamp`` and
    ``creation_date`` are written to ``fpmmTrades``, and ``request_time`` and
    ``request_date`` are written to ``tools``.

    Args:
        fpmmTrades: Trades with ``trader_address``, ``creationTimestamp`` and
            the columns required by ``get_daily_mech_calls_estimation``.
        tools: Mech requests with ``trader_address``, ``request_time`` and
            the columns required by ``get_daily_mech_calls_estimation``.

    Returns:
        A frame built from the entries produced by
        ``get_daily_mech_calls_estimation`` for every trader and trading day.
    """
    trader_count = len(fpmmTrades["trader_address"].unique())

    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date

    collected = []
    progress = tqdm(
        fpmmTrades["trader_address"].unique(),
        total=trader_count,
        desc="creating mech calls estimation dataframe",
    )
    for trader in progress:
        trader_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
        trader_tools = tools[tools["trader_address"] == trader]

        for trading_day in trader_trades.creation_date.unique():
            same_day_trades = trader_trades.loc[
                trader_trades["creation_date"] == trading_day
            ]
            same_day_requests = trader_tools.loc[
                trader_tools["request_date"] == trading_day
            ]
            collected.extend(
                get_daily_mech_calls_estimation(
                    daily_trades=same_day_trades, daily_tools=same_day_requests
                )
            )
    return pd.DataFrame.from_dict(collected, orient="columns")
|
|
|
|
|
def compute_timestamp_mech_calls(
    all_trades: pd.DataFrame, all_tools: pd.DataFrame
) -> list:
    """Attribute mech requests to trades by timestamp, counting each once.

    Trades are visited in row order. For every trade, the requests of the
    same trader on the same market whose ``request_time`` is strictly earlier
    than the trade's ``creation_timestamp`` are counted, unless an earlier
    trade of that trader on that market already consumed that timestamp.

    Args:
        all_trades: Trades with the ``trader_address``, ``id``, ``title`` and
            ``creation_timestamp`` columns.
        all_tools: Mech requests with the ``trader_address``, ``title`` and
            ``request_time`` columns.

    Returns:
        One dict per trade with the keys ``trader_address``, ``market``,
        ``trade_id`` and ``total_mech_calls``.
    """
    mech_calls_contents = []
    # Request timestamps already consumed, tracked per (trader, market) so
    # that one trader's requests never suppress another trader's.
    used_timestamps = defaultdict(set)
    # Request timestamps per (trader, market), filtered once and reused for
    # every later trade with the same key.
    requests_by_key = {}

    for _, trade in all_trades.iterrows():
        trader = trade["trader_address"]
        market = trade["title"]
        trade_ts = trade["creation_timestamp"]
        key = (trader, market)

        if key not in requests_by_key:
            same_trader_and_market = (all_tools["trader_address"] == trader) & (
                all_tools["title"] == market
            )
            requests_by_key[key] = all_tools.loc[
                same_trader_and_market, "request_time"
            ].tolist()

        consumed = used_timestamps[key]
        total_mech_calls = 0
        for request_ts in requests_by_key[key]:
            # Only requests made before the trade can have informed it.
            if request_ts < trade_ts and request_ts not in consumed:
                consumed.add(request_ts)
                total_mech_calls += 1

        mech_calls_contents.append(
            {
                "trader_address": trader,
                "market": market,
                "trade_id": trade["id"],
                "total_mech_calls": total_mech_calls,
            }
        )
    return mech_calls_contents
|
|
|
|
|
def compute_mech_calls_based_on_timestamps(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Count the mech calls attributed to every trade using timestamps.

    Note that the inputs are modified in place: ``creation_timestamp`` and
    ``creation_date`` are written to ``fpmmTrades``, and ``request_time`` and
    ``request_date`` are written to ``tools``. The per-trader counting itself
    is delegated to ``compute_timestamp_mech_calls``, which receives both
    tables sorted by time in ascending order.

    Args:
        fpmmTrades: Trades with ``trader_address``, ``creationTimestamp`` and
            the columns required by ``compute_timestamp_mech_calls``.
        tools: Mech requests with ``trader_address``, ``request_time`` and
            the columns required by ``compute_timestamp_mech_calls``.

    Returns:
        A frame built from the entries returned by
        ``compute_timestamp_mech_calls`` for every trader.
    """
    trader_count = len(fpmmTrades["trader_address"].unique())

    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
    sorted_trades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)

    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date
    sorted_tools = tools.sort_values(by="request_time", ascending=True)

    collected = []
    progress = tqdm(
        sorted_trades["trader_address"].unique(),
        total=trader_count,
        desc="creating mech calls count based on timestamps",
    )
    for trader in progress:
        trader_trades = sorted_trades[sorted_trades["trader_address"] == trader]
        trader_tools = sorted_tools[sorted_tools["trader_address"] == trader]
        collected.extend(compute_timestamp_mech_calls(trader_trades, trader_tools))
    return pd.DataFrame.from_dict(collected, orient="columns")
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the temporary tools and trades tables, convert
    # the trade creation timestamps, compute the timestamp-based mech calls
    # per trade and store the outcome next to the inputs.
    tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
    trades_df = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")

    converted_timestamps = trades_df["creationTimestamp"].apply(
        lambda raw_ts: transform_to_datetime(raw_ts)
    )
    trades_df["creationTimestamp"] = converted_timestamps

    mech_calls_df = compute_mech_calls_based_on_timestamps(
        fpmmTrades=trades_df, tools=tools_df
    )
    mech_calls_df.to_parquet(TMP_DIR / "result_df.parquet", index=False)
|
|