# rosacastillo's picture
# weekly data and some fixes
# b60f995
# raw
# history blame
# 11.4 kB
import re
from collections import defaultdict
from typing import Any, Dict, Tuple

import pandas as pd
from tqdm import tqdm

from tools import IRRELEVANT_TOOLS
from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR, transform_to_datetime
def update_roi(row: pd.DataFrame) -> float:
    """Recompute the ROI of a single trade.

    The ROI is the net earnings divided by the total cost of the trade: the
    collateral amount, plus the trade fee, plus the mech calls, each one
    priced at DEFAULT_MECH_FEE.

    :param row: record exposing the attributes net_earnings,
        collateral_amount, trade_fee_amount and num_mech_calls.
    :return: the recomputed ROI of the trade.
    """
    mech_calls_cost = row.num_mech_calls * DEFAULT_MECH_FEE
    total_cost = row.collateral_amount + row.trade_fee_amount + mech_calls_cost
    return row.net_earnings / total_cost
def get_mech_statistics(mech_requests: Dict[str, Any]) -> Dict[str, Dict[str, int]]:
    """Aggregate the mech requests per question.

    Requests without "ipfs_contents", or whose contents lack a "tool" or a
    "prompt", are ignored, and so are the requests made with a tool listed in
    IRRELEVANT_TOOLS.

    The question of a request is the text between the first and the last
    double quote of its whitespace-normalised prompt, or the whole normalised
    prompt when it holds no quoted text.

    :param mech_requests: mapping whose values are the mech request records.
    :return: mapping question -> {"count": number of requests for the
        question, "fees": sum of the "fee" of those requests}.
    """
    mech_statistics: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
    for mech_request in mech_requests.values():
        is_complete = "ipfs_contents" in mech_request and all(
            key in mech_request["ipfs_contents"] for key in ("tool", "prompt")
        )
        if not is_complete:
            continue

        contents = mech_request["ipfs_contents"]
        if contents["tool"] in IRRELEVANT_TOOLS:
            continue

        # flatten the prompt into a single line with single spaces
        flat_prompt = contents["prompt"].replace("\n", " ").strip()
        flat_prompt = re.sub(r"\s+", " ", flat_prompt)

        # the pattern is greedy: it spans from the first to the last quote
        quoted = re.search(r"\"(.*)\"", flat_prompt)
        question = quoted.group(1) if quoted else flat_prompt

        question_stats = mech_statistics[question]
        question_stats["count"] += 1
        question_stats["fees"] += mech_request["fee"]
    return mech_statistics
def create_unknown_traders_df(
    trades_df: pd.DataFrame,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Split off the trades of non-Olas traders that are placing no mech calls.

    A trade is considered unknown when its "staking" value is "non_Olas" and
    its "num_mech_calls" value is 0.

    :param trades_df: trades holding the columns "staking" and
        "num_mech_calls". It is not modified.
    :return: the pair (unknown trades, remaining trades); together they hold
        every row of trades_df exactly once.
    """
    no_mech_calls_mask = (trades_df["staking"] == "non_Olas") & (
        trades_df["num_mech_calls"] == 0
    )
    unknown_traders_df = trades_df.loc[no_mech_calls_mask]
    remaining_trades_df = trades_df.loc[~no_mech_calls_mask]
    return unknown_traders_df, remaining_trades_df
def update_trade_nr_mech_calls(non_agents: bool = False) -> None:
    """Reset the mech calls of the traders without tools usage and refresh the ROI.

    Reads all_trades_profitability.parquet and tools.parquet from DATA_DIR,
    sets "num_mech_calls" to 0 for every selected trader that has no row in
    the tools file, recomputes the "roi" column of all the trades with
    update_roi and writes the trades back to all_trades_profitability.parquet.

    :param non_agents: when True only the traders whose staking is
        "non_agent" are checked, otherwise all the traders are.
    """
    try:
        all_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
        tools = pd.read_parquet(DATA_DIR / "tools.parquet")
    except Exception as e:
        # Nothing can be updated without both files: report the cause and stop
        # here instead of failing below on the undefined dataframes.
        print(f"Error reading the profitability and tools parquet files: {e}")
        return

    # the staking column is never modified, so the mask stays valid until the end
    non_agent_mask = all_trades_df["staking"] == "non_agent"
    selected_trades = all_trades_df.loc[non_agent_mask] if non_agents else all_trades_df
    traders = list(selected_trades.trader_address.unique())

    print("before updating")
    print(all_trades_df.loc[non_agent_mask].num_mech_calls.describe())

    # built once so that each trader costs a set lookup and not a scan of tools
    traders_with_tools = set(tools["trader_address"].unique())
    for trader in tqdm(traders, desc="Updating Traders mech calls", unit="traders"):
        if trader in traders_with_tools:
            continue
        tqdm.write(f"trader with no tools usage found {trader}")
        # "num_mech_calls" is the column read by update_roi and by the
        # before/after summaries; writing to any other column would leave
        # the ROI unchanged.
        all_trades_df.loc[
            all_trades_df["trader_address"] == trader, "num_mech_calls"
        ] = 0

    # update roi
    all_trades_df["roi"] = all_trades_df.apply(update_roi, axis=1)

    print("after updating")
    print(all_trades_df.loc[non_agent_mask].num_mech_calls.describe())

    # saving
    all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
    # print("Summarising trades...")
    # summary_df = summary_analyse(all_trades_df)
    # summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)
def get_daily_mech_calls_estimation(
    daily_trades: pd.DataFrame, daily_tools: pd.DataFrame
) -> list:
    """Estimate the mech calls per trade for each market found in daily_trades.

    The trader address and the trading day are taken from the first row of
    daily_trades and copied into every estimation.

    :param daily_trades: trades holding the columns "title", "trader_address"
        and "creation_date".
    :param daily_tools: mech requests holding the column "title".
    :return: one dict per distinct market title, in order of first
        appearance, with the keys trader_address, trading_day, total_trades,
        total_mech_requests, market and mech_calls_per_trade. An empty list
        when daily_trades has no rows.
    """
    # without trades there is neither a first row to read nor a market to report
    if daily_trades.empty:
        return []

    first_trade = daily_trades.iloc[0]
    trader = first_trade.trader_address
    day = first_trade.creation_date

    trade_titles = daily_trades["title"]
    request_titles = daily_tools["title"]

    estimations = []
    # for each market
    for market in trade_titles.unique():
        # trades done on this market and tools usage of this market
        total_trades = int((trade_titles == market).sum())
        total_requests = int((request_titles == market).sum())

        # the ratio is only computed when the market has requests
        mech_calls_estimation = 0
        if total_requests > 0:
            mech_calls_estimation = total_requests / total_trades

        estimations.append(
            {
                "trader_address": trader,
                "trading_day": day,
                "total_trades": total_trades,
                "total_mech_requests": total_requests,
                "market": market,
                "mech_calls_per_trade": mech_calls_estimation,
            }
        )
    return estimations
def compute_daily_mech_calls(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Count the trades and the mech calls of every trader on each trading day.

    Note that the received dataframes are updated in place: fpmmTrades gets
    the columns "creation_timestamp" and "creation_date", tools gets its
    "request_time" converted to datetime and the column "request_date".

    :param fpmmTrades: trades holding the columns "trader_address" and
        "creationTimestamp".
    :param tools: mech requests holding the columns "trader_address" and
        "request_time".
    :return: dataframe with one row per trader and trading day and the
        columns trader_address, total_trades, trading_day, total_mech_calls.
    """
    # derive the day of each trade and of each mech request
    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date

    # work on chronologically ordered copies
    sorted_trades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
    sorted_tools = tools.sort_values(by="request_time", ascending=True)

    trader_addresses = sorted_trades["trader_address"].unique()
    daily_rows = []
    for trader in tqdm(
        trader_addresses,
        total=len(trader_addresses),
        desc="creating daily mech calls computation",
    ):
        trader_trades = sorted_trades[sorted_trades["trader_address"] == trader]
        trader_tools = sorted_tools[sorted_tools["trader_address"] == trader]
        for trading_day in trader_trades.creation_date.unique():
            same_day_trades = trader_trades["creation_date"] == trading_day
            same_day_tools = trader_tools["request_date"] == trading_day
            daily_rows.append(
                {
                    "trader_address": trader,
                    "total_trades": len(trader_trades.loc[same_day_trades]),
                    "trading_day": trading_day,
                    "total_mech_calls": len(trader_tools.loc[same_day_tools]),
                }
            )
    return pd.DataFrame.from_dict(daily_rows, orient="columns")
def compute_mech_call_estimations(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Estimate the mech calls needed per trade for every trader, day and market.

    The estimations of each trader and trading day are produced by
    get_daily_mech_calls_estimation and concatenated.

    Note that the received dataframes are updated in place: fpmmTrades gets
    the columns "creation_timestamp" and "creation_date", tools gets its
    "request_time" converted to datetime and the column "request_date".

    :param fpmmTrades: trades holding the columns "trader_address" and
        "creationTimestamp", plus those read by
        get_daily_mech_calls_estimation.
    :param tools: mech requests holding the columns "trader_address" and
        "request_time", plus those read by get_daily_mech_calls_estimation.
    :return: dataframe built from all the daily estimations.
    """
    # derive the day of each trade and of each mech request
    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date

    trader_addresses = fpmmTrades["trader_address"].unique()
    collected = []
    for trader in tqdm(
        trader_addresses,
        total=len(trader_addresses),
        desc="creating mech calls estimation dataframe",
    ):
        trader_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
        trader_tools = tools[tools["trader_address"] == trader]
        for trading_day in trader_trades.creation_date.unique():
            same_day_trades = trader_trades["creation_date"] == trading_day
            same_day_tools = trader_tools["request_date"] == trading_day
            collected.extend(
                get_daily_mech_calls_estimation(
                    daily_trades=trader_trades.loc[same_day_trades],
                    daily_tools=trader_tools.loc[same_day_tools],
                )
            )
    return pd.DataFrame.from_dict(collected, orient="columns")
def compute_timestamp_mech_calls(
    all_trades: pd.DataFrame, all_tools: pd.DataFrame
) -> list:
    """Count the mech calls of each trade using timestamps, without repeating mech calls.

    Trades are visited in row order. A mech request is assigned to a trade
    when it has the same trader and market as the trade, its request time is
    strictly before the trade timestamp, and no earlier trade already took a
    request with that time on the same market.

    The used request times are tracked per market only, not per trader.
    NOTE(review): compute_mech_calls_based_on_timestamps calls this with the
    rows of a single trader; confirm before passing several traders at once.

    :param all_trades: trades holding the columns "trader_address", "id",
        "title" and "creation_timestamp".
    :param all_tools: mech requests holding the columns "trader_address",
        "title" and "request_time".
    :return: one dict per trade with the keys trader_address, market,
        trade_id and total_mech_calls.
    """
    mech_calls_contents = []
    # initialize the dict with all markets; sets give constant-time checks
    # of the request times that were already assigned to a trade
    request_timestamps_used = {market: set() for market in all_trades.title.unique()}

    for _, trade in all_trades.iterrows():
        trader = trade["trader_address"]
        market = trade["title"]
        trade_ts = trade["creation_timestamp"]
        market_requests = all_tools.loc[
            (all_tools["trader_address"] == trader) & (all_tools["title"] == market)
        ]

        # traverse market requests; only their request time is needed
        total_mech_calls = 0
        for request_ts in market_requests["request_time"]:
            # only the requests placed before the trade can belong to it
            if not request_ts < trade_ts:
                continue
            # check the timestamp has not been used in a previous trade
            used_timestamps = request_timestamps_used[market]
            if request_ts not in used_timestamps:
                used_timestamps.add(request_ts)
                total_mech_calls += 1

        # create entry for the dataframe
        mech_calls_contents.append(
            {
                "trader_address": trader,
                "market": market,
                "trade_id": trade["id"],
                "total_mech_calls": total_mech_calls,
            }
        )
    return mech_calls_contents
def compute_mech_calls_based_on_timestamps(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Compute the mech calls of every trade, trader by trader, using timestamps.

    Trades and mech requests are ordered chronologically and the rows of each
    trader are handed to compute_timestamp_mech_calls.

    Note that the received dataframes are updated in place: fpmmTrades gets
    the columns "creation_timestamp" and "creation_date", tools gets its
    "request_time" converted to datetime and the column "request_date".

    :param fpmmTrades: trades holding the columns "trader_address" and
        "creationTimestamp", plus those read by compute_timestamp_mech_calls.
    :param tools: mech requests holding the columns "trader_address" and
        "request_time", plus those read by compute_timestamp_mech_calls.
    :return: dataframe built from the entries of all the traders.
    """
    # derive the timestamp/day columns on the received dataframes
    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date

    # work on chronologically ordered copies
    sorted_trades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
    sorted_tools = tools.sort_values(by="request_time", ascending=True)

    trader_addresses = sorted_trades["trader_address"].unique()
    collected = []
    for trader in tqdm(
        trader_addresses,
        total=len(trader_addresses),
        desc="creating mech calls count based on timestamps",
    ):
        # compute the mech calls for each trader
        is_trader_trade = sorted_trades["trader_address"] == trader
        is_trader_tool = sorted_tools["trader_address"] == trader
        collected.extend(
            compute_timestamp_mech_calls(
                sorted_trades[is_trader_trade], sorted_tools[is_trader_tool]
            )
        )
    return pd.DataFrame.from_dict(collected, orient="columns")
if __name__ == "__main__":
    # Script entry point: computes the timestamp based mech calls from the
    # parquet files found in TMP_DIR and stores the result next to them.
    # update_trade_nr_mech_calls(non_agents=True)
    tools = pd.read_parquet(TMP_DIR / "tools.parquet")
    fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")

    # convert the creation timestamps of the trades with the project helper
    creation_timestamps = fpmmTrades["creationTimestamp"].apply(transform_to_datetime)
    fpmmTrades["creationTimestamp"] = creation_timestamps

    result = compute_mech_calls_based_on_timestamps(fpmmTrades=fpmmTrades, tools=tools)
    result.to_parquet(TMP_DIR / "result_df.parquet", index=False)