File size: 11,393 Bytes
e51ae04
b60f995
e51ae04
b60f995
f9ef62b
 
 
 
 
 
e51ae04
 
 
 
 
 
 
 
 
f9ef62b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7c2ff7
 
 
 
 
 
 
 
 
 
e51ae04
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7c2ff7
 
 
e51ae04
 
f9ef62b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
960332d
f9ef62b
 
 
 
 
 
 
960332d
f9ef62b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278fab8
f9ef62b
278fab8
f9ef62b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278fab8
f9ef62b
 
 
 
 
 
 
278fab8
f9ef62b
278fab8
f9ef62b
 
 
 
 
 
 
e51ae04
f7c2ff7
278fab8
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
import re
from collections import defaultdict
from typing import Any, Dict, Tuple

import pandas as pd
from tqdm import tqdm

from tools import IRRELEVANT_TOOLS
from utils import DATA_DIR, DEFAULT_MECH_FEE, TMP_DIR, transform_to_datetime


def update_roi(row: pd.DataFrame) -> float:
    """Return the ROI of one trade row.

    The ROI is ``net_earnings`` divided by the total cost of the trade:
    collateral plus trade fee plus ``DEFAULT_MECH_FEE`` for every mech call.
    The function is applied row-wise (``DataFrame.apply(..., axis=1)``), so
    ``row`` only needs to expose the four fields read below as attributes.
    """
    earnings = row.net_earnings
    total_cost = (
        row.collateral_amount
        + row.trade_fee_amount
        + row.num_mech_calls * DEFAULT_MECH_FEE
    )
    return earnings / total_cost


def get_mech_statistics(mech_requests: Dict[str, Any]) -> Dict[str, Dict[str, int]]:
    """Aggregate mech requests per question.

    Requests without ``ipfs_contents``, or whose contents lack a ``tool`` or a
    ``prompt``, are ignored, as are requests made with a tool listed in
    ``IRRELEVANT_TOOLS``. For the remaining requests the question is the text
    between the first and the last double quote of the whitespace-normalised
    prompt, or the whole prompt when it contains no quoted text.

    Returns a mapping ``question -> {"count": ..., "fees": ...}`` holding the
    number of requests and the sum of their ``fee`` values.
    """
    stats: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))

    for request in mech_requests.values():
        if "ipfs_contents" not in request:
            continue
        contents = request["ipfs_contents"]
        if not ("tool" in contents and "prompt" in contents):
            continue
        if contents["tool"] in IRRELEVANT_TOOLS:
            continue

        # Flatten the prompt to a single line with single spaces.
        flat_prompt = re.sub(r"\s+", " ", contents["prompt"].replace("\n", " ").strip())
        quoted = re.search(r"\"(.*)\"", flat_prompt)
        question = quoted.group(1) if quoted else flat_prompt

        question_stats = stats[question]
        question_stats["count"] += 1
        question_stats["fees"] += request["fee"]

    return stats


def create_unknown_traders_df(
    trades_df: pd.DataFrame,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Split off the trades of non-Olas traders that placed no mech calls.

    A trade is moved to the "unknown traders" set when its ``staking`` value
    is ``"non_Olas"`` and its ``num_mech_calls`` is 0.

    Args:
        trades_df: trades table with ``staking`` and ``num_mech_calls`` columns.

    Returns:
        A pair ``(no_mech_calls_df, remaining_trades_df)``: the rows matching
        the condition above, and every other row of ``trades_df``. Both keep
        the original index labels.
    """
    no_mech_calls_mask = (trades_df["staking"] == "non_Olas") & (
        trades_df["num_mech_calls"] == 0
    )
    no_mech_calls_df = trades_df.loc[no_mech_calls_mask]
    remaining_trades_df = trades_df.loc[~no_mech_calls_mask]
    return no_mech_calls_df, remaining_trades_df


def update_trade_nr_mech_calls(non_agents: bool = False):
    """Zero the mech calls of traders without tool usage and refresh the ROI.

    Reads ``all_trades_profitability.parquet`` and ``tools.parquet`` from
    ``DATA_DIR``. Every selected trader that has no row in the tools table
    gets ``num_mech_calls`` set to 0 on all of its trades; ``roi`` is then
    recomputed for every trade and the trades table is written back to
    ``all_trades_profitability.parquet``.

    Args:
        non_agents: when True only traders whose ``staking`` value is
            ``"non_agent"`` are checked; otherwise all traders are checked.

    Returns:
        None. If either parquet file cannot be read the error is printed and
        nothing is modified.
    """
    try:
        all_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
        tools = pd.read_parquet(DATA_DIR / "tools.parquet")
    except Exception as e:
        # Without both inputs there is nothing to update; stop here instead of
        # failing later with a NameError on the missing dataframes.
        print(f"Error reading the profitability and tools parquet files: {e}")
        return

    # The staking column is never modified below, so the mask stays valid.
    non_agent_mask = all_trades_df["staking"] == "non_agent"
    if non_agents:
        traders = list(all_trades_df.loc[non_agent_mask].trader_address.unique())
    else:
        traders = list(all_trades_df.trader_address.unique())

    print("before updating")
    print(all_trades_df.loc[non_agent_mask].num_mech_calls.describe())

    # Build the lookup once instead of filtering the tools table per trader.
    traders_with_tools = set(tools["trader_address"].unique())
    for trader in tqdm(traders, desc="Updating Traders mech calls", unit="traders"):
        if trader in traders_with_tools:
            continue
        tqdm.write(f"trader with no tools usage found {trader}")
        # "num_mech_calls" is the column read by update_roi and by the
        # summaries printed here, so that is the one that must be reset.
        all_trades_df.loc[
            all_trades_df["trader_address"] == trader, "num_mech_calls"
        ] = 0

    # update roi
    all_trades_df["roi"] = all_trades_df.apply(update_roi, axis=1)
    print("after updating")
    print(all_trades_df.loc[non_agent_mask].num_mech_calls.describe())

    # saving
    all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
    # print("Summarising trades...")
    # summary_df = summary_analyse(all_trades_df)
    # summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)


def get_daily_mech_calls_estimation(
    daily_trades: pd.DataFrame, daily_tools: pd.DataFrame
) -> list:
    """Estimate the mech calls per trade for every market traded in a day.

    ``daily_trades`` is expected to be non-empty; the trader address and the
    trading day are taken from its first row. For each distinct ``title`` in
    ``daily_trades`` the requests in ``daily_tools`` with the same ``title``
    are counted and divided by the number of trades on that market.

    Returns a list with one dict per market, holding the trader, the day, the
    trade and request counts, the market and the calls-per-trade ratio
    (0 when the market has no requests).
    """
    markets = daily_trades.title.unique()
    first_trade = daily_trades.iloc[0]
    trader = first_trade.trader_address
    day = first_trade.creation_date

    estimations = []
    for market in markets:
        nr_requests = len(daily_tools.loc[daily_tools["title"] == market])
        nr_trades = len(daily_trades[daily_trades["title"] == market])
        # Every market comes from daily_trades, so nr_trades is at least 1.
        calls_per_trade = nr_requests / nr_trades if nr_requests > 0 else 0
        estimations.append(
            {
                "trader_address": trader,
                "trading_day": day,
                "total_trades": nr_trades,
                "total_mech_requests": nr_requests,
                "market": market,
                "mech_calls_per_trade": calls_per_trade,
            }
        )
    return estimations


def compute_daily_mech_calls(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Count trades and mech requests per trader and trading day.

    Note that the helper columns ``creation_timestamp`` / ``creation_date``
    and ``request_time`` / ``request_date`` are written into the dataframes
    passed by the caller before local sorted copies are taken.

    Returns a dataframe with one row per (trader, trading day) and the
    columns ``trader_address``, ``total_trades``, ``trading_day`` and
    ``total_mech_calls``.
    """
    nr_traders = len(fpmmTrades["trader_address"].unique())

    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
    fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)

    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date
    tools = tools.sort_values(by="request_time", ascending=True)

    daily_rows = []
    traders_progress = tqdm(
        fpmmTrades["trader_address"].unique(),
        total=nr_traders,
        desc="creating daily mech calls computation",
    )
    for trader in traders_progress:
        trader_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
        trader_tools = tools[tools["trader_address"] == trader]
        for trading_day in trader_trades.creation_date.unique():
            trades_of_day = trader_trades.loc[
                trader_trades["creation_date"] == trading_day
            ]
            requests_of_day = trader_tools.loc[
                trader_tools["request_date"] == trading_day
            ]
            daily_rows.append(
                {
                    "trader_address": trader,
                    "total_trades": len(trades_of_day),
                    "trading_day": trading_day,
                    "total_mech_calls": len(requests_of_day),
                }
            )
    return pd.DataFrame.from_dict(daily_rows, orient="columns")


def compute_mech_call_estimations(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Estimate the mech calls per trade for each trader, day and market.

    The helper columns ``creation_timestamp`` / ``creation_date`` and
    ``request_time`` / ``request_date`` are written into the dataframes passed
    by the caller. For every trader and every day on which that trader traded,
    the per-market estimations come from ``get_daily_mech_calls_estimation``.

    Returns a dataframe built from all the estimation dicts collected.
    """
    nr_traders = len(fpmmTrades["trader_address"].unique())

    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date

    estimation_rows = []
    traders_progress = tqdm(
        fpmmTrades["trader_address"].unique(),
        total=nr_traders,
        desc="creating mech calls estimation dataframe",
    )
    for trader in traders_progress:
        trader_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
        trader_tools = tools[tools["trader_address"] == trader]
        for trading_day in trader_trades.creation_date.unique():
            trades_of_day = trader_trades.loc[
                trader_trades["creation_date"] == trading_day
            ]
            requests_of_day = trader_tools.loc[
                trader_tools["request_date"] == trading_day
            ]
            estimation_rows.extend(
                get_daily_mech_calls_estimation(
                    daily_trades=trades_of_day, daily_tools=requests_of_day
                )
            )
    return pd.DataFrame.from_dict(estimation_rows, orient="columns")


def compute_timestamp_mech_calls(
    all_trades: pd.DataFrame, all_tools: pd.DataFrame
) -> list:
    """Attribute mech requests to trades by timestamp, without repeats.

    Trades are processed in the row order of ``all_trades``. A request is
    attributed to a trade when it has the same ``trader_address`` and
    ``title`` as the trade, its ``request_time`` is strictly before the
    trade's ``creation_timestamp``, and its timestamp has not already been
    attributed to an earlier trade on the same market. Requests sharing a
    timestamp on the same market are therefore counted once.

    Args:
        all_trades: trades with ``trader_address``, ``id``, ``title`` and
            ``creation_timestamp`` columns.
        all_tools: mech requests with ``trader_address``, ``title`` and
            ``request_time`` columns.

    Returns:
        One dict per trade with the keys ``trader_address``, ``market``,
        ``trade_id`` and ``total_mech_calls``.
    """
    mech_calls_contents = []
    # Request timestamps already attributed, per market. Sets keep the
    # "already used" check constant-time instead of scanning a growing list.
    request_timestamps_used = {}

    for _, trade in all_trades.iterrows():
        trader = trade["trader_address"]
        trade_id = trade["id"]
        market = trade["title"]
        trade_ts = trade["creation_timestamp"]
        market_requests = all_tools.loc[
            (all_tools["trader_address"] == trader) & (all_tools["title"] == market)
        ]
        used_timestamps = request_timestamps_used.setdefault(market, set())
        total_mech_calls = 0
        for request_ts in market_requests["request_time"]:
            # only requests placed before the trade and not counted yet
            if request_ts < trade_ts and request_ts not in used_timestamps:
                used_timestamps.add(request_ts)
                total_mech_calls += 1
        # create the entry for the dataframe
        mech_calls_contents.append(
            {
                "trader_address": trader,
                "market": market,
                "trade_id": trade_id,
                "total_mech_calls": total_mech_calls,
            }
        )
    return mech_calls_contents


def compute_mech_calls_based_on_timestamps(
    fpmmTrades: pd.DataFrame, tools: pd.DataFrame
) -> pd.DataFrame:
    """Compute the mech calls of every trade using request/trade timestamps.

    The helper columns ``creation_timestamp`` / ``creation_date`` and
    ``request_time`` / ``request_date`` are written into the dataframes passed
    by the caller; chronologically sorted copies are then processed trader by
    trader with ``compute_timestamp_mech_calls``.

    Returns a dataframe built from the per-trade entries of all traders.
    """
    nr_traders = len(fpmmTrades["trader_address"].unique())

    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
    fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)

    tools["request_time"] = pd.to_datetime(tools["request_time"])
    tools["request_date"] = tools["request_time"].dt.date
    tools = tools.sort_values(by="request_time", ascending=True)

    mech_call_rows = []
    traders_progress = tqdm(
        fpmmTrades["trader_address"].unique(),
        total=nr_traders,
        desc="creating mech calls count based on timestamps",
    )
    for trader in traders_progress:
        # restrict both tables to the current trader
        trader_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
        trader_tools = tools[tools["trader_address"] == trader]
        mech_call_rows.extend(
            compute_timestamp_mech_calls(trader_trades, trader_tools)
        )
    return pd.DataFrame.from_dict(mech_call_rows, orient="columns")


if __name__ == "__main__":
    # update_trade_nr_mech_calls(non_agents=True)
    # Script entry point: load the temporary tools and trades tables, compute
    # the timestamp-based mech calls and store the result next to them.
    tools = pd.read_parquet(TMP_DIR / "tools.parquet")
    fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
    fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
        transform_to_datetime
    )
    result = compute_mech_calls_based_on_timestamps(
        fpmmTrades=fpmmTrades,
        tools=tools,
    )
    result.to_parquet(TMP_DIR / "result_df.parquet", index=False)