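"""Helpers to update mech and trade data from the network subgraph.

Queries the subgraph for block numbers, collects new mech requests and
delivers, and merges the freshly downloaded data into the stored JSON and
parquet files.
"""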
from string import Template
from typing import Any
from datetime import datetime, timedelta, UTC
from utils import (
    SUBGRAPH_API_KEY,
    measure_execution_time,
    DATA_DIR,
    TMP_DIR,
    NETWORK_SUBGRAPH_URL,
    transform_to_datetime,
)
import requests
import pandas as pd
import numpy as np
from mech_request_utils import (
    collect_all_mech_delivers,
    collect_all_mech_requests,
    clean_mech_delivers,
    fix_duplicate_requestIds,
    merge_requests_delivers,
    get_ipfs_data,
    merge_json_files,
)

SUBGRAPH_HEADERS = {
    "Accept": "application/json, multipart/mixed",
    "Content-Type": "application/json",
}

QUERY_BATCH_SIZE = 1000
DATETIME_60_DAYS_AGO = datetime.now(UTC) - timedelta(days=60)
DATETIME_10_DAYS_AGO = datetime.now(UTC) - timedelta(days=10)
DATETIME_10_HOURS_AGO = datetime.now(UTC) - timedelta(hours=10)
BLOCK_NUMBER = Template(
    """
    { 
        blocks(
            first: 1,
            orderBy: timestamp,
            orderDirection: asc,
            where: {
                timestamp_gte: "${timestamp_from}",
                timestamp_lte: "${timestamp_to}"
            }
        ){
            id,
            number,
        }
    }
    """
)

LATEST_BLOCK_QUERY = """
    { 
        blocks(
            first: 1,
            orderBy: timestamp,
            orderDirection: desc,
        ){
            id,
            number,
        }
    }
    """


def fetch_last_block_number() -> dict:
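    """Fetch the latest block indexed by the network subgraph."""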
    # print(f"Sending query for the subgraph = {query}")
    network_subgraph_url = NETWORK_SUBGRAPH_URL.substitute(
        subgraph_api_key=SUBGRAPH_API_KEY
    )
    query = LATEST_BLOCK_QUERY
    response = requests.post(
        network_subgraph_url,
        headers=SUBGRAPH_HEADERS,
        json={"query": query},
        timeout=300,
    )

    result_json = response.json()
    print(f"Response of the query={result_json}")
    blocks = result_json.get("data", {}).get("blocks", [])
    if len(blocks) == 0:
        raise ValueError(f"The query {query} did not return any results")
    return blocks[0]


def fetch_block_number(timestamp_from: int, timestamp_to: int) -> dict:
    """Get a block number by its timestamp margins."""

    query = BLOCK_NUMBER.substitute(
        timestamp_from=timestamp_from, timestamp_to=timestamp_to
    )
    # print(f"Sending query for the subgraph = {query}")
    network_subgraph_url = NETWORK_SUBGRAPH_URL.substitute(
        subgraph_api_key=SUBGRAPH_API_KEY
    )
    response = requests.post(
        network_subgraph_url,
        headers=SUBGRAPH_HEADERS,
        json={"query": query},
        timeout=300,
    )
    # print(f"block query: {query}")
    result_json = response.json()
    print(f"Response of the query={result_json}")
    blocks = result_json.get("data", {}).get("blocks", [])
    if len(blocks) == 0:
        raise ValueError(f"The query {query} did not return any results")
    return blocks[0]


def update_json_files():
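    """Merge the newly downloaded JSON files into the stored ones."""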
    merge_json_files("mech_requests.json", "new_mech_requests.json")
    merge_json_files("mech_delivers.json", "new_mech_delivers.json")
    merge_json_files("merged_requests.json", "new_merged_requests.json")
    merge_json_files("tools_info.json", "new_tools_info.json")


def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
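    """Merge the new trades parquet file into the stored fpmmTrades parquet."""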
    # Read old trades parquet file
    try:
        old_trades_df = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
    except Exception as e:
        print(f"Error reading old trades parquet file {e}")
        return None

    try:
        new_trades_df = pd.read_parquet(DATA_DIR / trades_filename)
    except Exception as e:
        print(f"Error reading new trades parquet file {e}")
        return None

    # lowercase and strip trader_address
    new_trades_df["trader_address"] = (
        new_trades_df["trader_address"].str.lower().str.strip()
    )
    # ensure creationTimestamp compatibility
    try:
        new_trades_df["creationTimestamp"] = new_trades_df["creationTimestamp"].apply(
            transform_to_datetime
        )
    except Exception:
        print("Transformation not needed")
    try:
        old_trades_df["creationTimestamp"] = old_trades_df["creationTimestamp"].apply(
            transform_to_datetime
        )
    except Exception:
        print("Transformation not needed")

    # merge two dataframes
    merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
    # avoid numpy objects
    merge_df["fpmm.arbitrationOccurred"] = merge_df["fpmm.arbitrationOccurred"].astype(
        bool
    )
    merge_df["fpmm.isPendingArbitration"] = merge_df[
        "fpmm.isPendingArbitration"
    ].astype(bool)

    # Check for duplicates
    print(f"Initial length before removing duplicates in fpmmTrades= {len(merge_df)}")

    # Remove duplicates
    # fpmm.outcomes is a numpy array
    merge_df.drop_duplicates("id", keep="last", inplace=True)
    print(f"Final length after removing duplicates in fpmmTrades= {len(merge_df)}")

    # save the parquet file
    merge_df.to_parquet(TMP_DIR / "fpmmTrades.parquet", index=False)

    return merge_df


def update_all_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
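    """Merge the new trades with the stored all_trades_profitability data."""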
    # Read old all_trades parquet file
    try:
        old_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
    except Exception as e:
        print(f"Error reading old trades parquet file {e}")
        return None
    # merge two dataframes
    merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)

    # Check for duplicates
    print(f"Initial length before removing duplicates in all_trades= {len(merge_df)}")

    # Remove duplicates
    merge_df.drop_duplicates("trade_id", inplace=True)
    print(f"Final length after removing duplicates in all_trades = {len(merge_df)}")
    return merge_df


def update_tools_parquet(new_tools_filename: str):
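    """Merge the new tools parquet file into the stored tools parquet."""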
    try:
        old_tools_df = pd.read_parquet(TMP_DIR / "tools.parquet")
    except Exception as e:
        print(f"Error reading old tools parquet file {e}")
        return None
    try:
        new_tools_df = pd.read_parquet(DATA_DIR / new_tools_filename)
    except Exception as e:
        print(f"Error reading new tools parquet file {e}")
        return None

    # merge two dataframes
    merge_df = pd.concat([old_tools_df, new_tools_df], ignore_index=True)

    # Check for duplicates
    print(f"Initial length before removing duplicates in tools= {len(merge_df)}")

    # Remove duplicates
    merge_df.drop_duplicates(
        subset=["request_id", "request_time"], keep="last", inplace=True
    )
    print(f"Final length after removing duplicates in tools= {len(merge_df)}")

    # save the parquet file
    merge_df.to_parquet(TMP_DIR / "tools.parquet", index=False)


def get_mech_info_2024() -> dict[str, Any]:
    """Query the subgraph to get the 2024 information from mech."""

    date = "2024-01-01"
    datetime_jan_2024 = datetime.strptime(date, "%Y-%m-%d")
    timestamp_jan_2024 = int(datetime_jan_2024.timestamp())
    margin = timedelta(seconds=5)
    timestamp_jan_2024_plus_margin = int((datetime_jan_2024 + margin).timestamp())

    jan_block_number = fetch_block_number(
        timestamp_jan_2024, timestamp_jan_2024_plus_margin
    )
    # expecting only one block
    jan_block_number = jan_block_number.get("number", "")
    if not jan_block_number.isdigit():
        raise ValueError(
            "Could not find a valid block number for the first of January 2024"
        )
    jan_block_number = int(jan_block_number)
    MECH_TO_INFO = {
        # this block number is when the creator had its first tx ever, and after this mech's creation
        "0xff82123dfb52ab75c417195c5fdb87630145ae81": (
            "old_mech_abi.json",
            jan_block_number,
        ),
        # this block number is when this mech was created
        "0x77af31de935740567cf4ff1986d04b2c964a786a": (
            "new_mech_abi.json",
            jan_block_number,
        ),
    }
    return MECH_TO_INFO


def get_last_block_number() -> int:
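    """Return the number of the latest block as an integer."""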
    last_block_number = fetch_last_block_number()
    # expecting only one block
    last_block_number = last_block_number.get("number", "")
    if not last_block_number.isdigit():
        raise ValueError("Could not find a valid block number for the latest block")
    last_block_number = int(last_block_number)
    return last_block_number


def get_last_60_days_block_number() -> int:
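    """Return the block number closest to the timestamp of 60 days ago."""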
    timestamp_60_days_ago = int((DATETIME_60_DAYS_AGO).timestamp())
    margin = timedelta(seconds=5)
    timestamp_60_days_ago_plus_margin = int((DATETIME_60_DAYS_AGO + margin).timestamp())

    last_month_block_number = fetch_block_number(
        timestamp_60_days_ago, timestamp_60_days_ago_plus_margin
    )
    # expecting only one block
    last_month_block_number = last_month_block_number.get("number", "")
    if not last_month_block_number.isdigit():
        raise ValueError("Could not find a valid block number for the last 60 days")
    last_month_block_number = int(last_month_block_number)
    return last_month_block_number


def get_mech_info_last_60_days() -> dict[str, Any]:
    """Query the subgraph to get the last 60 days of information from mech."""
    last_month_block_number = get_last_60_days_block_number()

    MECH_TO_INFO = {
        # this block number is when the creator had its first tx ever, and after this mech's creation
        "0xff82123dfb52ab75c417195c5fdb87630145ae81": (
            "old_mech_abi.json",
            last_month_block_number,
        ),
        # this block number is when this mech was created
        "0x77af31de935740567cf4ff1986d04b2c964a786a": (
            "new_mech_abi.json",
            last_month_block_number,
        ),
    }
    print(f"last 60 days block number {last_month_block_number}")
    return MECH_TO_INFO


@measure_execution_time
def get_mech_events_since_last_run(logger):
    """Function to download only the new events since the last execution."""

    # Read the latest date from stored data
    try:
        all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
        latest_timestamp = max(all_trades.creation_timestamp)
        # cutoff_date = "2024-12-22"
        # latest_timestamp = pd.Timestamp(
        #     datetime.strptime(cutoff_date, "%Y-%m-%d")
        # ).tz_localize("UTC")
        print(f"Updating data since {latest_timestamp}")
    except Exception:
        print("Error while reading the profitability parquet file")
        return None

    # Get the block number of the latest date
    five_seconds = np.timedelta64(5, "s")
    last_run_block_number = fetch_block_number(
        int(latest_timestamp.timestamp()),
        int((latest_timestamp + five_seconds).timestamp()),
    )
    # expecting only one block
    last_run_block_number = last_run_block_number.get("number", "")
    if not last_run_block_number.isdigit():
        raise ValueError(
            "Could not find a valid block number for the last collected data"
        )
    last_run_block_number = int(last_run_block_number)
    last_block_number = get_last_block_number()

    # mech requests
    requests_dict, duplicatedReqId, nr_errors = collect_all_mech_requests(
        from_block=last_run_block_number,
        to_block=last_block_number,
        filename="new_mech_requests.json",
    )
    print(f"NUMBER OF MECH REQUEST ERRORS={nr_errors}")
    # mech delivers
    delivers_dict, duplicatedIds, nr_errors = collect_all_mech_delivers(
        from_block=last_run_block_number,
        to_block=last_block_number,
        filename="new_mech_delivers.json",
    )
    print(f"NUMBER OF MECH DELIVER ERRORS={nr_errors}")
    if delivers_dict is None:
        return None
    # clean delivers
    clean_mech_delivers("new_mech_requests.json", "new_mech_delivers.json")

    # solve duplicated requestIds
    block_map = fix_duplicate_requestIds(
        "new_mech_requests.json", "new_mech_delivers.json"
    )
    # merge the two files into one source
    not_found = merge_requests_delivers(
        "new_mech_requests.json", "new_mech_delivers.json", "new_merged_requests.json"
    )

    # Add ipfs contents
    get_ipfs_data("new_merged_requests.json", "new_tools_info.json", logger)
    return latest_timestamp


if __name__ == "__main__":
    import logging

    # Assumes a standard logging.Logger is acceptable for get_ipfs_data
    logging.basicConfig(level=logging.INFO)
    get_mech_events_since_last_run(logging.getLogger(__name__))
    # result = get_mech_info_last_60_days()
    # print(result)