Commit 9382590 by rosacastillo · Parent(s): adf5b52

updated information of the last week
data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83eed2f1cf4c39db6ca8bf152af976614dc1bffa36c9409b78fd311ab6629705
-size 2511824
+oid sha256:54d1564ac5393d51c6a7218a9d6afabb78c6166f2661afa5c68fe8ec166ba213
+size 2885806
data/fpmmTrades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f5340dc0529ae5f549a97b4967de8dc412c008c6f81d54bab7e7eec3cbd360c
-size 7422987
+oid sha256:6c95bdbd08b0fa875044c53bfc983c0e4a76d861c703da2f4a04cdec20a9f13c
+size 8600942
data/fpmms.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d807d167e6c0aa9bb01dc507c57a4ef01aadc04b35cb77b112cc6ae7fe693c9
-size 483517
+oid sha256:2b4ad4e1780f6dcb812787d397a738141e53db0fc6b588f386586dddabca034e
+size 494787
data/invalid_trades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f87d6a2d1d7b0bdd4c516eae9aa845a49c61511bd46a6c5e4a8424e5d044bc2
-size 69418
+oid sha256:8199109e6224b609408037098e23f11f6e38a2f2526f9376fca895ee2728edd9
+size 77169
data/outliers.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8e210fd22652fa64353d6193810d6f6a548b446fd8a80157fdf7ea06f6f8aba
+oid sha256:3910fd14580aac1b02c49152dbc5fb7b282aaa52b81e3e634801bf673590c8fb
 size 18274
data/service_map.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a84f3c384646679893e6ecfc28345b9c967f4adcc757c30fc2496240fc60addf
-size 91915
+oid sha256:e587b4db61a24ab6787a3e11d7ab3936e8f6ba63614a39ba086db1819915434a
+size 91939
data/summary_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de28befef8686ecb1d8e62f29dc562c14db69f649a7e4ac3a7719f44008a80bb
-size 54717
+oid sha256:2c7253ea40d66ba0f06955bb2b96a589fb20e154eb2cfd95db85872d80d80a4b
+size 71470
data/t_map.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6bac74e8f4b7491a7f6d400ca974d1ae83a3c1cfddb331b4342f7301fada75e
-size 20731045
+oid sha256:b0705cfa2166283351129e1dac63aab41a4231e60f6873d3026eb23da5cdbf27
+size 21870626
data/tools.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fcc1ba4e141ee4cc15b0740449e077ec0affa9ab05c77bf470e290a982da5288
-size 319097335
+oid sha256:c5594bd432a7db7fc7bf8ccbf5c40a10bd452643e2a884b5ae221a9bfe21271e
+size 342399362
data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa0da340e70a8823b0f5a494df206d32dd33f452460a40920810f119a83dd1a5
-size 1014
+oid sha256:9ca0e77ced7ed3d627b0cbc2028af4116629135ed781b4e12c3c410316208a72
+size 1121
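The data files above are Git LFS pointers, so only their oid/size lines change when a refresh is committed. As a quick sanity check after pulling the LFS objects, something like the sketch below should show the new row count; it assumes the path is relative to the repo root and takes the `creation_timestamp` column name from the script changes further down.

import pandas as pd

# Assumes `git lfs pull` has been run so the parquet file contains real data
# rather than the pointer text shown above.
trades = pd.read_parquet("data/all_trades_profitability.parquet")
print(trades.shape)
print(trades["creation_timestamp"].max())  # newest trade covered by this update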
scripts/get_mech_info.py CHANGED
@@ -3,6 +3,9 @@ from typing import Any
 from datetime import datetime, timedelta, UTC
 from utils import SUBGRAPH_API_KEY, measure_execution_time, DATA_DIR
 import requests
+import json
+import pandas as pd
+import numpy as np
 from mech_request_utils import (
     collect_all_mech_delivers,
     collect_all_mech_requests,
@@ -11,6 +14,7 @@ from mech_request_utils import (
     merge_requests_delivers,
     get_ipfs_data,
     only_delivers_loop,
+    merge_json_files,
 )
 
 OLD_MECH_SUBGRAPH_URL = (
@@ -109,6 +113,78 @@ def fetch_block_number(timestamp_from: int, timestamp_to: int) -> dict:
     return blocks[0]
 
 
+def update_json_files():
+    merge_json_files("mech_requests.json", "new_mech_requests.json")
+    merge_json_files("mech_delivers.json", "new_mech_delivers.json")
+    merge_json_files("merged_requests.json", "new_merged_requests.json")
+    merge_json_files("tools_info.json", "new_tools_info.json")
+
+
+def update_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
+    # Read old trades parquet file
+    try:
+        old_trades_df = pd.read_parquet(DATA_DIR / "fpmmTrades.parquet")
+    except Exception as e:
+        print(f"Error reading old trades parquet file {e}")
+        return None
+
+    # merge two dataframes
+    merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
+
+    # Check for duplicates
+    duplicates = merge_df.duplicated()
+
+    # Print the duplicates
+    print(duplicates)
+
+    # Get the number of duplicates
+    num_duplicates = duplicates.sum()
+    print("Number of duplicates:", num_duplicates)
+
+    # Get the rows with duplicates
+    duplicate_rows = merge_df[duplicates]
+    print("Duplicate rows:\n", duplicate_rows)
+
+    # Remove duplicates
+    merge_df.drop_duplicates(inplace=True)
+
+    # save the parquet file
+    merge_df.to_parquet(DATA_DIR / "fpmmTrades.parquet", index=False)
+
+    return merge_df
+
+
+def update_tools_parquet(new_tools_df: pd.DataFrame):
+    try:
+        old_tools_df = pd.read_parquet(DATA_DIR / "tools.parquet")
+    except Exception as e:
+        print(f"Error reading old tools parquet file {e}")
+        return None
+
+    # merge two dataframes
+    merge_df = pd.concat([old_tools_df, new_tools_df], ignore_index=True)
+
+    # Check for duplicates
+    duplicates = merge_df.duplicated()
+
+    # Print the duplicates
+    print(duplicates)
+
+    # Get the number of duplicates
+    num_duplicates = duplicates.sum()
+    print("Number of duplicates:", num_duplicates)
+
+    # Get the rows with duplicates
+    duplicate_rows = merge_df[duplicates]
+    print("Duplicate rows:\n", duplicate_rows)
+
+    # Remove duplicates
+    merge_df.drop_duplicates(inplace=True)
+
+    # save the parquet file
+    merge_df.to_parquet(DATA_DIR / "tools.parquet", index=False)
+
+
 def get_mech_info_2024() -> dict[str, Any]:
     """Query the subgraph to get the 2024 information from mech."""
 
@@ -195,38 +271,55 @@ def get_mech_info_last_60_days() -> dict[str, Any]:
     return MECH_TO_INFO
 
 
-def get_mech_info_last_10_days() -> dict[str, Any]:
-    """Query the subgraph to get the last 10 days of information from mech."""
-
-    timestamp_10_days_ago = int((DATETIME_10_DAYS_AGO).timestamp())
-    margin = timedelta(seconds=5)
-    timestamp_10_days_ago_plus_margin = int((DATETIME_10_DAYS_AGO + margin).timestamp())
-
-    last_month_block_number = fetch_block_number(
-        timestamp_10_days_ago, timestamp_10_days_ago_plus_margin
-    )
-    # expecting only one block
-    last_month_block_number = last_month_block_number.get("number", "")
-    if last_month_block_number.isdigit():
-        last_month_block_number = int(last_month_block_number)
-
-    if last_month_block_number == "":
-        raise ValueError("Could not find a valid block number for last month data")
-
-    MECH_TO_INFO = {
-        # this block number is when the creator had its first tx ever, and after this mech's creation
-        "0xff82123dfb52ab75c417195c5fdb87630145ae81": (
-            "old_mech_abi.json",
-            last_month_block_number,
-        ),
-        # this block number is when this mech was created
-        "0x77af31de935740567cf4ff1986d04b2c964a786a": (
-            "new_mech_abi.json",
-            last_month_block_number,
-        ),
-    }
-    print(f"last 10 days block number {last_month_block_number}")
-    return MECH_TO_INFO
+@measure_execution_time
+def get_mech_events_since_last_run():
+    """Function to download only the new events since the last execution."""
+
+    # Read the latest date from stored data
+    try:
+        all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
+        latest_timestamp = max(all_trades.creation_timestamp)
+        print(f"Updating data since {latest_timestamp}")
+    except Exception:
+        print("Error while reading the profitability parquet file")
+        return None
+
+    # Get the block number of lastest date
+    five_seconds = np.timedelta64(5, "s")
+    last_run_block_number = fetch_block_number(
+        int(latest_timestamp.timestamp()),
+        int((latest_timestamp + five_seconds).timestamp()),
+    )
+    last_block_number = get_last_block_number()
+
+    # mech requests
+    requests_dict, duplicatedReqId = collect_all_mech_requests(
+        from_block=last_run_block_number,
+        to_block=last_block_number,
+        filename="new_mech_requests.json",
+    )
+
+    # mech delivers
+    delivers_dict, duplicatedIds = collect_all_mech_delivers(
+        from_block=last_run_block_number,
+        to_block=last_block_number,
+        filename="new_mech_delivers.json",
+    )
+    # clean delivers
+    clean_mech_delivers("new_mech_requests.json", "new_mech_delivers.json")
+
+    # solve duplicated requestIds
+    block_map = fix_duplicate_requestIds(
+        "new_mech_requests.json", "new_mech_delivers.json"
+    )
+    # merge the two files into one source
+    not_found = merge_requests_delivers(
+        "new_mech_requests.json", "new_mech_delivers.json", "new_merged_requests.json"
+    )
+
+    # Add ipfs contents
+    get_ipfs_data("new_merged_requests.json", "new_tools_info.json")
+    return latest_timestamp
 
 
 @measure_execution_time
@@ -235,25 +328,31 @@ def get_mech_events_last_60_days():
     last_block_number = get_last_block_number()
     # mech requests
     requests_dict, duplicatedReqId = collect_all_mech_requests(
-        from_block=earliest_block_number, to_block=last_block_number
+        from_block=earliest_block_number,
+        to_block=last_block_number,
+        filename="mech_requests.json",
     )
 
     # mech delivers
     delivers_dict, duplicatedIds = collect_all_mech_delivers(
-        from_block=earliest_block_number, to_block=last_block_number
+        from_block=earliest_block_number,
+        to_block=last_block_number,
+        filename="mech_delivers.json",
    )
 
     # clean delivers
-    clean_mech_delivers()
+    clean_mech_delivers("mech_requests.json", "mech_delivers.json")
 
     # solve duplicated requestIds
-    block_map = fix_duplicate_requestIds()
+    block_map = fix_duplicate_requestIds("mech_requests.json", "mech_delivers.json")
 
     # merge the two files into one source
-    not_found = merge_requests_delivers()
+    not_found = merge_requests_delivers(
+        "mech_requests.json", "mech_delivers.json", "merged_requests.json"
+    )
 
     # Add ipfs contents
-    get_ipfs_data()
+    get_ipfs_data("merged_requests.json", "tools_info.json")
 
 
 if __name__ == "__main__":
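Taken together, the additions to this file set up an incremental refresh: read the newest stored trade timestamp, fetch only events after it into new_*.json files, fold the new rows into the historical parquet files, and merge the JSON sources back at the end. A rough sketch of that flow, as illustrative glue rather than code from the commit:

# Illustrative glue only; both functions are defined in this file.
latest_timestamp = get_mech_events_since_last_run()  # writes the new_*.json files
if latest_timestamp is not None:
    # Downstream ETL turns new_tools_info.json and the new trades into parquet files,
    # update_trades_parquet()/update_tools_parquet() fold them into the historical data,
    # and the JSON sources are merged back at the end:
    update_json_files()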
scripts/mech_request_utils.py CHANGED
@@ -107,7 +107,7 @@ query delivers_query($requestId: BigInt, $blockNumber_gte: BigInt, $blockNumber_
 """
 
 
-def collect_all_mech_requests(from_block: int, to_block: int) -> Tuple:
+def collect_all_mech_requests(from_block: int, to_block: int, filename: str) -> Tuple:
 
     print(f"Fetching all mech requests from {from_block} to {to_block}")
     mech_requests = {}
@@ -146,17 +146,18 @@ def collect_all_mech_requests(from_block: int, to_block: int) -> Tuple:
         print(f"New execution for id_gt = {id_gt}")
         if len(duplicated_reqIds) > 0:
             print(f"Number of duplicated req Ids = {len(duplicated_reqIds)}")
-        save_json_file(mech_requests, "mech_requests.json")
+        save_json_file(mech_requests, filename)
 
     print(f"Number of requests = {len(mech_requests)}")
     print(f"Number of duplicated req Ids = {len(duplicated_reqIds)}")
-    save_json_file(mech_requests, "mech_requests.json")
+    save_json_file(mech_requests, filename)
     return mech_requests, duplicated_reqIds
 
 
-def collect_all_mech_delivers(from_block: int, to_block: int) -> Tuple:
+def collect_all_mech_delivers(from_block: int, to_block: int, filename: str) -> Tuple:
 
     print(f"Fetching all mech delivers from {from_block} to {to_block}")
+    # TODO save as new json file, check how to merge later json files
     mech_delivers = {}
     duplicated_requestIds = []
     transport = RequestsHTTPTransport(url=THEGRAPH_ENDPOINT)
@@ -196,10 +197,10 @@ def collect_all_mech_delivers(from_block: int, to_block: int) -> Tuple:
         print(f"New execution for id_gt = {id_gt}")
         if len(duplicated_requestIds) > 0:
             print(f"Number of duplicated request id = {len(duplicated_requestIds)}")
-        save_json_file(mech_delivers, "mech_delivers.json")
+        save_json_file(mech_delivers, filename)
     print(f"Number of delivers = {len(mech_delivers)}")
     print(f"Number of duplicated request id = {len(duplicated_requestIds)}")
-    save_json_file(mech_delivers, "mech_delivers.json")
+    save_json_file(mech_delivers, filename)
     return mech_delivers, duplicated_requestIds
 
 
@@ -343,10 +344,27 @@ def save_json_file(data: Dict[str, Any], filename: str):
         json.dump(data, file, indent=2)
 
 
-def clean_mech_delivers() -> None:
+def merge_json_files(old_file: str, new_file: str):
+    # read old file
+    with open(JSON_DATA_DIR / old_file, "r") as f:
+        old_data = json.load(f)
+
+    # read the new file
+    with open(JSON_DATA_DIR / new_file, "r") as f:
+        new_data = json.load(f)
+
+    # Merge the two JSON files and remove duplicates
+    old_data.update(new_data)
+
+    # Save the merged JSON file
+    print(f"{old_file} updated")
+    save_json_file(old_data, old_file)
+
+
+def clean_mech_delivers(requests_filename: str, delivers_filename: str) -> None:
     """Function to remove from the delivers json file the request Ids that are not in the mech requests"""
     # read mech requests
-    with open(JSON_DATA_DIR / "mech_requests.json", "r") as file:
+    with open(JSON_DATA_DIR / requests_filename, "r") as file:
         mech_requests = json.load(file)
 
     list_reqIds = [mech_requests[k].get("requestId") for k in mech_requests.keys()]
@@ -354,7 +372,7 @@ def clean_mech_delivers() -> None:
     list_reqIds = list(set(list_reqIds))
 
     # remove requestIds from delivers that are not in this list
-    with open(JSON_DATA_DIR / "mech_delivers.json", "r") as file:
+    with open(JSON_DATA_DIR / delivers_filename, "r") as file:
         mech_delivers = json.load(file)
 
     print(f"original size of the file {len(mech_delivers)}")
@@ -366,7 +384,7 @@ def clean_mech_delivers() -> None:
     for r in to_delete:
         mech_delivers.pop(r, None)
     print(f"final size of the file {len(mech_delivers)}")
-    save_json_file(mech_delivers, "mech_delivers.json")
+    save_json_file(mech_delivers, delivers_filename)
 
 
 def get_request_block_numbers(
@@ -387,11 +405,11 @@ def update_block_request_map(block_request_id_map: dict) -> None:
         pickle.dump(block_request_id_map, handle, protocol=pickle.HIGHEST_PROTOCOL)
 
 
-def fix_duplicate_requestIds() -> dict:
-    with open(JSON_DATA_DIR / "mech_delivers.json", "r") as file:
+def fix_duplicate_requestIds(requests_filename: str, delivers_filename: str) -> dict:
+    with open(JSON_DATA_DIR / delivers_filename, "r") as file:
         data_delivers = json.load(file)
 
-    with open(JSON_DATA_DIR / "mech_requests.json", "r") as file:
+    with open(JSON_DATA_DIR / requests_filename, "r") as file:
         mech_requests = json.load(file)
     list_request_Ids = list(data_delivers.keys())
 
@@ -423,12 +441,15 @@ def fix_duplicate_requestIds() -> dict:
     return block_request_id_map
 
 
-def merge_requests_delivers() -> None:
+def merge_requests_delivers(
+    requests_filename: str, delivers_filename: str, filename: str
+) -> None:
+    # TODO
     """Function to map requests and delivers"""
-    with open(JSON_DATA_DIR / "mech_delivers.json", "r") as file:
+    with open(JSON_DATA_DIR / delivers_filename, "r") as file:
         mech_delivers = json.load(file)
 
-    with open(JSON_DATA_DIR / "mech_requests.json", "r") as file:
+    with open(JSON_DATA_DIR / requests_filename, "r") as file:
         mech_requests = json.load(file)
 
     # read the block map for duplicated requestIds
@@ -458,12 +479,12 @@ def merge_requests_delivers() -> None:
 
         # extract the info and append it to the original mech request dictionary
         mech_req["deliver"] = deliver_dict
-    save_json_file(mech_requests, "merged_requests.json")
+    save_json_file(mech_requests, filename)
     return
 
 
-def get_ipfs_data():
-    with open(JSON_DATA_DIR / "merged_requests.json", "r") as file:
+def get_ipfs_data(input_filename: str, output_filename: str):
+    with open(JSON_DATA_DIR / input_filename, "r") as file:
         mech_requests = json.load(file)
 
     total_keys_to_traverse = list(mech_requests.keys())
@@ -491,7 +512,7 @@ def get_ipfs_data():
             partial_dict = future.result()
             updated_mech_requests.update(partial_dict)
 
-    save_json_file(updated_mech_requests, "tools_info.json")
+    save_json_file(updated_mech_requests, output_filename)
 
     # delivers
     print("UPDATING IPFS CONTENTS OF DELIVERS")
@@ -517,7 +538,7 @@ def get_ipfs_data():
             partial_dict = future.result()
             final_tools_content.update(partial_dict)
 
-    save_json_file(final_tools_content, "tools_info.json")
+    save_json_file(final_tools_content, output_filename)
 
 
 def only_delivers_loop():
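Worth noting about the new merge_json_files(): it boils down to a key-wise dict.update(), so a record present in both the old and the new file is overwritten rather than duplicated. A tiny illustration with made-up keys:

# Illustration only: an entry present in both files ends up once, taken from the newer file.
old = {"req-1": {"blockNumber": "100"}}
new = {"req-1": {"blockNumber": "100"}, "req-2": {"blockNumber": "200"}}
old.update(new)
assert set(old) == {"req-1", "req-2"}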
scripts/profitability.py CHANGED
@@ -29,7 +29,11 @@ from tqdm import tqdm
 import numpy as np
 import os
 from pathlib import Path
-from get_mech_info import DATETIME_60_DAYS_AGO
+from get_mech_info import (
+    DATETIME_60_DAYS_AGO,
+    update_trades_parquet,
+    update_tools_parquet,
+)
 from utils import SUBGRAPH_API_KEY, wei_to_unit, convert_hex_to_int, _to_content
 from queries import omen_xdai_trades_query, conditional_tokens_gc_user_query
 from staking import label_trades_by_staking
@@ -318,9 +322,9 @@ def create_fpmmTrades(rpc: str, from_timestamp: float = DEFAULT_FROM_TIMESTAMP):
 
 def prepare_profitalibity_data(
     rpc: str,
-    tools_filename: str = "tools.parquet",
-    trades_filename: str = "fpmmTrades.parquet",
-    from_timestamp: float = DEFAULT_60_DAYS_AGO_TIMESTAMP,
+    tools_filename: str,
+    trades_filename: str,
+    from_timestamp: float,
 ):
     """Prepare data for profitalibity analysis."""
 
@@ -343,13 +347,12 @@ def prepare_profitalibity_data(
         return
 
     # Check if fpmmTrades.parquet is in the same directory
+    print("Generating the trades file")
     try:
-        fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
-        print(f"{trades_filename} loaded")
-    except FileNotFoundError:
-        print("fpmmTrades.parquet not found. Creating fpmmTrades.parquet...")
         fpmmTrades = create_fpmmTrades(rpc, from_timestamp=from_timestamp)
-        fpmmTrades.to_parquet(DATA_DIR / "fpmmTrades.parquet", index=False)
+        fpmmTrades.to_parquet(DATA_DIR / trades_filename, index=False)
+    except FileNotFoundError:
+        print(f"Error creating {trades_filename} file .")
 
     # make sure trader_address is in the columns
     assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
@@ -547,9 +550,10 @@ def summary_analyse(df):
 
 def run_profitability_analysis(
     rpc: str,
-    tools_filename: str = "tools.parquet",
-    trades_filename: str = "fpmmTrades.parquet",
-    from_timestamp: float = DEFAULT_60_DAYS_AGO_TIMESTAMP,
+    tools_filename: str,
+    trades_filename: str,
+    from_timestamp: float,
+    merge: bool = False,
 ):
     """Create all trades analysis."""
 
@@ -558,12 +562,16 @@ def run_profitability_analysis(
     fpmmTrades, tools = prepare_profitalibity_data(
         rpc, tools_filename, trades_filename, from_timestamp
     )
-    tools["trader_address"] = tools["trader_address"].str.lower()
 
     # all trades profitability df
     print("Analysing trades...")
     all_trades_df = analyse_all_traders(fpmmTrades, tools)
 
+    # TODO merge previous files if requested
+    if merge:
+        all_trades_df = update_trades_parquet(all_trades_df)
+        update_tools_parquet(tools_filename)
+
     # filter invalid markets. Condition: "is_invalid" is True
     invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
     invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
scripts/pull_data.py CHANGED
@@ -12,14 +12,13 @@ from markets import (
     etl as mkt_etl,
     DEFAULT_FILENAME as MARKETS_FILENAME,
 )
-from tools import (
-    DEFAULT_FILENAME as TOOLS_FILENAME,
-    generate_tools_file,
-)
-from profitability import run_profitability_analysis
+from tools import DEFAULT_FILENAME as TOOLS_FILENAME, generate_tools_file
+from profitability import run_profitability_analysis, DEFAULT_60_DAYS_AGO_TIMESTAMP
 from utils import get_question, current_answer, RPC
 from get_mech_info import (
     get_mech_events_last_60_days,
+    get_mech_events_since_last_run,
+    update_json_files,
 )
 from update_tools_accuracy import compute_tools_accuracy
 import gc
@@ -48,10 +47,10 @@ def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> li
     return results
 
 
-def add_current_answer():
+def add_current_answer(tools_filename: str):
     # Get currentAnswer from FPMMS
     fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
-    tools = pd.read_parquet(DATA_DIR / TOOLS_FILENAME)
+    tools = pd.read_parquet(DATA_DIR / tools_filename)
 
     # Get the question from the tools
     logging.info("Getting the question and current answer for the tools")
@@ -61,14 +60,14 @@ def add_current_answer():
     tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
     tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")
     # Save the tools data after the updates on the content
-    tools.to_parquet(DATA_DIR / TOOLS_FILENAME, index=False)
+    tools.to_parquet(DATA_DIR / tools_filename, index=False)
     del fpmms
 
 
-def updating_timestamps(rpc: str):
+def updating_timestamps(rpc: str, tools_filename: str):
     web3 = Web3(Web3.HTTPProvider(rpc))
 
-    tools = pd.read_parquet(DATA_DIR / TOOLS_FILENAME)
+    tools = pd.read_parquet(DATA_DIR / tools_filename)
 
     # Convert block number to timestamp
     logging.info("Converting block number to timestamp")
@@ -100,7 +99,7 @@ def updating_timestamps(rpc: str):
     )
 
     # Save the tools data after the updates on the content
-    tools.to_parquet(DATA_DIR / TOOLS_FILENAME, index=False)
+    tools.to_parquet(DATA_DIR / tools_filename, index=False)
 
     # Update t_map with new timestamps
     new_timestamps = (
@@ -120,6 +119,57 @@ def updating_timestamps(rpc: str):
     gc.collect()
 
 
+def only_new_weekly_analysis():
+    """Run weekly analysis for the FPMMS project."""
+    rpc = RPC
+    # Run markets ETL
+    logging.info("Running markets ETL")
+    mkt_etl(MARKETS_FILENAME)
+    logging.info("Markets ETL completed")
+
+    # New tools ETL
+    logging.info("Generating the mech json files")
+    # get only new data
+    latest_timestamp = get_mech_events_since_last_run()
+    logging.info(f"Finished generating the mech json files from {latest_timestamp}")
+
+    # Run tools ETL
+    logging.info("Generate and parse the tools content")
+    # generate only new file
+    generate_tools_file("new_tools_info.json", "new_tools.parquet")
+    logging.info("Tools ETL completed")
+
+    add_current_answer("new_tools.parquet")
+
+    # Run profitability analysis
+    logging.info("Running profitability analysis")
+
+    run_profitability_analysis(
+        rpc=rpc,
+        tools_filename="new_tools.parquet",
+        trades_filename="new_fpmmTrades.parquet",
+        from_timestamp=int(latest_timestamp.timestamp()),
+        merge=True,
+    )
+
+    logging.info("Profitability analysis completed")
+
+    # merge new json files with old json files
+    update_json_files()
+
+    # TODO move new parquet files to a tmp folder
+
+    try:
+        updating_timestamps(rpc, TOOLS_FILENAME)
+    except Exception as e:
+        logging.error("Error while updating timestamps of tools")
+        print(e)
+
+    compute_tools_accuracy()
+
+    logging.info("Weekly analysis files generated and saved")
+
+
 def weekly_analysis():
     """Run weekly analysis for the FPMMS project."""
     rpc = RPC
@@ -130,26 +180,29 @@ def weekly_analysis():
 
     # New tools ETL
     logging.info("Generating the mech json files")
+
     get_mech_events_last_60_days()
     logging.info("Finished generating the mech json files")
 
     # Run tools ETL
     logging.info("Generate and parse the tools content")
-    generate_tools_file()
+
+    generate_tools_file("tools_info.json", TOOLS_FILENAME)
     logging.info("Tools ETL completed")
+    add_current_answer(TOOLS_FILENAME)
 
     # Run profitability analysis
     logging.info("Running profitability analysis")
-    if os.path.exists(DATA_DIR / "fpmmTrades.parquet"):
-        os.remove(DATA_DIR / "fpmmTrades.parquet")
     run_profitability_analysis(
         rpc=rpc,
+        tools_filename=TOOLS_FILENAME,
+        trades_filename="fpmmTrades.parquet",
+        from_timestamp=DEFAULT_60_DAYS_AGO_TIMESTAMP,
     )
-
     logging.info("Profitability analysis completed")
-    add_current_answer()
+
     try:
-        updating_timestamps(rpc)
+        updating_timestamps(rpc, TOOLS_FILENAME)
     except Exception as e:
         logging.error("Error while updating timestamps of tools")
         print(e)
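The diff does not show whether the `__main__` block of pull_data.py was switched to the new entry point; a hypothetical invocation could look like the following sketch.

# Hypothetical __main__; the commit does not show how the entry point was updated.
if __name__ == "__main__":
    only_new_weekly_analysis()  # incremental refresh since the last stored trade
    # weekly_analysis()         # full 60-day rebuild, still available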
scripts/tools.py CHANGED
@@ -560,9 +560,7 @@ def etl(
     return tools
 
 
-def parse_store_json_events_parallel(
-    json_events: Dict[str, Any], filename: str = DEFAULT_FILENAME
-):
+def parse_store_json_events_parallel(json_events: Dict[str, Any], output_filename: str):
     total_nr_events = len(json_events)
     ids_to_traverse = list(json_events.keys())
     print(f"Parsing {total_nr_events} events")
@@ -596,19 +594,19 @@ def parse_store_json_events_parallel(
     try:
         if "result" in tools.columns:
             tools = tools.drop(columns=["result"])
-        tools.to_parquet(DATA_DIR / filename, index=False)
+        tools.to_parquet(DATA_DIR / output_filename, index=False)
     except Exception as e:
         print(f"Failed to write tools data: {e}")
 
     return tools
 
 
-def generate_tools_file():
+def generate_tools_file(input_filename: str, output_filename: str):
     """Function to parse the json mech events and generate the parquet tools file"""
     try:
-        with open(JSON_DATA_DIR / "tools_info.json", "r") as file:
+        with open(JSON_DATA_DIR / input_filename, "r") as file:
             file_contents = json.load(file)
-        parse_store_json_events_parallel(file_contents)
+        parse_store_json_events_parallel(file_contents, output_filename)
     except Exception as e:
         print(f"An Exception happened while parsing the json events {e}")
 
scripts/update_tools_accuracy.py CHANGED
@@ -66,19 +66,32 @@ def update_tools_accuracy(
     print(tools_to_update)
     existing_tools = list(tools_acc["tool"].values)
     for tool in tools_to_update:
+        new_accuracy = acc_info[acc_info["tool"] == tool]["tool_accuracy"].values[0]
+        new_volume = acc_info[acc_info["tool"] == tool]["total_requests"].values[0]
+        if no_timeline_info:
+            new_min_timeline = None
+            new_max_timeline = None
+        else:
+            new_min_timeline = acc_info[acc_info["tool"] == tool]["min"].values[0]
+            new_max_timeline = acc_info[acc_info["tool"] == tool]["max"].values[0]
         if tool in existing_tools:
-            new_accuracy = acc_info[acc_info["tool"] == tool]["tool_accuracy"].values[0]
-            new_volume = acc_info[acc_info["tool"] == tool]["total_requests"].values[0]
-            if no_timeline_info:
-                new_min_timeline = None
-                new_max_timeline = None
-            else:
-                new_min_timeline = acc_info[acc_info["tool"] == tool]["min"].values[0]
-                new_max_timeline = acc_info[acc_info["tool"] == tool]["max"].values[0]
+
             tools_acc.loc[tools_acc["tool"] == tool, "tool_accuracy"] = new_accuracy
             tools_acc.loc[tools_acc["tool"] == tool, "total_requests"] = new_volume
             tools_acc.loc[tools_acc["tool"] == tool, "min"] = new_min_timeline
             tools_acc.loc[tools_acc["tool"] == tool, "max"] = new_max_timeline
+        else:
+            # new tool to add to the file
+            # tool,tool_accuracy,total_requests,min,max
+            new_row = {
+                "tool": tool,
+                "tool_accuracy": new_accuracy,
+                "total_requests": new_volume,
+                "min": new_min_timeline,
+                "max": new_max_timeline,
+            }
+            tools_acc = pd.concat([tools_acc, pd.DataFrame(new_row)], ignore_index=True)
+
     print(tools_acc)
     return tools_acc
 
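One aside on the new-row branch above: pandas needs list-like values (or an explicit index) to build a DataFrame from a dict of scalars, so the usual append pattern wraps the dict in a list before concatenating. A self-contained illustration with made-up numbers:

import pandas as pd

tools_acc = pd.DataFrame(
    {"tool": ["alpha"], "tool_accuracy": [0.61], "total_requests": [120], "min": [None], "max": [None]}
)
new_row = {"tool": "beta", "tool_accuracy": 0.55, "total_requests": 80, "min": None, "max": None}
# Wrapping the dict in a list gives pandas a single row to build the frame from.
tools_acc = pd.concat([tools_acc, pd.DataFrame([new_row])], ignore_index=True)
print(tools_acc)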