rosacastillo committed
Commit 3859cbd
1 Parent(s): 9382590

new weekly data using new pipeline

data/all_trades_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54d1564ac5393d51c6a7218a9d6afabb78c6166f2661afa5c68fe8ec166ba213
-size 2885806
+oid sha256:1df952a693ba00cc0b11bca8ff4c6805415f2d006b3cd47242e43e7cdc7d5fe1
+size 3266876
data/fpmmTrades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c95bdbd08b0fa875044c53bfc983c0e4a76d861c703da2f4a04cdec20a9f13c
-size 8600942
+oid sha256:390f96495582e69ae82225a61e6473c1fe6536081b326a6bd11617be45ce672a
+size 10816943
data/fpmms.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b4ad4e1780f6dcb812787d397a738141e53db0fc6b588f386586dddabca034e
-size 494787
+oid sha256:3b5dcd19c7922e3f7168a139b0d63c335c921343faa15852b6ae04888f7e006a
+size 504817
data/invalid_trades.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8199109e6224b609408037098e23f11f6e38a2f2526f9376fca895ee2728edd9
-size 77169
+oid sha256:30e0fa52d0c605961b5a12bec47bc3b0288b02b814c61cc7f8a33ad793f8bd30
+size 84013
data/summary_profitability.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c7253ea40d66ba0f06955bb2b96a589fb20e154eb2cfd95db85872d80d80a4b
-size 71470
+oid sha256:a55a6c4c7ef5da8db27b61c268eccbd1d426c456a0d17efa4b22b7c69ed1454d
+size 78788
data/t_map.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0705cfa2166283351129e1dac63aab41a4231e60f6873d3026eb23da5cdbf27
-size 21870626
+oid sha256:7b67cf178943b82b5286b7c2adb6329e1e23fffce807ebf299684746813f55de
+size 22992649
data/tools.parquet CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5594bd432a7db7fc7bf8ccbf5c40a10bd452643e2a884b5ae221a9bfe21271e
-size 342399362
+oid sha256:7e4ace8d172836c379ee23bde678f19d9eeec28e7bd31bf9e95dc914ac5c9bc5
+size 407088092
data/tools_accuracy.csv CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9ca0e77ced7ed3d627b0cbc2028af4116629135ed781b4e12c3c410316208a72
-size 1121
+oid sha256:f7a3622338d1eb2f23824031733ecdd77ae77eff7cb2b1c879aba05b0966d2cc
+size 1133
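
All eight data files above are tracked with Git LFS, so the commit rewrites only their three-line pointer files (spec version, sha256 content hash, byte size); the new parquet payloads live in LFS storage, and every file grew, consistent with a week of appended data. A minimal sketch of reading such a pointer, assuming only the key-value format visible in the hunks (parse_lfs_pointer is an illustrative helper, not part of this repo):

# Illustrative helper (not from this repo): split a Git LFS pointer file
# into its "version", "oid", and "size" fields.
def parse_lfs_pointer(text: str) -> dict[str, str]:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:1df952a693ba00cc0b11bca8ff4c6805415f2d006b3cd47242e43e7cdc7d5fe1
size 3266876
"""
info = parse_lfs_pointer(pointer)
print(info["oid"])   # new hash of all_trades_profitability.parquet
print(info["size"])  # 3266876 bytes, up from 2885806
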
scripts/get_mech_info.py CHANGED
@@ -120,7 +120,7 @@ def update_json_files():
     merge_json_files("tools_info.json", "new_tools_info.json")


-def update_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
+def update_fpmmTrades_parquet(trades_filename: str) -> None:
     # Read old trades parquet file
     try:
         old_trades_df = pd.read_parquet(DATA_DIR / "fpmmTrades.parquet")
@@ -128,58 +128,78 @@ def update_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
         print(f"Error reading old trades parquet file {e}")
         return None

+    try:
+        new_trades_df = pd.read_parquet(DATA_DIR / trades_filename)
+    except Exception as e:
+        print(f"Error reading new trades parquet file {e}")
+        return None
+
     # merge two dataframes
     merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
+    # avoid numpy objects
+    merge_df["fpmm.arbitrationOccurred"] = merge_df["fpmm.arbitrationOccurred"].astype(
+        bool
+    )
+    merge_df["fpmm.isPendingArbitration"] = merge_df[
+        "fpmm.isPendingArbitration"
+    ].astype(bool)

     # Check for duplicates
-    duplicates = merge_df.duplicated()
-
-    # Print the duplicates
-    print(duplicates)
-
-    # Get the number of duplicates
-    num_duplicates = duplicates.sum()
-    print("Number of duplicates:", num_duplicates)
-
-    # Get the rows with duplicates
-    duplicate_rows = merge_df[duplicates]
-    print("Duplicate rows:\n", duplicate_rows)
+    print(f"Initial length before removing duplicates = {len(merge_df)}")

     # Remove duplicates
-    merge_df.drop_duplicates(inplace=True)
+    # fpmm.outcomes is a numpy array, so exclude it from the duplicate check
+    merge_df = merge_df.drop_duplicates(
+        subset=[col for col in merge_df.columns if col != "fpmm.outcomes"]
+    )
+    print(f"Final length after removing duplicates = {len(merge_df)}")

     # save the parquet file
     merge_df.to_parquet(DATA_DIR / "fpmmTrades.parquet", index=False)

+    return
+
+
+def update_all_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
+    # Read old all_trades parquet file
+    try:
+        old_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
+    except Exception as e:
+        print(f"Error reading old trades parquet file {e}")
+        return None
+    # merge two dataframes
+    merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
+
+    # Check for duplicates
+    print(f"Initial length before removing duplicates = {len(merge_df)}")
+
+    # Remove duplicates
+    merge_df.drop_duplicates(inplace=True)
+    print(f"Final length after removing duplicates = {len(merge_df)}")
     return merge_df


-def update_tools_parquet(new_tools_df: pd.DataFrame):
+def update_tools_parquet(new_tools_filename: str):
     try:
         old_tools_df = pd.read_parquet(DATA_DIR / "tools.parquet")
     except Exception as e:
         print(f"Error reading old tools parquet file {e}")
         return None
+    try:
+        new_tools_df = pd.read_parquet(DATA_DIR / new_tools_filename)
+    except Exception as e:
+        print(f"Error reading new tools parquet file {e}")
+        return None

     # merge two dataframes
     merge_df = pd.concat([old_tools_df, new_tools_df], ignore_index=True)

     # Check for duplicates
-    duplicates = merge_df.duplicated()
-
-    # Print the duplicates
-    print(duplicates)
-
-    # Get the number of duplicates
-    num_duplicates = duplicates.sum()
-    print("Number of duplicates:", num_duplicates)
-
-    # Get the rows with duplicates
-    duplicate_rows = merge_df[duplicates]
-    print("Duplicate rows:\n", duplicate_rows)
+    print(f"Initial length before removing duplicates = {len(merge_df)}")

     # Remove duplicates
     merge_df.drop_duplicates(inplace=True)
+    print(f"Final length after removing duplicates = {len(merge_df)}")

     # save the parquet file
     merge_df.to_parquet(DATA_DIR / "tools.parquet", index=False)
@@ -290,6 +310,13 @@ def get_mech_events_since_last_run():
         int(latest_timestamp.timestamp()),
         int((latest_timestamp + five_seconds).timestamp()),
     )
+    # expecting only one block
+    last_run_block_number = last_run_block_number.get("number", "")
+    if not last_run_block_number.isdigit():
+        raise ValueError(
+            "Could not find a valid block number for last collected data"
+        )
+    last_run_block_number = int(last_run_block_number)
     last_block_number = get_last_block_number()

     # mech requests
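
The subset= workaround in update_fpmmTrades_parquet above exists because DataFrame.drop_duplicates hashes cell values, and a cell holding a numpy array (as fpmm.outcomes does) raises TypeError: unhashable type: 'numpy.ndarray'; the .astype(bool) casts serve the same goal of keeping cells plain hashable scalars. A minimal sketch reproducing the failure and the fix, with illustrative column names:

import numpy as np
import pandas as pd

# Mimic fpmmTrades rows whose "outcomes" cells hold numpy arrays.
df = pd.DataFrame(
    {
        "title": ["q1", "q1", "q2"],
        "outcomes": [np.array(["Yes", "No"]) for _ in range(3)],
    }
)

try:
    df.drop_duplicates()
except TypeError as e:
    print(f"drop_duplicates failed: {e}")  # unhashable type: 'numpy.ndarray'

# The commit's workaround: deduplicate on every column except the array one.
deduped = df.drop_duplicates(subset=[c for c in df.columns if c != "outcomes"])
print(len(deduped))  # 2
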
scripts/profitability.py CHANGED
@@ -31,10 +31,18 @@ import os
 from pathlib import Path
 from get_mech_info import (
     DATETIME_60_DAYS_AGO,
-    update_trades_parquet,
+    update_fpmmTrades_parquet,
     update_tools_parquet,
+    update_all_trades_parquet,
+)
+from utils import (
+    SUBGRAPH_API_KEY,
+    wei_to_unit,
+    convert_hex_to_int,
+    _to_content,
+    read_parquet_files,
+    JSON_DATA_DIR,
 )
-from utils import SUBGRAPH_API_KEY, wei_to_unit, convert_hex_to_int, _to_content
 from queries import omen_xdai_trades_query, conditional_tokens_gc_user_query
 from staking import label_trades_by_staking

@@ -567,13 +575,21 @@ def run_profitability_analysis(
     print("Analysing trades...")
     all_trades_df = analyse_all_traders(fpmmTrades, tools)

-    # TODO merge previous files if requested
+    # merge previous files if requested
     if merge:
-        all_trades_df = update_trades_parquet(all_trades_df)
+        update_fpmmTrades_parquet(trades_filename)
         update_tools_parquet(tools_filename)
+        all_trades_df = update_all_trades_parquet(all_trades_df)

     # filter invalid markets. Condition: "is_invalid" is True
     invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
+    if merge:
+        try:
+            old_invalid_trades = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
+            merge_df = pd.concat([old_invalid_trades, invalid_trades], ignore_index=True)
+            invalid_trades = merge_df.drop_duplicates()
+        except Exception as e:
+            print(f"Error reading old invalid trades parquet file {e}")
     invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)

     all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
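
Every merge helper this commit touches follows the same incremental pattern: read the existing parquet, concatenate the new rows, log the lengths around a deduplication pass, and write the result back. A generic sketch of that pattern under the same data layout (merge_parquet is a hypothetical consolidation, not a function from these scripts):

from pathlib import Path
from typing import Optional

import pandas as pd

DATA_DIR = Path("data")  # assumed to match the scripts' own DATA_DIR


def merge_parquet(filename: str, new_df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """Append new_df to DATA_DIR/filename, deduplicate, and persist."""
    try:
        old_df = pd.read_parquet(DATA_DIR / filename)
    except Exception as e:
        print(f"Error reading old parquet file {filename}: {e}")
        return None

    merged = pd.concat([old_df, new_df], ignore_index=True)
    print(f"Initial length before removing duplicates = {len(merged)}")
    merged = merged.drop_duplicates()
    print(f"Final length after removing duplicates = {len(merged)}")

    merged.to_parquet(DATA_DIR / filename, index=False)
    return merged
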
scripts/pull_data.py CHANGED
@@ -141,9 +141,8 @@ def only_new_weekly_analysis():

     add_current_answer("new_tools.parquet")

     # Run profitability analysis
     logging.info("Running profitability analysis")
-
     run_profitability_analysis(
         rpc=rpc,
         tools_filename="new_tools.parquet",
@@ -213,7 +212,8 @@ def weekly_analysis():


 if __name__ == "__main__":
-    weekly_analysis()
+    only_new_weekly_analysis()
+    # weekly_analysis()
     # rpc = RPC
     # updating_timestamps(rpc)
     # compute_tools_accuracy()
scripts/utils.py CHANGED
@@ -428,3 +428,33 @@ def _to_content(q: str) -> dict[str, Any]:
         "extensions": {"headers": None},
     }
     return finalized_query
+
+
+def read_parquet_files(tools_filename: str, trades_filename: str):
+    # Check if the tools parquet file is in the data directory
+    try:
+        tools = pd.read_parquet(DATA_DIR / tools_filename)
+
+        # make sure trader_address is in the columns
+        assert "trader_address" in tools.columns, "trader_address column not found"
+
+        # lowercase and strip trader_address
+        tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
+
+        # drop duplicates
+        tools.drop_duplicates(inplace=True)
+
+        print(f"{tools_filename} loaded")
+    except FileNotFoundError:
+        print(f"{tools_filename} not found. Please run tools.py first.")
+        return
+    try:
+        fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
+        fpmmTrades["trader_address"] = (
+            fpmmTrades["trader_address"].str.lower().str.strip()
+        )
+    except FileNotFoundError:
+        print(f"{trades_filename} not found.")
+        return
+
+    return tools, fpmmTrades
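
Because read_parquet_files bails out with a bare return (i.e. None) when either file is missing, a caller that unpacks the result directly would raise a TypeError; a usage sketch with a guard, where the trades filename is a placeholder rather than a name taken from the scripts:

result = read_parquet_files("new_tools.parquet", "new_fpmmTrades.parquet")
if result is None:
    raise SystemExit("required parquet files are missing")
tools, fpmmTrades = result
print(f"loaded {len(tools)} tool rows and {len(fpmmTrades)} trades")
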