rosacastillo committed
Commit · 3859cbd
1 Parent(s): 9382590

new weekly data using new pipeline
Browse files:
- data/all_trades_profitability.parquet +2 -2
- data/fpmmTrades.parquet +2 -2
- data/fpmms.parquet +2 -2
- data/invalid_trades.parquet +2 -2
- data/summary_profitability.parquet +2 -2
- data/t_map.pkl +2 -2
- data/tools.parquet +2 -2
- data/tools_accuracy.csv +2 -2
- scripts/get_mech_info.py +54 -27
- scripts/profitability.py +18 -4
- scripts/pull_data.py +3 -3
- scripts/utils.py +30 -0
data/all_trades_profitability.parquet CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1df952a693ba00cc0b11bca8ff4c6805415f2d006b3cd47242e43e7cdc7d5fe1
+size 3266876
```
data/fpmmTrades.parquet CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:390f96495582e69ae82225a61e6473c1fe6536081b326a6bd11617be45ce672a
+size 10816943
```
data/fpmms.parquet CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:3b5dcd19c7922e3f7168a139b0d63c335c921343faa15852b6ae04888f7e006a
+size 504817
```
data/invalid_trades.parquet CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:30e0fa52d0c605961b5a12bec47bc3b0288b02b814c61cc7f8a33ad793f8bd30
+size 84013
```
data/summary_profitability.parquet CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:a55a6c4c7ef5da8db27b61c268eccbd1d426c456a0d17efa4b22b7c69ed1454d
+size 78788
```
data/t_map.pkl CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7b67cf178943b82b5286b7c2adb6329e1e23fffce807ebf299684746813f55de
+size 22992649
```
data/tools.parquet CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7e4ace8d172836c379ee23bde678f19d9eeec28e7bd31bf9e95dc914ac5c9bc5
+size 407088092
```
data/tools_accuracy.csv CHANGED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f7a3622338d1eb2f23824031733ecdd77ae77eff7cb2b1c879aba05b0966d2cc
+size 1133
```
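All of the data/ changes above are Git LFS pointer rewrites: replacing a tracked file's contents updates only the `oid sha256` and `size` lines of its pointer (the old values are truncated in this view). A minimal sketch, assuming the LFS objects have been pulled locally (e.g. `git lfs pull`), for re-checking a pointer against the fetched blob:

```python
import hashlib
from pathlib import Path


def verify_lfs_pointer(pointer_text: str, blob_path: Path) -> bool:
    """Compare a Git LFS pointer's recorded sha256/size with the actual file."""
    # Pointer layout: "version ...", "oid sha256:<hex>", "size <bytes>".
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines()[1:])
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    blob = blob_path.read_bytes()
    return hashlib.sha256(blob).hexdigest() == expected_oid and len(blob) == expected_size
```

For instance, the `data/tools_accuracy.csv` pointer above records `size 1133`, so the fetched CSV should be exactly 1133 bytes.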
scripts/get_mech_info.py CHANGED

```diff
@@ -120,7 +120,7 @@ def update_json_files():
     merge_json_files("tools_info.json", "new_tools_info.json")
 
 
-def update_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
+def update_fpmmTrades_parquet(trades_filename: str) -> pd.DataFrame:
     # Read old trades parquet file
     try:
         old_trades_df = pd.read_parquet(DATA_DIR / "fpmmTrades.parquet")
@@ -128,58 +128,78 @@ def update_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
         print(f"Error reading old trades parquet file {e}")
         return None
 
+    try:
+        new_trades_df = pd.read_parquet(DATA_DIR / trades_filename)
+    except Exception as e:
+        print(f"Error reading new trades parquet file {e}")
+        return None
+
     # merge two dataframes
     merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
+    # avoid numpy objects
+    merge_df["fpmm.arbitrationOccurred"] = merge_df["fpmm.arbitrationOccurred"].astype(
+        bool
+    )
+    merge_df["fpmm.isPendingArbitration"] = merge_df[
+        "fpmm.isPendingArbitration"
+    ].astype(bool)
 
     # Check for duplicates
-    duplicates = merge_df.duplicated()
-
-    # Print the duplicates
-    print(duplicates)
-
-    # Get the number of duplicates
-    num_duplicates = duplicates.sum()
-    print("Number of duplicates:", num_duplicates)
-
-    # Get the rows with duplicates
-    duplicate_rows = merge_df[duplicates]
-    print("Duplicate rows:\n", duplicate_rows)
+    print(f"Initial length before removing duplicates= {len(merge_df)}")
 
     # Remove duplicates
-    merge_df.drop_duplicates(inplace=True)
+    # fpmm.outcomes is a numpy array
+    merge_df = merge_df.drop_duplicates(
+        subset=[col for col in merge_df.columns if col != "fpmm.outcomes"]
+    )
+    print(f"Final length after removing duplicates= {len(merge_df)}")
 
     # save the parquet file
     merge_df.to_parquet(DATA_DIR / "fpmmTrades.parquet", index=False)
 
+    return
+
+
+def update_all_trades_parquet(new_trades_df: pd.DataFrame) -> pd.DataFrame:
+    # Read old all_trades parquet file
+    try:
+        old_trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
+    except Exception as e:
+        print(f"Error reading old trades parquet file {e}")
+        return None
+    # merge two dataframes
+    merge_df = pd.concat([old_trades_df, new_trades_df], ignore_index=True)
+
+    # Check for duplicates
+    print(f"Initial length before removing duplicates= {len(merge_df)}")
+
+    # Remove duplicates
+    merge_df.drop_duplicates(inplace=True)
+    print(f"Final length after removing duplicates= {len(merge_df)}")
     return merge_df
 
 
-def update_tools_parquet(new_tools_df: pd.DataFrame):
+def update_tools_parquet(new_tools_filename: str):
     try:
         old_tools_df = pd.read_parquet(DATA_DIR / "tools.parquet")
     except Exception as e:
         print(f"Error reading old tools parquet file {e}")
         return None
+    try:
+        new_tools_df = pd.read_parquet(DATA_DIR / new_tools_filename)
+    except Exception as e:
+        print(f"Error reading new tools parquet file {e}")
+        return None
 
     # merge two dataframes
     merge_df = pd.concat([old_tools_df, new_tools_df], ignore_index=True)
 
     # Check for duplicates
-    duplicates = merge_df.duplicated()
-
-    # Print the duplicates
-    print(duplicates)
-
-    # Get the number of duplicates
-    num_duplicates = duplicates.sum()
-    print("Number of duplicates:", num_duplicates)
-
-    # Get the rows with duplicates
-    duplicate_rows = merge_df[duplicates]
-    print("Duplicate rows:\n", duplicate_rows)
+    print(f"Initial length before removing duplicates= {len(merge_df)}")
 
     # Remove duplicates
     merge_df.drop_duplicates(inplace=True)
+    print(f"Final length after removing duplicates= {len(merge_df)}")
 
     # save the parquet file
     merge_df.to_parquet(DATA_DIR / "tools.parquet", index=False)
@@ -290,6 +310,13 @@ def get_mech_events_since_last_run():
         int(latest_timestamp.timestamp()),
         int((latest_timestamp + five_seconds).timestamp()),
     )
+    # expecting only one block
+    last_run_block_number = last_run_block_number.get("number", "")
+    if last_run_block_number.isdigit():
+        last_run_block_number = int(last_run_block_number)
+
+    if last_run_block_number == "":
+        raise ValueError("Could not find a valid block number for last collected data")
     last_block_number = get_last_block_number()
 
     # mech requests
```
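The notable change in `update_fpmmTrades_parquet` is the subset-based deduplication: `fpmm.outcomes` cells hold numpy arrays, which are unhashable, so a plain `drop_duplicates()` over all columns raises `TypeError: unhashable type: 'numpy.ndarray'`; the `astype(bool)` casts similarly normalize object columns before the parquet write. A self-contained sketch of the same trick on toy data (column names shortened):

```python
import numpy as np
import pandas as pd

# Toy frame mimicking fpmmTrades: the "outcomes" column holds numpy arrays,
# so df.drop_duplicates() with no subset would raise TypeError here.
df = pd.DataFrame(
    {
        "title": ["market-a", "market-a", "market-b"],
        "arbitrationOccurred": np.array([0, 0, 1], dtype=object),
        "outcomes": [np.array(["Yes", "No"])] * 3,
    }
)

# Normalize the object column to plain bools before serializing.
df["arbitrationOccurred"] = df["arbitrationOccurred"].astype(bool)

# Deduplicate on every column except the unhashable array-valued one.
deduped = df.drop_duplicates(subset=[col for col in df.columns if col != "outcomes"])
print(len(deduped))  # 2: the repeated market-a row is dropped
```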
scripts/profitability.py CHANGED

```diff
@@ -31,10 +31,18 @@ import os
 from pathlib import Path
 from get_mech_info import (
     DATETIME_60_DAYS_AGO,
-
+    update_fpmmTrades_parquet,
     update_tools_parquet,
+    update_all_trades_parquet,
+)
+from utils import (
+    SUBGRAPH_API_KEY,
+    wei_to_unit,
+    convert_hex_to_int,
+    _to_content,
+    read_parquet_files,
+    JSON_DATA_DIR,
 )
-from utils import SUBGRAPH_API_KEY, wei_to_unit, convert_hex_to_int, _to_content
 from queries import omen_xdai_trades_query, conditional_tokens_gc_user_query
 from staking import label_trades_by_staking
 
@@ -567,13 +575,19 @@ def run_profitability_analysis(
     print("Analysing trades...")
     all_trades_df = analyse_all_traders(fpmmTrades, tools)
 
-    #
+    # merge previous files if requested
     if merge:
-
+        update_fpmmTrades_parquet(trades_filename)
         update_tools_parquet(tools_filename)
+        all_trades_df = update_all_trades_parquet(all_trades_df)
 
     # filter invalid markets. Condition: "is_invalid" is True
     invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
+    if merge:
+        try:
+            old_invalid_trades = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
+            merge_df = pd.concat([old_invalid_trades, invalid_trades], ignore_index=True)
+            invalid_trades = merge_df.drop_duplicates()
     invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
 
     all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
```
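One caveat in the invalid-trades hunk: the added `try:` has no `except` visible in the diff, so either it lives outside the shown lines or the file will not parse as committed. A hedged, self-contained version of that merge step (treating a missing file as a first run; `DATA_DIR` as in the scripts):

```python
from pathlib import Path

import pandas as pd

DATA_DIR = Path("data")  # assumption: mirrors the scripts' DATA_DIR constant


def merge_invalid_trades(new_invalid: pd.DataFrame) -> pd.DataFrame:
    """Append freshly detected invalid trades to the stored ones, dropping repeats."""
    try:
        old_invalid = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
    except FileNotFoundError:
        # First run: nothing stored yet, keep only the new rows.
        return new_invalid
    merged = pd.concat([old_invalid, new_invalid], ignore_index=True)
    return merged.drop_duplicates()
```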
scripts/pull_data.py CHANGED

```diff
@@ -141,9 +141,8 @@ def only_new_weekly_analysis():
 
     add_current_answer("new_tools.parquet")
 
-    # Run profitability analysis
+    # # Run profitability analysis
     logging.info("Running profitability analysis")
-
     run_profitability_analysis(
         rpc=rpc,
         tools_filename="new_tools.parquet",
@@ -213,7 +212,8 @@ def weekly_analysis():
 
 
 if __name__ == "__main__":
-
+    only_new_weekly_analysis()
+    # weekly_analysis()
     # rpc = RPC
     # updating_timestamps(rpc)
     # compute_tools_accuracy()
```
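`__main__` now hard-codes the incremental `only_new_weekly_analysis()` run and leaves `weekly_analysis()` commented out. A possible alternative, sketched with a hypothetical `--full` flag that is not part of this commit, would keep both paths selectable:

```python
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Weekly data pull")
    parser.add_argument(
        "--full",
        action="store_true",
        help="run the full weekly_analysis instead of the incremental pipeline",
    )
    args = parser.parse_args()
    if args.full:
        weekly_analysis()
    else:
        only_new_weekly_analysis()
```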
scripts/utils.py CHANGED

```diff
@@ -428,3 +428,33 @@ def _to_content(q: str) -> dict[str, Any]:
         "extensions": {"headers": None},
     }
     return finalized_query
+
+
+def read_parquet_files(tools_filename: str, trades_filename: str):
+    # Check if tools.parquet is in the same directory
+    try:
+        tools = pd.read_parquet(DATA_DIR / tools_filename)
+
+        # make sure trader_address is in the columns
+        assert "trader_address" in tools.columns, "trader_address column not found"
+
+        # lowercase and strip trader_address
+        tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
+
+        # drop duplicates
+        tools.drop_duplicates(inplace=True)
+
+        print(f"{tools_filename} loaded")
+    except FileNotFoundError:
+        print("tools.parquet not found. Please run tools.py first.")
+        return
+    try:
+        fpmmTrades = pd.read_parquet(DATA_DIR / trades_filename)
+        fpmmTrades["trader_address"] = (
+            fpmmTrades["trader_address"].str.lower().str.strip()
+        )
+    except FileNotFoundError:
+        print("fpmmTrades.parquet not found.")
+        return
+
+    return tools, fpmmTrades
```
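A short usage sketch for the new helper; the trades filename below is illustrative (only `new_tools.parquet` appears elsewhere in this commit). Since `read_parquet_files` returns `None` implicitly when a file is missing, callers should guard before unpacking:

```python
from utils import read_parquet_files

result = read_parquet_files("new_tools.parquet", "new_fpmmTrades.parquet")
if result is not None:
    tools, fpmmTrades = result
    print(f"{len(tools)} tool rows, {len(fpmmTrades)} trades loaded")
```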