rosacastillo committed
Commit · ae57283
Parent(s): 3859cbd

updating data and new cleaning script

Files changed:
- data/all_trades_profitability.parquet +2 -2
- data/fpmmTrades.parquet +2 -2
- data/fpmms.parquet +2 -2
- data/invalid_trades.parquet +2 -2
- data/new_fpmmTrades.parquet +3 -0
- data/new_tools.parquet +3 -0
- data/outliers.parquet +1 -1
- data/summary_profitability.parquet +2 -2
- data/t_map.pkl +2 -2
- data/tools.parquet +2 -2
- data/tools_accuracy.csv +2 -2
- scripts/cleaning_old_info.py +66 -0
- scripts/get_mech_info.py +0 -2
- scripts/profitability.py +7 -2
- scripts/pull_data.py +1 -1
- scripts/staking.py +5 -2
- scripts/tools.py +3 -2
data/all_trades_profitability.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:804722496702da46e6034175b54c73778fd4c5b7794d29967dccb2f2f6432603
+size 3290989
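Each of the data/* entries in this commit is a Git LFS pointer file rather than the data itself: a three-line stub recording the spec version, the object's sha256 oid, and its byte size. As a minimal sketch (paths hypothetical), a pointer can be parsed and a downloaded object checked against it like so:

# Minimal sketch (hypothetical paths): parse a spec-v1 LFS pointer and
# verify a downloaded object against its recorded oid and size.
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: Path) -> dict:
    # Each pointer line is "key value", e.g. "size 3290989".
    fields = {}
    for line in pointer_path.read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_lfs_object(pointer_path: Path, object_path: Path) -> bool:
    fields = parse_lfs_pointer(pointer_path)
    data = object_path.read_bytes()
    return (hashlib.sha256(data).hexdigest() == fields["oid"].removeprefix("sha256:")
            and len(data) == int(fields["size"]))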
data/fpmmTrades.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:59bd9c58e15de9dcb4ae76cd8adca7750b460abfa2bdf79ee5042d3e3b5c396e
+size 13934569
data/fpmms.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:16b0570a9c07e0ef5d137ede96584fcfe1645a784a7380a83b9bdfa5829ad3e2
+size 515347
data/invalid_trades.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:18f4b47e3c764b8c7f157b4b408d0c97e3436f58d86eb39edecf2a7cf2748a21
+size 84033
data/new_fpmmTrades.parquet
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84442b8ab800d01ec66e0c78efebfc31a5d954d3fdddfaf5fab41a75030a3967
+size 3267040
data/new_tools.parquet
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67da51ef342b56056a5bfb49f78ecab7354731e2cd88d16ccbffeaa141e175ec
+size 64443733
data/outliers.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9fa72b86e20493b5a02dff2cf9173d394546b5eaba1de21469bb66593f7939e1
 size 18274
data/summary_profitability.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:522003ca4b76df815bf662ebc92478bc103652ac9f82dc82718578c26c650509
+size 87497
data/t_map.pkl
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:69d6fcd0360c5bbd646fa748b3f5a1e4bcccae358f32c85aa96509cdb6319c76
+size 24153722
data/tools.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:aa6d6407da787ae9d2ed80233d939f57feae82cd66a8193937b861c601f24828
+size 406224765
data/tools_accuracy.csv
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b0113d5cbc3c5eb981b5f1c5a7776f616fc52f44b15b5f96880a16989fa07d16
+size 1240
scripts/cleaning_old_info.py
ADDED
@@ -0,0 +1,66 @@
+import requests
+import json
+import pandas as pd
+import numpy as np
+from profitability import DATA_DIR, summary_analyse
+from staking import label_trades_by_staking
+
+
+def clean_old_data_from_parquet_files(cutoff_date: str):
+    # Convert the string to datetime64[ns, UTC]
+    min_date_utc = pd.to_datetime(cutoff_date, format="%Y-%m-%d", utc=True)
+
+    # clean tools.parquet
+    try:
+        tools = pd.read_parquet(DATA_DIR / "tools.parquet")
+
+        # make sure trader_address is in the columns
+        assert "trader_address" in tools.columns, "trader_address column not found"
+
+        # lowercase and strip trader_address
+        tools["trader_address"] = tools["trader_address"].str.lower().str.strip()
+
+        tools["request_time"] = pd.to_datetime(tools["request_time"], utc=True)
+
+        print(f"length before filtering {len(tools)}")
+        tools = tools.loc[tools["request_time"] > min_date_utc]
+        print(f"length after filtering {len(tools)}")
+        tools.to_parquet(DATA_DIR / "tools.parquet", index=False)
+
+    except Exception as e:
+        print(f"Error cleaning tools file {e}")
+
+    # clean all_trades_profitability.parquet
+    try:
+        all_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
+
+        all_trades["creation_timestamp"] = pd.to_datetime(
+            all_trades["creation_timestamp"], utc=True
+        )
+
+        print(f"length before filtering {len(all_trades)}")
+        all_trades = all_trades.loc[all_trades["creation_timestamp"] > min_date_utc]
+        print(f"length after filtering {len(all_trades)}")
+
+    except Exception as e:
+        print(f"Error cleaning all trades profitability file {e}")
+
+    # generate again summary_profitability.parquet
+    try:
+        print("Summarising trades...")
+        summary_df = summary_analyse(all_trades)
+
+        # add staking labels
+        label_trades_by_staking(trades_df=all_trades, update=False)
+
+        # save to parquet
+        all_trades.to_parquet(
+            DATA_DIR / "all_trades_profitability.parquet", index=False
+        )
+        summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)
+    except Exception as e:
+        print(f"Error generating summary and saving all trades profitability file {e}")
+
+
+if __name__ == "__main__":
+    clean_old_data_from_parquet_files("2024-09-15")
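The cutoff comparison in the new script only works because both sides are timezone-aware (datetime64[ns, UTC]); a toy illustration of the same pattern, with an illustrative column and dates:

# Toy illustration of the tz-aware cutoff filtering used in
# cleaning_old_info.py above; column name and dates are illustrative.
import pandas as pd

df = pd.DataFrame(
    {"request_time": ["2024-09-10 12:00:00+00:00", "2024-09-20 12:00:00+00:00"]}
)
df["request_time"] = pd.to_datetime(df["request_time"], utc=True)
cutoff = pd.to_datetime("2024-09-15", format="%Y-%m-%d", utc=True)
print(df.loc[df["request_time"] > cutoff])  # keeps only the 2024-09-20 row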
scripts/get_mech_info.py
CHANGED
@@ -3,7 +3,6 @@ from typing import Any
 from datetime import datetime, timedelta, UTC
 from utils import SUBGRAPH_API_KEY, measure_execution_time, DATA_DIR
 import requests
-import json
 import pandas as pd
 import numpy as np
 from mech_request_utils import (
@@ -13,7 +12,6 @@ from mech_request_utils import (
     fix_duplicate_requestIds,
     merge_requests_delivers,
     get_ipfs_data,
-    only_delivers_loop,
     merge_json_files,
 )
scripts/profitability.py
CHANGED
@@ -40,7 +40,6 @@ from utils import (
     wei_to_unit,
     convert_hex_to_int,
     _to_content,
-    read_parquet_files,
     JSON_DATA_DIR,
 )
 from queries import omen_xdai_trades_query, conditional_tokens_gc_user_query
@@ -581,13 +580,19 @@ def run_profitability_analysis(
     update_tools_parquet(tools_filename)
     all_trades_df = update_all_trades_parquet(all_trades_df)
 
+    # debugging purposes
+    all_trades_df.to_parquet(JSON_DATA_DIR / "all_trades_df.parquets")
     # filter invalid markets. Condition: "is_invalid" is True
     invalid_trades = all_trades_df.loc[all_trades_df["is_invalid"] == True]
     if merge:
         try:
             old_invalid_trades = pd.read_parquet(DATA_DIR / "invalid_trades.parquet")
-            merge_df = pd.concat(
+            merge_df = pd.concat(
+                [old_invalid_trades, invalid_trades], ignore_index=True
+            )
             invalid_trades = merge_df.drop_duplicates()
+        except Exception as e:
+            print(f"Error updating the invalid trades parquet {e}")
         invalid_trades.to_parquet(DATA_DIR / "invalid_trades.parquet", index=False)
 
     all_trades_df = all_trades_df.loc[all_trades_df["is_invalid"] == False]
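The hunk above wraps the invalid-trades merge in a try/except, so a missing or unreadable invalid_trades.parquet falls back to the freshly computed frame instead of aborting the run. Distilled into a standalone sketch (the helper name is hypothetical; filenames match the diff):

# Sketch of the merge-then-dedup pattern from the hunk above.
import pandas as pd

def merge_invalid_trades(invalid_trades: pd.DataFrame, data_dir) -> pd.DataFrame:
    try:
        old_invalid_trades = pd.read_parquet(data_dir / "invalid_trades.parquet")
        merge_df = pd.concat([old_invalid_trades, invalid_trades], ignore_index=True)
        invalid_trades = merge_df.drop_duplicates()
    except Exception as e:
        # fall back to the freshly computed invalid trades
        print(f"Error updating the invalid trades parquet {e}")
    return invalid_trades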
scripts/pull_data.py
CHANGED
@@ -124,7 +124,7 @@ def only_new_weekly_analysis():
     rpc = RPC
     # Run markets ETL
     logging.info("Running markets ETL")
-    mkt_etl(MARKETS_FILENAME)
+    # mkt_etl(MARKETS_FILENAME)
     logging.info("Markets ETL completed")
 
     # New tools ETL
scripts/staking.py
CHANGED
@@ -174,13 +174,16 @@ def get_trader_address_staking(trader_address: str, service_map: dict) -> str:
     return check_owner_staking_contract(owner_address=owner)
 
 
-def label_trades_by_staking(
+def label_trades_by_staking(
+    trades_df: pd.DataFrame, update: bool = True
+) -> pd.DataFrame:
     with open(DATA_DIR / "service_map.pkl", "rb") as f:
         service_map = pickle.load(f)
     # get the last service id
     keys = service_map.keys()
     last_key = max(keys)
-
+    if update:
+        update_service_map(start=last_key)
     all_traders = trades_df.trader_address.unique()
     trades_df["staking"] = ""
     for trader in tqdm(all_traders, desc="Labeling traders by staking", unit="trader"):
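The new update flag lets callers skip refreshing the service map before labeling; the added cleaning script relies on exactly that:

# From scripts/cleaning_old_info.py in this commit: relabel staking on
# already-fetched trades without re-querying the service map.
label_trades_by_staking(trades_df=all_trades, update=False)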
scripts/tools.py
CHANGED
@@ -98,7 +98,8 @@ HTTP_TIMEOUT = 10
 N_IPFS_RETRIES = 1
 N_RPC_RETRIES = 100
 RPC_POLL_INTERVAL = 0.05
-IPFS_POLL_INTERVAL = 0.05
+# IPFS_POLL_INTERVAL = 0.05  # low speed
+IPFS_POLL_INTERVAL = 0.2  # high speed
 IRRELEVANT_TOOLS = [
     "openai-text-davinci-002",
     "openai-text-davinci-003",
@@ -585,7 +586,7 @@ def parse_store_json_events_parallel(json_events: Dict[str, Any], output_filename
     contents.append(current_mech_contents)
 
     tools = pd.concat(contents, ignore_index=True)
-    print(f"Adding market creators info. Length of the tools file = {tools}")
+    print(f"Adding market creators info. Length of the tools file = {len(tools)}")
     tools = add_market_creator(tools)
     print(
         f"Length of the tools dataframe after adding market creators info= {len(tools)}"