# rosacastillo's picture
# cleaning and refactoring code
# 24d09a6
# raw
# history blame
# 4.09 kB
import logging
import os
import pickle
from web3 import Web3
import pandas as pd
from functools import partial
from datetime import datetime
from markets import (
etl as mkt_etl,
DEFAULT_FILENAME as MARKETS_FILENAME,
)
TOOLS_FILENAME = "tools_2024.parquet"
from tools import (
etl as tools_etl,
)
from pull_data import (
DATA_DIR,
parallelize_timestamp_conversion,
block_number_to_timestamp,
)
from profitability import run_profitability_analysis
from get_mech_info import get_mech_info_2024
from utils import get_question, current_answer
import gc
# Configure the root logger at INFO level so the logging.info() progress
# messages emitted by roi_analysis() are shown.
logging.basicConfig(level=logging.INFO)
def roi_analysis():
    """Run ROI analysis for the trades done in 2024.

    Steps, in order:

    1. Run the markets ETL and the tools ETL.
    2. Delete any existing ``fpmmTrades.parquet`` under ``DATA_DIR`` and run
       the profitability analysis starting from 2024-01-01.
    3. Reload the markets and tools parquet files, add the ``title``,
       ``currentAnswer``, ``request_time``, ``request_month_year`` and
       ``request_month_year_week`` columns to the tools data, and overwrite
       the tools parquet file.
    4. Merge the block -> timestamp pairs found in the tools data into the
       map loaded from ``t_map.pkl`` and save it as ``t_map_2024.pkl``.

    Takes no arguments and returns ``None``; the files this function writes
    itself are placed under ``DATA_DIR``.
    """
    # NOTE(review): this URL embeds what looks like an access key; consider
    # loading it from configuration instead of keeping it in source.
    rpc = "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a"
    web3 = Web3(Web3.HTTPProvider(rpc))

    # Run markets ETL
    logging.info("Running markets ETL")
    mkt_etl(MARKETS_FILENAME)
    logging.info("Markets ETL completed")

    # Run tools ETL
    logging.info("Running tools ETL")
    # This etl is saving already the tools parquet file
    tools_etl(
        rpcs=[rpc],
        mech_info=get_mech_info_2024(),
        filename=TOOLS_FILENAME,
    )
    logging.info("Tools ETL completed")

    # Run profitability analysis, removing any previous trades file first.
    trades_filename = "fpmmTrades.parquet"
    trades_path = DATA_DIR / trades_filename
    if os.path.exists(trades_path):
        os.remove(trades_path)
    logging.info("Running profitability analysis")
    # NOTE(review): strptime returns a naive datetime, so .timestamp()
    # interprets it in the machine's local timezone - confirm whether UTC
    # is intended here.
    start_of_2024 = datetime.strptime("2024-01-01", "%Y-%m-%d")
    run_profitability_analysis(
        rpc=rpc,
        tools_filename=TOOLS_FILENAME,
        trades_filename=trades_filename,
        from_timestamp=int(start_of_2024.timestamp()),
    )
    logging.info("Profitability analysis completed")

    # Get currentAnswer from FPMMS
    fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
    tools = pd.read_parquet(DATA_DIR / TOOLS_FILENAME)

    # Get the question from the tools
    logging.info("Getting the question and current answer for the tools")
    tools["title"] = tools["prompt_request"].apply(get_question)
    tools["currentAnswer"] = tools["title"].apply(lambda x: current_answer(x, fpmms))
    # NOTE(review): str.replace substitutes substrings, so any answer that
    # merely contains "yes"/"no" is altered too - confirm the possible values.
    tools["currentAnswer"] = tools["currentAnswer"].str.replace("yes", "Yes")
    tools["currentAnswer"] = tools["currentAnswer"].str.replace("no", "No")

    # Convert block number to timestamp
    logging.info("Converting block number to timestamp")
    # The context manager closes the pickle file as soon as it has been read.
    # NOTE(review): only unpickle files produced by this pipeline; pickle is
    # unsafe on untrusted data.
    with open(DATA_DIR / "t_map.pkl", "rb") as t_map_file:
        t_map = pickle.load(t_map_file)
    tools["request_time"] = tools["request_block"].map(t_map)

    # Identify tools with missing request_time and fill them
    missing_time_indices = tools[tools["request_time"].isna()].index
    if not missing_time_indices.empty:
        partial_block_number_to_timestamp = partial(
            block_number_to_timestamp, web3=web3
        )
        # Presumably yields one timestamp per row passed in, in row order -
        # verify against pull_data.parallelize_timestamp_conversion.
        missing_timestamps = parallelize_timestamp_conversion(
            tools.loc[missing_time_indices], partial_block_number_to_timestamp
        )
        # Update the original DataFrame with the missing timestamps
        for i, timestamp in zip(missing_time_indices, missing_timestamps):
            tools.at[i, "request_time"] = timestamp

    # Parse the request times once and derive both period columns from it.
    request_datetimes = pd.to_datetime(tools["request_time"])
    tools["request_month_year"] = request_datetimes.dt.strftime("%Y-%m")
    tools["request_month_year_week"] = request_datetimes.dt.to_period("W").astype(str)

    # Save the tools data after the updates on the content
    tools.to_parquet(DATA_DIR / TOOLS_FILENAME, index=False)

    # Update t_map with new timestamps
    new_timestamps = (
        tools[["request_block", "request_time"]]
        .dropna()
        .set_index("request_block")["request_time"]
        .to_dict()
    )
    t_map.update(new_timestamps)
    # NOTE(review): the map is read from "t_map.pkl" but written to
    # "t_map_2024.pkl" - confirm the differing file names are intentional.
    with open(DATA_DIR / "t_map_2024.pkl", "wb") as f:
        pickle.dump(t_map, f)

    # clean and release all memory
    del tools
    del fpmms
    del t_map
    gc.collect()
    logging.info("ROI analysis files generated and saved")
# Run the full analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    roi_analysis()