olas-prediction-live-dashboard / scripts /update_tools_accuracy.py
rosacastillo's picture
updating ipfs server with gcp one?
b3b7123
raw
history blame
4.69 kB
import os
import pandas as pd
import ipfshttpclient
from utils import INC_TOOLS
from typing import List
from utils import TMP_DIR, DATA_DIR
# Name of the CSV file (stored under DATA_DIR) that holds the per-tool accuracy table.
ACCURACY_FILENAME = "tools_accuracy.csv"
# NOTE(review): presumably the previous IPFS endpoint, kept for reference;
# it is not used by the code visible in this file.
OLD_IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
# Address of the IPFS endpoint passed to ipfshttpclient.connect() to publish the accuracy file.
IPFS_SERVER = "/dns/registry.gcp.autonolas.tech/tcp/443/https"
def update_tools_accuracy(
    tools_acc: pd.DataFrame, tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """To compute/update the latest accuracy information for the different mech tools.

    Only requests of the tools listed in ``inc_tools``, without errors
    (``error != 1``) and whose ``currentAnswer`` and ``vote`` are both
    "Yes"/"No" are taken into account. A request counts as a win when
    ``currentAnswer`` equals ``vote``.

    Args:
        tools_acc: previously stored accuracy table with a ``tool`` column
            (plus ``tool_accuracy``, ``total_requests``, ``min``, ``max``), or
            ``None`` when no accuracy data exists yet. Rows of tools that are
            already listed are updated in place.
        tools_df: requests data; must contain the columns ``tool``, ``error``,
            ``currentAnswer`` and ``vote``. ``request_time`` is optional and,
            when present, is used to compute the min/max timeline per tool.
        inc_tools: names of the tools to include in the computation.

    Returns:
        When ``tools_acc`` is ``None``: the freshly computed table (``tool``,
        ``tool_accuracy`` as a percentage, ``total_requests`` and, if
        available, the ``min``/``max`` request times).
        Otherwise: ``tools_acc`` with the rows of the computed tools updated
        and rows for new tools appended; ``min``/``max`` are written as
        "%Y-%m-%d %H:%M:%S" strings, or ``None`` when no timeline is available.
    """
    # computation of the accuracy information
    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
    # filtering errors; work on a copy so the column assignments below never
    # target a view of the caller's dataframe (avoids SettingWithCopyWarning)
    tools_non_error = tools_inc[tools_inc["error"] != 1].copy()
    tools_non_error["currentAnswer"] = tools_non_error["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )
    valid_answers = ["Yes", "No"]
    tools_non_error = tools_non_error[
        tools_non_error["currentAnswer"].isin(valid_answers)
        & tools_non_error["vote"].isin(valid_answers)
    ].copy()
    tools_non_error["win"] = (
        tools_non_error["currentAnswer"] == tools_non_error["vote"]
    ).astype(int)
    tools_non_error.columns = tools_non_error.columns.astype(str)

    wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
    # unstack() only creates columns for the outcomes present in the data;
    # make sure both the loss (0) and the win (1) columns exist
    wins = wins.reindex(columns=[0, 1], fill_value=0)
    wins["total_requests"] = wins[0] + wins[1]
    wins["tool_accuracy"] = (wins[1] / wins["total_requests"]) * 100
    wins = wins.reset_index()
    wins.columns = wins.columns.astype(str)
    wins = wins[["tool", "tool_accuracy", "total_requests"]]

    no_timeline_info = False
    try:
        timeline = tools_non_error.groupby(["tool"])["request_time"].agg(["min", "max"])
    except (KeyError, TypeError):
        # KeyError: no request_time column; TypeError: values cannot be ordered
        print("NO REQUEST TIME INFORMATION AVAILABLE")
        no_timeline_info = True
        acc_info = wins
    else:
        print("timeline dataset")
        print(timeline.head())
        acc_info = wins.merge(timeline, how="left", on="tool")

    if tools_acc is None:
        print("Creating accuracy file for the first time")
        return acc_info

    # update the old information
    print("Updating accuracy information")
    tools_to_update = list(acc_info["tool"].values)
    print("tools to update")
    print(tools_to_update)
    existing_tools = set(tools_acc["tool"].values)

    if not no_timeline_info:
        # the min/max columns only exist when the timeline could be computed
        acc_info["min"] = acc_info["min"].dt.strftime("%Y-%m-%d %H:%M:%S")
        acc_info["max"] = acc_info["max"].dt.strftime("%Y-%m-%d %H:%M:%S")

    new_rows = []
    for record in acc_info.to_dict("records"):
        tool = record["tool"]
        new_accuracy = record["tool_accuracy"]
        new_volume = record["total_requests"]
        new_min_timeline = None if no_timeline_info else record["min"]
        new_max_timeline = None if no_timeline_info else record["max"]

        if tool in existing_tools:
            selected = tools_acc["tool"] == tool
            tools_acc.loc[selected, "tool_accuracy"] = new_accuracy
            tools_acc.loc[selected, "total_requests"] = new_volume
            tools_acc.loc[selected, "min"] = new_min_timeline
            tools_acc.loc[selected, "max"] = new_max_timeline
        else:
            # new tool to add to the file
            # tool,tool_accuracy,total_requests,min,max
            new_rows.append(
                {
                    "tool": tool,
                    "tool_accuracy": new_accuracy,
                    "total_requests": new_volume,
                    "min": new_min_timeline,
                    "max": new_max_timeline,
                }
            )

    if new_rows:
        # a list of dicts builds one row per dict; a single dict of scalars
        # would make the DataFrame constructor raise ValueError
        tools_acc = pd.concat(
            [tools_acc, pd.DataFrame(new_rows)], ignore_index=True
        )

    print(tools_acc)
    return tools_acc
def compute_tools_accuracy():
    """Recompute the tools accuracy table, save it as CSV and upload it to IPFS.

    Reads ``tools.parquet`` from TMP_DIR, merges the fresh accuracy figures
    with the accuracy CSV in DATA_DIR (if one already exists), writes the
    result back to that CSV and finally adds the file to the IPFS server,
    printing the resulting hash.
    """
    print("Computing accuracy of tools")
    print("Reading tools parquet file")
    tools = pd.read_parquet(TMP_DIR / "tools.parquet")

    # Computing tools accuracy information
    print("Computing tool accuracy information")
    accuracy_path = DATA_DIR / ACCURACY_FILENAME
    # Load the previous accuracy table only when the file is already there
    previous_acc = pd.read_csv(accuracy_path) if os.path.exists(accuracy_path) else None
    acc_data = update_tools_accuracy(previous_acc, tools, INC_TOOLS)

    # save acc_data into a CSV file
    print("Saving into a csv file")
    acc_data.to_csv(accuracy_path, index=False)
    print(acc_data.head())

    # save the data into IPFS
    client = ipfshttpclient.connect(IPFS_SERVER)
    result = client.add(accuracy_path)
    print(f"HASH of the tools accuracy file: {result['Hash']}")
# Run the accuracy computation only when this file is executed as a script.
if __name__ == "__main__":
    compute_tools_accuracy()