File size: 4,688 Bytes
0120490
 
 
 
24d09a6
278fab8
0120490
 
b3b7123
 
0120490
 
24d09a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278fab8
7aa7dd1
 
24d09a6
9382590
 
 
 
 
 
 
 
24d09a6
9382590
24d09a6
 
 
 
9382590
 
 
 
 
 
 
 
 
 
 
 
24d09a6
 
 
 
0120490
1df3214
0120490
278fab8
0120490
 
 
 
 
 
 
544f140
0120490
 
 
544f140
0120490
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import os
import pandas as pd
import ipfshttpclient
from utils import INC_TOOLS
from typing import List
from utils import TMP_DIR, DATA_DIR

# Name of the CSV file holding the per-tool accuracy table; it is joined with
# DATA_DIR in compute_tools_accuracy to read, write and upload the file.
ACCURACY_FILENAME = "tools_accuracy.csv"
# Address of a previous IPFS registry endpoint. It is not referenced by the code
# visible in this file — NOTE(review): presumably kept for reference; confirm
# before removing.
OLD_IPFS_SERVER = "/dns/registry.autonolas.tech/tcp/443/https"
# Address passed to ipfshttpclient.connect when uploading the accuracy file.
IPFS_SERVER = "/dns/registry.gcp.autonolas.tech/tcp/443/https"


def update_tools_accuracy(
    tools_acc: pd.DataFrame, tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute the latest accuracy figures of the mech tools and merge them into ``tools_acc``.

    Only rows of ``tools_df`` whose ``tool`` is listed in ``inc_tools``, whose
    ``error`` is not 1 and whose ``currentAnswer`` and ``vote`` are both
    "Yes"/"No" (lower-case "yes"/"no" answers are normalised first) are taken
    into account. A request counts as a win when ``currentAnswer == vote``.

    Args:
        tools_acc: previously stored accuracy table with the columns ``tool``,
            ``tool_accuracy``, ``total_requests``, ``min`` and ``max``, or
            ``None`` when no accuracy file exists yet.
        tools_df: tool requests; must contain the columns ``tool``, ``error``,
            ``currentAnswer`` and ``vote``. The column ``request_time`` is
            optional and is expected to be datetime-like when present.
        inc_tools: names of the tools to include in the computation.

    Returns:
        The accuracy table. When ``tools_acc`` is ``None`` this is the freshly
        computed table (``min``/``max`` are only present if ``request_time``
        is available and are left unformatted). Otherwise it is ``tools_acc``
        with the rows of the computed tools updated (in place) and rows for
        unknown tools appended; ``min``/``max`` are stored as
        ``%Y-%m-%d %H:%M:%S`` strings, or ``None`` without timeline data.
    """
    valid_answers = ["Yes", "No"]
    time_format = "%Y-%m-%d %H:%M:%S"

    # computation of the accuracy information
    tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
    # filtering errors; copy so the assignments below never write into a view
    # of the caller's dataframe (avoids SettingWithCopyWarning)
    tools_non_error = tools_inc[tools_inc["error"] != 1].copy()
    tools_non_error["currentAnswer"] = tools_non_error["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )
    tools_non_error = tools_non_error[
        tools_non_error["currentAnswer"].isin(valid_answers)
        & tools_non_error["vote"].isin(valid_answers)
    ].copy()
    tools_non_error["win"] = (
        tools_non_error["currentAnswer"] == tools_non_error["vote"]
    ).astype(int)
    tools_non_error.columns = tools_non_error.columns.astype(str)

    wins = tools_non_error.groupby(["tool", "win"]).size().unstack().fillna(0)
    # unstack only creates the outcomes that actually occur; make sure both the
    # loss (0) and win (1) columns exist even if all requests share one outcome
    wins = wins.reindex(columns=[0, 1], fill_value=0)
    wins["total_requests"] = wins[0] + wins[1]
    wins["tool_accuracy"] = (wins[1] / wins["total_requests"]) * 100
    wins = wins.reset_index()
    wins.columns = wins.columns.astype(str)
    wins = wins[["tool", "tool_accuracy", "total_requests"]]

    no_timeline_info = False
    try:
        timeline = tools_non_error.groupby("tool")["request_time"].agg(["min", "max"])
    except (KeyError, TypeError):
        # request_time is missing (or cannot be aggregated): continue without it
        print("NO REQUEST TIME INFORMATION AVAILABLE")
        no_timeline_info = True
        acc_info = wins
    else:
        print("timeline dataset")
        print(timeline.head())
        acc_info = wins.merge(timeline, how="left", on="tool")

    if tools_acc is None:
        print("Creating accuracy file for the first time")
        return acc_info

    # update the old information
    print("Updating accuracy information")
    tools_to_update = list(acc_info["tool"].values)
    print("tools to update")
    print(tools_to_update)
    existing_tools = set(tools_acc["tool"].values)

    if no_timeline_info:
        # there are no min/max columns to read in this case
        min_times = [None] * len(acc_info)
        max_times = [None] * len(acc_info)
    else:
        min_times = acc_info["min"].dt.strftime(time_format).tolist()
        max_times = acc_info["max"].dt.strftime(time_format).tolist()

    new_rows = []
    for tool, new_accuracy, new_volume, new_min_timeline, new_max_timeline in zip(
        tools_to_update,
        acc_info["tool_accuracy"],
        acc_info["total_requests"],
        min_times,
        max_times,
    ):
        if tool in existing_tools:
            row_mask = tools_acc["tool"] == tool
            tools_acc.loc[row_mask, "tool_accuracy"] = new_accuracy
            tools_acc.loc[row_mask, "total_requests"] = new_volume
            tools_acc.loc[row_mask, "min"] = new_min_timeline
            tools_acc.loc[row_mask, "max"] = new_max_timeline
        else:
            # new tool to add to the file
            # tool,tool_accuracy,total_requests,min,max
            new_rows.append(
                {
                    "tool": tool,
                    "tool_accuracy": new_accuracy,
                    "total_requests": new_volume,
                    "min": new_min_timeline,
                    "max": new_max_timeline,
                }
            )

    if new_rows:
        # a list of records is required here: a dict of scalars cannot be
        # turned into a DataFrame without an explicit index
        tools_acc = pd.concat([tools_acc, pd.DataFrame(new_rows)], ignore_index=True)

    print(tools_acc)
    return tools_acc


def compute_tools_accuracy() -> None:
    """Recompute the tools accuracy CSV file and upload it to IPFS.

    Reads ``tools.parquet`` from ``TMP_DIR``, loads the existing accuracy CSV
    from ``DATA_DIR`` when there is one, updates it through
    ``update_tools_accuracy`` for the tools in ``INC_TOOLS``, writes the result
    back to the same CSV file and finally adds that file to the IPFS server at
    ``IPFS_SERVER``, printing the resulting hash.
    """
    print("Computing accuracy of tools")
    print("Reading tools parquet file")
    tools = pd.read_parquet(TMP_DIR / "tools.parquet")
    # Computing tools accuracy information
    print("Computing tool accuracy information")
    accuracy_path = DATA_DIR / ACCURACY_FILENAME
    # Check if the file exists; None tells update_tools_accuracy to start from scratch
    acc_data = pd.read_csv(accuracy_path) if os.path.exists(accuracy_path) else None
    acc_data = update_tools_accuracy(acc_data, tools, INC_TOOLS)

    # save acc_data into a CSV file
    print("Saving into a csv file")
    acc_data.to_csv(accuracy_path, index=False)
    print(acc_data.head())

    # save the data into IPFS; the context manager closes the HTTP session
    # even if the upload fails
    with ipfshttpclient.connect(IPFS_SERVER) as client:
        result = client.add(accuracy_path)
    print(f"HASH of the tools accuracy file: {result['Hash']}")


# Script entry point: run the full accuracy computation (including the IPFS
# upload) only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    compute_tools_accuracy()