Spaces:

valory
/

olas-prediction-live-dashboard

Running

App Files Files Community

cyberosa committed on Jun 14, 2024

Commit

b3b3ee6

1 Parent(s): 3cfd212

adding tools accuracy info

Browse files

Files changed (4) hide show

notebooks/analysis.ipynb +458 -4
scripts/profitability.py +2 -4
scripts/pull_data.py +13 -0
scripts/tools.py +36 -14

notebooks/analysis.ipynb CHANGED Viewed

@@ -16,9 +16,463 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "fpmms = pd.read_parquet('/Users/arshath/play/openautonomy/olas-prediction-live-dashboard/data/fpmms.parquet')\n",
-    "tools = pd.read_parquet('/Users/arshath/play/openautonomy/olas-prediction-live-dashboard/data/tools.parquet')\n",
-    "trades = pd.read_parquet('/Users/arshath/play/openautonomy/olas-prediction-live-dashboard/data/all_trades_profitability.parquet')"
    ]
   },
   {
@@ -2048,7 +2502,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.14"
   }
  },
  "nbformat": 4,

    "metadata": {},
    "outputs": [],
    "source": [
+    "fpmms = pd.read_parquet('../data/fpmms.parquet')\n",
+    "tools = pd.read_parquet('../data/tools.parquet')\n",
+    "trades = pd.read_parquet('../data/all_trades_profitability.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "INC_TOOLS = [\n",
+    "    \"prediction-online\",\n",
+    "    \"prediction-offline\",\n",
+    "    \"claude-prediction-online\",\n",
+    "    \"claude-prediction-offline\",\n",
+    "    \"prediction-offline-sme\",\n",
+    "    \"prediction-online-sme\",\n",
+    "    \"prediction-request-rag\",\n",
+    "    \"prediction-request-reasoning\",\n",
+    "    \"prediction-url-cot-claude\",\n",
+    "    \"prediction-request-rag-claude\",\n",
+    "    \"prediction-request-reasoning-claude\",\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>win</th>\n",
+       "      <th>tool</th>\n",
+       "      <th>tool_accuracy</th>\n",
+       "      <th>total_requests</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>claude-prediction-offline</td>\n",
+       "      <td>66.308244</td>\n",
+       "      <td>279</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>claude-prediction-online</td>\n",
+       "      <td>58.914027</td>\n",
+       "      <td>1105</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>prediction-offline</td>\n",
+       "      <td>67.717915</td>\n",
+       "      <td>2283</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>prediction-offline-sme</td>\n",
+       "      <td>55.555556</td>\n",
+       "      <td>18</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>prediction-online</td>\n",
+       "      <td>65.459066</td>\n",
+       "      <td>5631</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>prediction-online-sme</td>\n",
+       "      <td>67.417656</td>\n",
+       "      <td>8167</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>prediction-request-rag</td>\n",
+       "      <td>64.217072</td>\n",
+       "      <td>1769</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>prediction-request-rag-claude</td>\n",
+       "      <td>69.554566</td>\n",
+       "      <td>4490</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>prediction-request-reasoning</td>\n",
+       "      <td>68.813594</td>\n",
+       "      <td>9828</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>prediction-request-reasoning-claude</td>\n",
+       "      <td>68.910256</td>\n",
+       "      <td>2184</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>prediction-url-cot-claude</td>\n",
+       "      <td>64.584980</td>\n",
+       "      <td>1265</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "win                                 tool  tool_accuracy  total_requests\n",
+       "0              claude-prediction-offline      66.308244             279\n",
+       "1               claude-prediction-online      58.914027            1105\n",
+       "2                     prediction-offline      67.717915            2283\n",
+       "3                 prediction-offline-sme      55.555556              18\n",
+       "4                      prediction-online      65.459066            5631\n",
+       "5                  prediction-online-sme      67.417656            8167\n",
+       "6                 prediction-request-rag      64.217072            1769\n",
+       "7          prediction-request-rag-claude      69.554566            4490\n",
+       "8           prediction-request-reasoning      68.813594            9828\n",
+       "9    prediction-request-reasoning-claude      68.910256            2184\n",
+       "10             prediction-url-cot-claude      64.584980            1265"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tools_inc = tools[tools['tool'].isin(INC_TOOLS)]\n",
+    "# filtering errors\n",
+    "tools_non_error = tools_inc[tools_inc['error'] != 1]\n",
+    "tools_non_error.loc[:, 'currentAnswer'] = tools_non_error['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})\n",
+    "tools_non_error = tools_non_error[tools_non_error['currentAnswer'].isin(['Yes', 'No'])]\n",
+    "tools_non_error = tools_non_error[tools_non_error['vote'].isin(['Yes', 'No'])]\n",
+    "tools_non_error['win'] = (tools_non_error['currentAnswer'] == tools_non_error['vote']).astype(int)\n",
+    "tools_non_error.columns = tools_non_error.columns.astype(str)\n",
+    "wins = tools_non_error.groupby(['tool', 'win']).size().unstack().fillna(0)\n",
+    "wins['tool_accuracy'] = (wins[1] / (wins[0] + wins[1])) * 100\n",
+    "wins.reset_index(inplace=True)\n",
+    "wins['total_requests'] = wins[0] + wins[1]\n",
+    "wins.columns = wins.columns.astype(str)\n",
+    "wins = wins[[\"tool\", \"tool_accuracy\", \"total_requests\"]]\n",
+    "wins"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>min</th>\n",
+       "      <th>max</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>tool</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>claude-prediction-offline</th>\n",
+       "      <td>2024-04-23 13:09:30</td>\n",
+       "      <td>2024-06-10 00:31:30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>claude-prediction-online</th>\n",
+       "      <td>2024-04-12 12:24:20</td>\n",
+       "      <td>2024-06-09 21:41:20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>prediction-offline</th>\n",
+       "      <td>2024-04-12 12:20:10</td>\n",
+       "      <td>2024-06-08 23:45:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>prediction-offline-sme</th>\n",
+       "      <td>2024-04-16 07:58:45</td>\n",
+       "      <td>2024-04-29 20:45:15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>prediction-online</th>\n",
+       "      <td>2024-04-16 05:52:40</td>\n",
+       "      <td>2024-06-09 21:47:20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>prediction-online-sme</th>\n",
+       "      <td>2024-04-12 11:51:30</td>\n",
+       "      <td>2024-06-10 00:06:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>prediction-request-rag</th>\n",
+       "      <td>2024-04-12 11:39:40</td>\n",
+       "      <td>2024-06-09 21:17:45</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>prediction-request-rag-claude</th>\n",
+       "      <td>2024-04-12 11:14:30</td>\n",
+       "      <td>2024-06-07 11:42:30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>prediction-request-reasoning</th>\n",
+       "      <td>2024-04-12 11:57:05</td>\n",
+       "      <td>2024-06-09 21:50:45</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>prediction-request-reasoning-claude</th>\n",
+       "      <td>2024-04-12 11:53:55</td>\n",
+       "      <td>2024-06-05 05:00:10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>prediction-url-cot-claude</th>\n",
+       "      <td>2024-04-12 11:37:15</td>\n",
+       "      <td>2024-06-05 05:21:10</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                     min                  max\n",
+       "tool                                                                         \n",
+       "claude-prediction-offline            2024-04-23 13:09:30  2024-06-10 00:31:30\n",
+       "claude-prediction-online             2024-04-12 12:24:20  2024-06-09 21:41:20\n",
+       "prediction-offline                   2024-04-12 12:20:10  2024-06-08 23:45:00\n",
+       "prediction-offline-sme               2024-04-16 07:58:45  2024-04-29 20:45:15\n",
+       "prediction-online                    2024-04-16 05:52:40  2024-06-09 21:47:20\n",
+       "prediction-online-sme                2024-04-12 11:51:30  2024-06-10 00:06:00\n",
+       "prediction-request-rag               2024-04-12 11:39:40  2024-06-09 21:17:45\n",
+       "prediction-request-rag-claude        2024-04-12 11:14:30  2024-06-07 11:42:30\n",
+       "prediction-request-reasoning         2024-04-12 11:57:05  2024-06-09 21:50:45\n",
+       "prediction-request-reasoning-claude  2024-04-12 11:53:55  2024-06-05 05:00:10\n",
+       "prediction-url-cot-claude            2024-04-12 11:37:15  2024-06-05 05:21:10"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tools_inc = tools[tools['tool'].isin(INC_TOOLS)]\n",
+    "# filtering errors\n",
+    "tools_non_error = tools_inc[tools_inc['error'] != 1]\n",
+    "tools_non_error.loc[:, 'currentAnswer'] = tools_non_error['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})\n",
+    "tools_non_error = tools_non_error[tools_non_error['currentAnswer'].isin(['Yes', 'No'])]\n",
+    "tools_non_error = tools_non_error[tools_non_error['vote'].isin(['Yes', 'No'])]\n",
+    "tools_non_error['win'] = (tools_non_error['currentAnswer'] == tools_non_error['vote']).astype(int)\n",
+    "tools_non_error.columns = tools_non_error.columns.astype(str)\n",
+    "timeline = tools_non_error.groupby(['tool'])[\"request_time\"].agg([\"min\",\"max\"])\n",
+    "timeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>tool</th>\n",
+       "      <th>tool_accuracy</th>\n",
+       "      <th>total_requests</th>\n",
+       "      <th>min</th>\n",
+       "      <th>max</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>claude-prediction-offline</td>\n",
+       "      <td>66.308244</td>\n",
+       "      <td>279</td>\n",
+       "      <td>2024-04-23 13:09:30</td>\n",
+       "      <td>2024-06-10 00:31:30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>claude-prediction-online</td>\n",
+       "      <td>58.914027</td>\n",
+       "      <td>1105</td>\n",
+       "      <td>2024-04-12 12:24:20</td>\n",
+       "      <td>2024-06-09 21:41:20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>prediction-offline</td>\n",
+       "      <td>67.717915</td>\n",
+       "      <td>2283</td>\n",
+       "      <td>2024-04-12 12:20:10</td>\n",
+       "      <td>2024-06-08 23:45:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>prediction-offline-sme</td>\n",
+       "      <td>55.555556</td>\n",
+       "      <td>18</td>\n",
+       "      <td>2024-04-16 07:58:45</td>\n",
+       "      <td>2024-04-29 20:45:15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>prediction-online</td>\n",
+       "      <td>65.459066</td>\n",
+       "      <td>5631</td>\n",
+       "      <td>2024-04-16 05:52:40</td>\n",
+       "      <td>2024-06-09 21:47:20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>prediction-online-sme</td>\n",
+       "      <td>67.417656</td>\n",
+       "      <td>8167</td>\n",
+       "      <td>2024-04-12 11:51:30</td>\n",
+       "      <td>2024-06-10 00:06:00</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>prediction-request-rag</td>\n",
+       "      <td>64.217072</td>\n",
+       "      <td>1769</td>\n",
+       "      <td>2024-04-12 11:39:40</td>\n",
+       "      <td>2024-06-09 21:17:45</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>prediction-request-rag-claude</td>\n",
+       "      <td>69.554566</td>\n",
+       "      <td>4490</td>\n",
+       "      <td>2024-04-12 11:14:30</td>\n",
+       "      <td>2024-06-07 11:42:30</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>prediction-request-reasoning</td>\n",
+       "      <td>68.813594</td>\n",
+       "      <td>9828</td>\n",
+       "      <td>2024-04-12 11:57:05</td>\n",
+       "      <td>2024-06-09 21:50:45</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>prediction-request-reasoning-claude</td>\n",
+       "      <td>68.910256</td>\n",
+       "      <td>2184</td>\n",
+       "      <td>2024-04-12 11:53:55</td>\n",
+       "      <td>2024-06-05 05:00:10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>prediction-url-cot-claude</td>\n",
+       "      <td>64.584980</td>\n",
+       "      <td>1265</td>\n",
+       "      <td>2024-04-12 11:37:15</td>\n",
+       "      <td>2024-06-05 05:21:10</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                   tool  tool_accuracy  total_requests  \\\n",
+       "0             claude-prediction-offline      66.308244             279   \n",
+       "1              claude-prediction-online      58.914027            1105   \n",
+       "2                    prediction-offline      67.717915            2283   \n",
+       "3                prediction-offline-sme      55.555556              18   \n",
+       "4                     prediction-online      65.459066            5631   \n",
+       "5                 prediction-online-sme      67.417656            8167   \n",
+       "6                prediction-request-rag      64.217072            1769   \n",
+       "7         prediction-request-rag-claude      69.554566            4490   \n",
+       "8          prediction-request-reasoning      68.813594            9828   \n",
+       "9   prediction-request-reasoning-claude      68.910256            2184   \n",
+       "10            prediction-url-cot-claude      64.584980            1265   \n",
+       "\n",
+       "                    min                  max  \n",
+       "0   2024-04-23 13:09:30  2024-06-10 00:31:30  \n",
+       "1   2024-04-12 12:24:20  2024-06-09 21:41:20  \n",
+       "2   2024-04-12 12:20:10  2024-06-08 23:45:00  \n",
+       "3   2024-04-16 07:58:45  2024-04-29 20:45:15  \n",
+       "4   2024-04-16 05:52:40  2024-06-09 21:47:20  \n",
+       "5   2024-04-12 11:51:30  2024-06-10 00:06:00  \n",
+       "6   2024-04-12 11:39:40  2024-06-09 21:17:45  \n",
+       "7   2024-04-12 11:14:30  2024-06-07 11:42:30  \n",
+       "8   2024-04-12 11:57:05  2024-06-09 21:50:45  \n",
+       "9   2024-04-12 11:53:55  2024-06-05 05:00:10  \n",
+       "10  2024-04-12 11:37:15  2024-06-05 05:21:10  "
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "total = wins.merge(timeline,how=\"left\", on=\"tool\")\n",
+    "total"
    ]
   },
   {
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,

scripts/profitability.py CHANGED Viewed

@@ -419,8 +419,6 @@ def prepare_profitalibity_data(rpc: str):
         timestamp_60_days_ago = (DATETIME_60_DAYS_AGO).timestamp()
         fpmmTrades = create_fpmmTrades(rpc, from_timestamp=timestamp_60_days_ago)
         fpmmTrades.to_parquet(DATA_DIR / "fpmmTrades.parquet", index=False)
-        # This is not needed
-        # fpmmTrades = pd.read_parquet(DATA_DIR / "fpmmTrades.parquet")
     # make sure trader_address is in the columns
     assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
@@ -610,7 +608,7 @@ def summary_analyse(df):
 def run_profitability_analysis(rpc):
     """Create all trades analysis."""
-    # load dfs from csv for analysis
     print("Preparing data...")
     fpmmTrades, tools = prepare_profitalibity_data(rpc)
     tools["trader_address"] = tools["trader_address"].str.lower()
@@ -623,7 +621,7 @@ def run_profitability_analysis(rpc):
     print("Summarising trades...")
     summary_df = summary_analyse(all_trades_df)
-    # save to csv
     all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
     summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)

         timestamp_60_days_ago = (DATETIME_60_DAYS_AGO).timestamp()
         fpmmTrades = create_fpmmTrades(rpc, from_timestamp=timestamp_60_days_ago)
         fpmmTrades.to_parquet(DATA_DIR / "fpmmTrades.parquet", index=False)
     # make sure trader_address is in the columns
     assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
 def run_profitability_analysis(rpc):
     """Create all trades analysis."""
+    # load dfs from data folder for analysis
     print("Preparing data...")
     fpmmTrades, tools = prepare_profitalibity_data(rpc)
     tools["trader_address"] = tools["trader_address"].str.lower()
     print("Summarising trades...")
     summary_df = summary_analyse(all_trades_df)
+    # save to parquet
     all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
     summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)

scripts/pull_data.py CHANGED Viewed

@@ -17,7 +17,9 @@ from markets import (
 from tools import (
     etl as tools_etl,
     DEFAULT_FILENAME as TOOLS_FILENAME,
 )
 from profitability import run_profitability_analysis
 import gc
@@ -27,6 +29,7 @@ logging.basicConfig(level=logging.INFO)
 SCRIPTS_DIR = Path(__file__).parent
 ROOT_DIR = SCRIPTS_DIR.parent
 DATA_DIR = ROOT_DIR / "data"
 def get_question(text: str) -> str:
@@ -149,6 +152,16 @@ def weekly_analysis():
     with open(DATA_DIR / "t_map.pkl", "wb") as f:
         pickle.dump(t_map, f)
     # clean and release all memory
     del tools
     del fpmms

 from tools import (
     etl as tools_etl,
     DEFAULT_FILENAME as TOOLS_FILENAME,
+    update_tools_accuracy,
 )
+from app import INC_TOOLS
 from profitability import run_profitability_analysis
 import gc
 SCRIPTS_DIR = Path(__file__).parent
 ROOT_DIR = SCRIPTS_DIR.parent
 DATA_DIR = ROOT_DIR / "data"
+ACCURACY_FILENAME = "tools_accuracy.csv"
 def get_question(text: str) -> str:
     with open(DATA_DIR / "t_map.pkl", "wb") as f:
         pickle.dump(t_map, f)
+    # Computing tools accuracy information
+    print("Computing tool accuracy information")
+    # Check if the file exists
+    acc_data = None
+    if os.path.exists(DATA_DIR / ACCURACY_FILENAME):
+        acc_data = pd.read_csv(DATA_DIR / ACCURACY_FILENAME)
+    update_tools_accuracy(acc_data, tools, INC_TOOLS)
+    # TODO save acc_data into a CSV file
     # clean and release all memory
     del tools
     del fpmms

scripts/tools.py CHANGED Viewed

@@ -470,20 +470,6 @@ def etl(
         transformed = transformer(contents)
-        # Remove appending data, always new files
-        # if os.path.exists(DATA_DIR / events_filename):
-        #     old = pd.read_parquet(DATA_DIR / events_filename)
-        #     # Reset index to avoid index conflicts
-        #     old.reset_index(drop=True, inplace=True)
-        #     transformed.reset_index(drop=True, inplace=True)
-        #     # Concatenate DataFrames
-        #     transformed = pd.concat([old, transformed], ignore_index=True)
-        #     # Drop duplicates if necessary
-        #     transformed.drop_duplicates(subset=REQUEST_ID_FIELD, inplace=True)
         event_to_contents[event_name] = transformed.copy()
     # Store progress
@@ -495,6 +481,42 @@ def etl(
     return tools
 if __name__ == "__main__":
     RPCs = [
         "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a",

         transformed = transformer(contents)
         event_to_contents[event_name] = transformed.copy()
     # Store progress
     return tools
def update_tools_accuracy(
    tools_acc: pd.DataFrame, tools_df: pd.DataFrame, inc_tools: List[str]
) -> pd.DataFrame:
    """Compute per-tool accuracy and fold it into previously stored accuracy data.

    A request counts towards a tool's accuracy only when the tool is listed in
    ``inc_tools``, the request's ``error`` flag is not 1, and both ``vote`` and
    ``currentAnswer`` are ``"Yes"`` or ``"No"`` (lower-case ``"yes"``/``"no"``
    answers are normalised first). A request is a "win" when the vote equals
    the current answer.

    Args:
        tools_acc: Previously stored accuracy table, or ``None`` when no
            accuracy data exists yet. Expected to have a ``tool`` column.
        tools_df: Tool requests with at least the columns ``tool``, ``error``,
            ``currentAnswer``, ``vote`` and ``request_time``. Not modified.
        inc_tools: Names of the tools to include in the computation.

    Returns:
        A DataFrame with the columns ``tool``, ``tool_accuracy`` (percentage of
        wins), ``total_requests``, ``min`` and ``max`` (earliest and latest
        ``request_time`` per tool). When ``tools_acc`` is given, rows for tools
        computed in this call replace the stored rows for the same tool, and
        stored rows for other tools are kept unchanged.
    """
    # Work on a copy so the normalisation below never writes into (a view of)
    # the caller's DataFrame.
    selected = tools_df["tool"].isin(inc_tools) & (tools_df["error"] != 1)
    valid = tools_df[selected].copy()
    valid["currentAnswer"] = valid["currentAnswer"].replace(
        {"no": "No", "yes": "Yes"}
    )

    # Only binary, resolved questions can be scored.
    binary_answers = ["Yes", "No"]
    scorable = valid["currentAnswer"].isin(binary_answers) & valid["vote"].isin(
        binary_answers
    )
    valid = valid[scorable].copy()
    valid["win"] = (valid["currentAnswer"] == valid["vote"]).astype(int)

    # One row per tool, one column per outcome (0 = loss, 1 = win). The reindex
    # guarantees both columns exist even if the data holds only wins or only
    # losses; fill_value keeps the counts as integers.
    wins = (
        valid.groupby(["tool", "win"])
        .size()
        .unstack(fill_value=0)
        .reindex(columns=[0, 1], fill_value=0)
    )
    wins["tool_accuracy"] = (wins[1] / (wins[0] + wins[1])) * 100
    wins["total_requests"] = wins[0] + wins[1]
    wins = wins.reset_index()
    wins.columns = wins.columns.astype(str)
    wins = wins[["tool", "tool_accuracy", "total_requests"]]

    # First and last request time observed for each tool.
    timeline = (
        valid.groupby("tool")["request_time"].agg(["min", "max"]).reset_index()
    )
    acc_info = wins.merge(timeline, how="left", on="tool")

    if tools_acc is None:
        print("Creating accuracy file for the first time")
        return acc_info

    # Keep stored rows only for tools that were not recomputed in this call.
    if tools_acc.empty:
        return acc_info
    untouched = tools_acc[~tools_acc["tool"].isin(acc_info["tool"])]
    if untouched.empty:
        return acc_info
    combined = pd.concat([untouched, acc_info], ignore_index=True)
    return combined.sort_values("tool").reset_index(drop=True)
 if __name__ == "__main__":
     RPCs = [
         "https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a",