diff --git "a/notebooks/analysis.ipynb" "b/notebooks/analysis.ipynb"
deleted file mode 100644--- "a/notebooks/analysis.ipynb"
+++ /dev/null
@@ -1,2782 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import re"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import csv\n",
-    "import requests\n",
-    "\n",
-    "# NOTE(review): `hash` shadows the Python builtin; the name is kept because other\n",
-    "# cells may still reference it.\n",
-    "hash = \"QmR8etyW3TPFadNtNrW54vfnFqmh8vBrMARWV76EmxCZyk\"\n",
-    "ipfs_address = \"https://gateway.autonolas.tech/ipfs/\"\n",
-    "\n",
-    "# Download the accuracy CSV from the IPFS gateway.\n",
-    "accuracy_link = ipfs_address + hash\n",
-    "# Without a timeout an unresponsive gateway would block the kernel indefinitely.\n",
-    "response = requests.get(accuracy_link, timeout=30)\n",
-    "# Stop here on a 4xx/5xx instead of parsing an error page as CSV further down.\n",
-    "response.raise_for_status()\n",
-    "print(response)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "headers = ['tool', 'tool_accuracy', 'total_requests', 'min', 'max']\n"
-     ]
-    }
-   ],
-   "source": [
-    "from io import StringIO\n",
-    "\n",
-    "# Maps tool name -> [total_requests, tool_accuracy], kept as the raw CSV strings.\n",
-    "accuracy_store = {}\n",
-    "data = StringIO(response.text)\n",
-    "csv_reader = csv.reader(data, delimiter=',')\n",
-    "for row in csv_reader:\n",
-    "    # csv.reader yields an empty list for a blank line (e.g. trailing newlines);\n",
-    "    # skip those instead of raising IndexError on row[0].\n",
-    "    if not row:\n",
-    "        continue\n",
-    "    if row[0] == \"tool\":\n",
-    "        print(f\"headers = {row}\")\n",
-    "        continue\n",
-    "    # Column order in the file is tool, tool_accuracy, total_requests, ...\n",
-    "    accuracy_store[row[0]] = [row[2], row[1]]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'claude-prediction-offline': ['481', '57.380457380457386'], 'claude-prediction-online': ['1055', '61.137440758293835'], 'prediction-offline': ['4465', '67.41321388577828'], 'prediction-offline-sme': ['61', '70.49180327868852'], 'prediction-online': ['9490', '66.00632244467862'], 'prediction-online-sme': ['14642', '65.67408823931157'], 'prediction-request-rag': ['2691', '63.58231140839836'], 'prediction-request-rag-claude': ['7428', '65.64351103931072'], 'prediction-request-reasoning': ['17372', '67.11374625834677'], 'prediction-request-reasoning-claude': ['2470', '66.72064777327935'], 'prediction-url-cot-claude': ['1596', '61.904761904761905']}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(accuracy_store)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Load the three parquet inputs used by the rest of the notebook.\n",
-    "fpmms, tools, trades = (\n",
-    "    pd.read_parquet(parquet_path)\n",
-    "    for parquet_path in (\n",
-    "        '../data/fpmms.parquet',\n",
-    "        '../data/tools.parquet',\n",
-    "        '../data/all_trades_profitability.parquet',\n",
-    "    )\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "INC_TOOLS = [\n",
-    "    \"prediction-online\",\n",
-    "    \"prediction-offline\",\n",
-    "    \"claude-prediction-online\",\n",
-    "    \"claude-prediction-offline\",\n",
-    "    \"prediction-offline-sme\",\n",
-    "    \"prediction-online-sme\",\n",
-    "    \"prediction-request-rag\",\n",
-    "    \"prediction-request-reasoning\",\n",
-    "    \"prediction-url-cot-claude\",\n",
-    "    \"prediction-request-rag-claude\",\n",
-    "    \"prediction-request-reasoning-claude\",\n",
-    "]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th>win</th>\n",
-       "      <th>tool</th>\n",
-       "      <th>tool_accuracy</th>\n",
-       "      <th>total_requests</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>claude-prediction-offline</td>\n",
-       "      <td>66.308244</td>\n",
-       "      <td>279</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>claude-prediction-online</td>\n",
-       "      <td>58.914027</td>\n",
-       "      <td>1105</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>67.717915</td>\n",
-       "      <td>2283</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-offline-sme</td>\n",
-       "      <td>55.555556</td>\n",
-       "      <td>18</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>65.459066</td>\n",
-       "      <td>5631</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>67.417656</td>\n",
-       "      <td>8167</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>64.217072</td>\n",
-       "      <td>1769</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>69.554566</td>\n",
-       "      <td>4490</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>68.813594</td>\n",
-       "      <td>9828</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>68.910256</td>\n",
-       "      <td>2184</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>64.584980</td>\n",
-       "      <td>1265</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "win                                 tool  tool_accuracy  total_requests\n",
-       "0              claude-prediction-offline      66.308244             279\n",
-       "1               claude-prediction-online      58.914027            1105\n",
-       "2                     prediction-offline      67.717915            2283\n",
-       "3                 prediction-offline-sme      55.555556              18\n",
-       "4                      prediction-online      65.459066            5631\n",
-       "5                  prediction-online-sme      67.417656            8167\n",
-       "6                 prediction-request-rag      64.217072            1769\n",
-       "7          prediction-request-rag-claude      69.554566            4490\n",
-       "8           prediction-request-reasoning      68.813594            9828\n",
-       "9    prediction-request-reasoning-claude      68.910256            2184\n",
-       "10             prediction-url-cot-claude      64.584980            1265"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tools_inc = tools[tools['tool'].isin(INC_TOOLS)]\n",
-    "# filtering errors; .copy() gives an independent frame so the column assignments\n",
-    "# below do not write into a slice of `tools` (SettingWithCopyWarning).\n",
-    "tools_non_error = tools_inc[tools_inc['error'] != 1].copy()\n",
-    "tools_non_error['currentAnswer'] = tools_non_error['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})\n",
-    "# Only rows whose answer and vote are both Yes/No are scored.\n",
-    "tools_non_error = tools_non_error[\n",
-    "    tools_non_error['currentAnswer'].isin(['Yes', 'No'])\n",
-    "    & tools_non_error['vote'].isin(['Yes', 'No'])\n",
-    "].copy()\n",
-    "tools_non_error['win'] = (tools_non_error['currentAnswer'] == tools_non_error['vote']).astype(int)\n",
-    "tools_non_error.columns = tools_non_error.columns.astype(str)\n",
-    "\n",
-    "# Per-tool counts of losses (column 0) and wins (column 1). fill_value keeps the counts\n",
-    "# integral, and reindex guarantees both columns exist even if no request lost (or won).\n",
-    "wins = (\n",
-    "    tools_non_error.groupby(['tool', 'win']).size()\n",
-    "    .unstack(fill_value=0)\n",
-    "    .reindex(columns=[0, 1], fill_value=0)\n",
-    ")\n",
-    "wins['total_requests'] = wins[0] + wins[1]\n",
-    "wins['tool_accuracy'] = (wins[1] / wins['total_requests']) * 100\n",
-    "wins = wins.reset_index()\n",
-    "wins.columns = wins.columns.astype(str)\n",
-    "# Clear the leftover 'win' label on the columns axis so it is not shown as a header.\n",
-    "wins.columns.name = None\n",
-    "wins = wins[[\"tool\", \"tool_accuracy\", \"total_requests\"]]\n",
-    "wins"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>min</th>\n",
-       "      <th>max</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>tool</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>claude-prediction-offline</th>\n",
-       "      <td>2024-04-23 13:09:30</td>\n",
-       "      <td>2024-06-10 00:31:30</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>claude-prediction-online</th>\n",
-       "      <td>2024-04-12 12:24:20</td>\n",
-       "      <td>2024-06-09 21:41:20</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-offline</th>\n",
-       "      <td>2024-04-12 12:20:10</td>\n",
-       "      <td>2024-06-08 23:45:00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-offline-sme</th>\n",
-       "      <td>2024-04-16 07:58:45</td>\n",
-       "      <td>2024-04-29 20:45:15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-online</th>\n",
-       "      <td>2024-04-16 05:52:40</td>\n",
-       "      <td>2024-06-09 21:47:20</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-online-sme</th>\n",
-       "      <td>2024-04-12 11:51:30</td>\n",
-       "      <td>2024-06-10 00:06:00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-request-rag</th>\n",
-       "      <td>2024-04-12 11:39:40</td>\n",
-       "      <td>2024-06-09 21:17:45</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-request-rag-claude</th>\n",
-       "      <td>2024-04-12 11:14:30</td>\n",
-       "      <td>2024-06-07 11:42:30</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-request-reasoning</th>\n",
-       "      <td>2024-04-12 11:57:05</td>\n",
-       "      <td>2024-06-09 21:50:45</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-request-reasoning-claude</th>\n",
-       "      <td>2024-04-12 11:53:55</td>\n",
-       "      <td>2024-06-05 05:00:10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-url-cot-claude</th>\n",
-       "      <td>2024-04-12 11:37:15</td>\n",
-       "      <td>2024-06-05 05:21:10</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                     min                  max\n",
-       "tool                                                                         \n",
-       "claude-prediction-offline            2024-04-23 13:09:30  2024-06-10 00:31:30\n",
-       "claude-prediction-online             2024-04-12 12:24:20  2024-06-09 21:41:20\n",
-       "prediction-offline                   2024-04-12 12:20:10  2024-06-08 23:45:00\n",
-       "prediction-offline-sme               2024-04-16 07:58:45  2024-04-29 20:45:15\n",
-       "prediction-online                    2024-04-16 05:52:40  2024-06-09 21:47:20\n",
-       "prediction-online-sme                2024-04-12 11:51:30  2024-06-10 00:06:00\n",
-       "prediction-request-rag               2024-04-12 11:39:40  2024-06-09 21:17:45\n",
-       "prediction-request-rag-claude        2024-04-12 11:14:30  2024-06-07 11:42:30\n",
-       "prediction-request-reasoning         2024-04-12 11:57:05  2024-06-09 21:50:45\n",
-       "prediction-request-reasoning-claude  2024-04-12 11:53:55  2024-06-05 05:00:10\n",
-       "prediction-url-cot-claude            2024-04-12 11:37:15  2024-06-05 05:21:10"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Same cleaning as the accuracy cell, repeated so this cell runs on its own.\n",
-    "tools_inc = tools[tools['tool'].isin(INC_TOOLS)]\n",
-    "# filtering errors; .copy() gives an independent frame so the column assignments\n",
-    "# below do not write into a slice of `tools` (SettingWithCopyWarning).\n",
-    "tools_non_error = tools_inc[tools_inc['error'] != 1].copy()\n",
-    "tools_non_error['currentAnswer'] = tools_non_error['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})\n",
-    "# Only rows whose answer and vote are both Yes/No are kept.\n",
-    "tools_non_error = tools_non_error[\n",
-    "    tools_non_error['currentAnswer'].isin(['Yes', 'No'])\n",
-    "    & tools_non_error['vote'].isin(['Yes', 'No'])\n",
-    "].copy()\n",
-    "tools_non_error['win'] = (tools_non_error['currentAnswer'] == tools_non_error['vote']).astype(int)\n",
-    "tools_non_error.columns = tools_non_error.columns.astype(str)\n",
-    "# Earliest and latest request_time per tool among the remaining requests.\n",
-    "timeline = tools_non_error.groupby(['tool'])[\"request_time\"].agg([\"min\", \"max\"])\n",
-    "timeline"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>tool_accuracy</th>\n",
-       "      <th>total_requests</th>\n",
-       "      <th>min</th>\n",
-       "      <th>max</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>claude-prediction-offline</td>\n",
-       "      <td>66.308244</td>\n",
-       "      <td>279</td>\n",
-       "      <td>2024-04-23 13:09:30</td>\n",
-       "      <td>2024-06-10 00:31:30</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>claude-prediction-online</td>\n",
-       "      <td>58.914027</td>\n",
-       "      <td>1105</td>\n",
-       "      <td>2024-04-12 12:24:20</td>\n",
-       "      <td>2024-06-09 21:41:20</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>67.717915</td>\n",
-       "      <td>2283</td>\n",
-       "      <td>2024-04-12 12:20:10</td>\n",
-       "      <td>2024-06-08 23:45:00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-offline-sme</td>\n",
-       "      <td>55.555556</td>\n",
-       "      <td>18</td>\n",
-       "      <td>2024-04-16 07:58:45</td>\n",
-       "      <td>2024-04-29 20:45:15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>65.459066</td>\n",
-       "      <td>5631</td>\n",
-       "      <td>2024-04-16 05:52:40</td>\n",
-       "      <td>2024-06-09 21:47:20</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>67.417656</td>\n",
-       "      <td>8167</td>\n",
-       "      <td>2024-04-12 11:51:30</td>\n",
-       "      <td>2024-06-10 00:06:00</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>64.217072</td>\n",
-       "      <td>1769</td>\n",
-       "      <td>2024-04-12 11:39:40</td>\n",
-       "      <td>2024-06-09 21:17:45</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>69.554566</td>\n",
-       "      <td>4490</td>\n",
-       "      <td>2024-04-12 11:14:30</td>\n",
-       "      <td>2024-06-07 11:42:30</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>68.813594</td>\n",
-       "      <td>9828</td>\n",
-       "      <td>2024-04-12 11:57:05</td>\n",
-       "      <td>2024-06-09 21:50:45</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>68.910256</td>\n",
-       "      <td>2184</td>\n",
-       "      <td>2024-04-12 11:53:55</td>\n",
-       "      <td>2024-06-05 05:00:10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>64.584980</td>\n",
-       "      <td>1265</td>\n",
-       "      <td>2024-04-12 11:37:15</td>\n",
-       "      <td>2024-06-05 05:21:10</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                   tool  tool_accuracy  total_requests  \\\n",
-       "0             claude-prediction-offline      66.308244             279   \n",
-       "1              claude-prediction-online      58.914027            1105   \n",
-       "2                    prediction-offline      67.717915            2283   \n",
-       "3                prediction-offline-sme      55.555556              18   \n",
-       "4                     prediction-online      65.459066            5631   \n",
-       "5                 prediction-online-sme      67.417656            8167   \n",
-       "6                prediction-request-rag      64.217072            1769   \n",
-       "7         prediction-request-rag-claude      69.554566            4490   \n",
-       "8          prediction-request-reasoning      68.813594            9828   \n",
-       "9   prediction-request-reasoning-claude      68.910256            2184   \n",
-       "10            prediction-url-cot-claude      64.584980            1265   \n",
-       "\n",
-       "                    min                  max  \n",
-       "0   2024-04-23 13:09:30  2024-06-10 00:31:30  \n",
-       "1   2024-04-12 12:24:20  2024-06-09 21:41:20  \n",
-       "2   2024-04-12 12:20:10  2024-06-08 23:45:00  \n",
-       "3   2024-04-16 07:58:45  2024-04-29 20:45:15  \n",
-       "4   2024-04-16 05:52:40  2024-06-09 21:47:20  \n",
-       "5   2024-04-12 11:51:30  2024-06-10 00:06:00  \n",
-       "6   2024-04-12 11:39:40  2024-06-09 21:17:45  \n",
-       "7   2024-04-12 11:14:30  2024-06-07 11:42:30  \n",
-       "8   2024-04-12 11:57:05  2024-06-09 21:50:45  \n",
-       "9   2024-04-12 11:53:55  2024-06-05 05:00:10  \n",
-       "10  2024-04-12 11:37:15  2024-06-05 05:21:10  "
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Left-join the per-tool request time range onto the accuracy table; `timeline` is\n",
-    "# indexed by tool, so the join key matches its index level name.\n",
-    "total = pd.merge(wins, timeline, how=\"left\", on=\"tool\")\n",
-    "total"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# index=False: the RangeIndex carries no information, and the downloaded accuracy CSV\n",
-    "# starts with the 'tool' column, which is what the CSV-parsing cell checks in row[0].\n",
-    "total.to_csv(\"accuracy_info.csv\", index=False)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def extract_question(text):\n",
-    "    \"\"\"Pull the quoted question out of a prompt.\n",
-    "\n",
-    "    Looks for the first run of text enclosed in double quotes that ends with a\n",
-    "    question mark and returns it without the quotes.\n",
-    "\n",
-    "    Args:\n",
-    "        text: Prompt text to search. Values that are not strings (for example\n",
-    "            None or NaN from a missing cell) are returned untouched.\n",
-    "\n",
-    "    Returns:\n",
-    "        The question without its surrounding quotes, or `text` itself when no\n",
-    "        quoted question is present.\n",
-    "    \"\"\"\n",
-    "    # Missing values may reach this function through Series.apply; re.search\n",
-    "    # would raise TypeError on them.\n",
-    "    if not isinstance(text, str):\n",
-    "        return text\n",
-    "    match = re.search(r'\"([^\"]+\\?)\"', text)\n",
-    "    return match.group(1) if match else text"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_current_answer(q):\n",
-    "    \"\"\"Return the distinct `current_answer` values of the trades titled `q`.\n",
-    "\n",
-    "    Reads the notebook-level `trades` frame; the result is the array produced by\n",
-    "    `Series.unique()` and is empty when no trade has that title.\n",
-    "    \"\"\"\n",
-    "    title_matches = trades['title'] == q\n",
-    "    return trades.loc[title_matches, 'current_answer'].unique()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# only select trades in May 2024\n",
-    "# only select trades in May 2024\n",
-    "# assign() returns a new frame, so re-running this cell never writes into the\n",
-    "# filtered slice produced by a previous run.\n",
-    "trades = trades.assign(creation_timestamp=pd.to_datetime(trades['creation_timestamp']))\n",
-    "in_may_2024 = (\n",
-    "    (trades['creation_timestamp'].dt.year == 2024)\n",
-    "    & (trades['creation_timestamp'].dt.month == 5)\n",
-    ")\n",
-    "trades = trades[in_may_2024]\n",
-    "\n",
-    "# make a column for winning_vote\n",
-    "# Use the same 'no'/'yes' -> 'No'/'Yes' normalisation as the accuracy cells so a\n",
-    "# lower-case answer is not counted as a mismatch against a 'Yes'/'No' vote.\n",
-    "normalised_answer = tools['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})\n",
-    "tools = tools.assign(winning_vote=(tools['vote'] == normalised_answer))\n",
-    "tools = tools[tools['tool'] != 'resolve-market-reasoning-gpt-4'].reset_index(drop=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Reduce every prompt to the quoted question it contains (left unchanged if none).\n",
-    "tools['prompt_request'] = tools['prompt_request'].map(extract_question)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Number of trades per (title, winning_trade) pair, pivoted so each winning_trade\n",
-    "# value becomes a column; combinations that never occur are filled with 0.\n",
-    "pair_counts = trades.groupby(['title', 'winning_trade']).size()\n",
-    "trades_grouped = pair_counts.unstack().fillna(0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Share of winning trades per title, plus the number of trades behind each share.\n",
-    "trades_per_title = trades_grouped.sum(axis=1)\n",
-    "winning_trades_percentage = (trades_grouped[True] / trades_per_title).reset_index()\n",
-    "winning_trades_percentage.columns = ['title', 'winning_trade_percentage']\n",
-    "winning_trades_percentage['num_trades'] = trades_per_title.tolist()\n",
-    "\n",
-    "# Rank once (highest share first) and slice both ends; the bottom 50 therefore stay\n",
-    "# in descending order, exactly as when slicing the sorted frame with [-50:].\n",
-    "ranked = winning_trades_percentage.sort_values(by='winning_trade_percentage', ascending=False)\n",
-    "winning_trades_percentage_bottom_50 = ranked.tail(50).reset_index(drop=True)\n",
-    "winning_trades_percentage_top_50 = ranked.head(50).reset_index(drop=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# winning_trades_percentage.sort_values(by='winning_trade_percentage', ascending=False).reset_index(drop=True).to_csv('winning_trades_percentage.csv', index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['Will Kylian Mbappe leave Paris St-Germain at the end of the season by 16 May 2024?',\n",
-       " 'Will BlizzCon be reinstated on or by 1 May 2024 after its cancellation in 2024?',\n",
-       " 'Will Joe Biden approve more weapons for Ukraine by 4 May 2024?',\n",
-       " \"Will FiiO's new custom in-ear monitors become the top-selling wireless earbuds by 9 May 2024?\",\n",
-       " 'Will Mohamed Salah leave Liverpool on 7 May 2024?',\n",
-       " \"Will Ryan Gosling accept a 'dark' role in a film by 14 May 2024?\",\n",
-       " 'Will the Philadelphia 76ers win the NBA play-offs on 7 May 2024?',\n",
-       " 'Will the Panamanian presidential election result in a clear victor by 12 May 2024?',\n",
-       " 'Will the Museum of Old and New Art in Tasmania be allowed to keep its exhibit women-only by 14 May 2024?',\n",
-       " \"Will Diego Maradona's 'Stolen' Golden Ball be auctioned off on 14 May 2024?\",\n",
-       " 'Will the Mercedes G-Wagen release an electric version on 1 May 2024?',\n",
-       " 'Will the Israeli government lift the broadcast ban on Al Jazeera on or before 13 May 2024?',\n",
-       " 'Will Intel release its Core Ultra 200 Arrow Lake CPUs by 16 May 2024?',\n",
-       " 'Will the Atlanta City Council pay $3.8 million to settle a lawsuit by the family of a church deacon who died in a struggle with a city police officer by 13 May 2024?',\n",
-       " 'Will Voyager-1 continue to send readable data until 1 May 2024?',\n",
-       " 'Will the Amber Alert issued in New Mexico result in the discovery of the missing 10-month-old baby by 13 May 2024?',\n",
-       " \"Will Florida's ban on lab-grown meat be overturned by 12 May 2024?\",\n",
-       " \"Will the US government successfully distribute the $138.7 million payout to Larry Nassar's victims by 1 May 2024?\",\n",
-       " 'Will a new sport be officially added to the Olympics programme on 16 May 2024?',\n",
-       " \"Will Kristi Noem be announced as Donald Trump's vice presidential running mate by 6 May 2024?\",\n",
-       " 'Will the United Auto Workers union strike against Daimler Truck on or by 7 May 2024?',\n",
-       " 'Will the World Snooker Championship 2024 conclude with Judd Trump or Tom Ford as the winner by May 5, 2024?',\n",
-       " \"Will Maria Georgas be announced as the next 'Bachelorette' lead on 9 May 2024?\",\n",
-       " 'Will Apple release new iPads at their event on May 7, 2024?',\n",
-       " 'Will Joe Biden still be the President of the United States on 11 May 2024?',\n",
-       " \"Will the world's biggest 3D printer be used to make parts of houses by 2 May 2024?\",\n",
-       " \"Will Anthony Edwards be named NBA's MVP on 11 May 2024?\",\n",
-       " 'Will a winner be declared in the Eurovision 2024 grand final by 19 May 2024?',\n",
-       " \"Will a new mission be launched to explore the moon's 'hidden side' by 12 May 2024?\",\n",
-       " 'Will Mike Tyson win his bout against Jake Paul on 7 May 2024?',\n",
-       " 'Will the bird flu outbreak be declared a global pandemic by 12 May 2024?',\n",
-       " 'Will the new Apple Pencil Pro be revealed by 15 May 2024?',\n",
-       " \"Will the amateur angler who landed UK's 'biggest fish' in Essex catch another record-breaking fish by 7 May 2024?\",\n",
-       " \"Will Saul 'Canelo' Alvarez successfully defend his WBA, WBC, WBO, and IBF titles again by 13 May 2024?\",\n",
-       " \"Will Taylor Swift's 'The Tortured Poets Department' album reach number 1 on Billboard 200 on 3 May 2024?\",\n",
-       " 'Will Joe Biden attend the White House Correspondents Dinner on 5 May 2024?',\n",
-       " 'Will King Charles perform public duties on 5 May 2024, after his progress in cancer treatment?',\n",
-       " \"Will LinkedIn's new puzzle games Pinpoint, Queens, and Crossclimb be successful on their platform by 9 May 2024?\",\n",
-       " 'Will South Dakota Governor Kristi Noem resign over the puppy killing controversy by 15 May 2024?',\n",
-       " 'Will Apple announce the release of a new M4 chip by 13 May 2024?',\n",
-       " 'Will Eric Adams still be the mayor of New York City on 10 May 2024?',\n",
-       " \"Will the livestream video 'portals' connecting New York City and Dublin still be operational on 19 May 2024?\",\n",
-       " 'Will there be more pro-Palestinian protests on US university campuses on 6 May 2024?',\n",
-       " 'Will Google Pixel 8a be released at Google I/O 2024 on 14 May?',\n",
-       " 'Will Apple announce more than just a spec bump at the May 2024 iPad event?',\n",
-       " \"Will Apple's new Magic Keyboard for the iPad Pro M4 be released by 15 May 2024?\",\n",
-       " 'Will the UEFA Champions League final be between PSG and Borussia Dortmund on 13 May 2024?',\n",
-       " 'Will the FBI report an increase in scams targeting Americans older than 60 in 2024?',\n",
-       " 'Will Erik ten Hag remain as Manchester United manager on 17 May 2024?',\n",
-       " 'Will Jofra Archer be a part of the England squad for T20 World Cup in June 2024?']"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "winning_trades_percentage_top_50['title'].tolist()\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[\"Will 'Scavengers Reign' be renewed for a second season on Netflix by 19 May 2024?\",\n",
-       " 'Will Fiona Harvey officially file a lawsuit against Netflix and Richard Gadd by 17 May 2024?',\n",
-       " 'Will the final report on the Baltimore bridge collapse be released by 20 May 2024?',\n",
-       " 'Will the Autonomous Racing League successfully hold their second race by May 3, 2024?',\n",
-       " 'Will Trent Staggs win the Senatorial race to replace Sen. Mitt Romney (R-UT) on 5 May 2024?',\n",
-       " 'Will the Houston area experience flooding conditions on 11 May 2024?',\n",
-       " \"Will 'Wednesday' season 2 be released on Netflix by 1 May 2024?\",\n",
-       " 'Will Arsenal win against Bournemouth in the Premier League match on 12 May 2024?',\n",
-       " 'Will Qualcomm release its Snapdragon X Plus laptop chip by 1 May 2024?',\n",
-       " \"Will Feyenoord's Arne Slot become the new manager of Liverpool by 1 May 2024?\",\n",
-       " 'Will the FCC receive additional funding for replacing Huawei gear by 10 May 2024?',\n",
-       " 'Will there be any major cyber attack on an organization using AI before 2 May 2024?',\n",
-       " 'Will Sony complete the takeover of Paramount by 11 May 2024?',\n",
-       " \"Will 'Hell's Kitchen' win the Tony Awards for Best Musical on 7 May 2024?\",\n",
-       " 'Will Tesla announce reinstating any laid off supercharger workers by 11 May 2024?',\n",
-       " 'Will there be another tornado in Nebraska and Iowa on 6 May 2024?',\n",
-       " 'Will the DJI drones be officially banned in the United States by 4 May 2024?',\n",
-       " 'Will OpenAI debut a multimodal AI digital assistant by 19 May 2024?',\n",
-       " 'Will TikTok be purchased by a Wall Street or Tech billionaire by 2 May 2024?',\n",
-       " \"Will the 'Lost' Gustav Klimt painting be sold at the auction in Vienna on 3 May 2024?\",\n",
-       " \"Will the Federal Communications Commission levy fines against AT&T, Sprint, T-Mobile, and Verizon for illegally sharing customers' location data by 9 May 2024?\",\n",
-       " 'Will the Manchester City win the WSL title on 14 May 2024?',\n",
-       " 'Will Meta start making profit from generative AI by 3 May 2024?',\n",
-       " 'Will Apple launch an AI-powered iOS 18 on or by 1 May 2024?',\n",
-       " 'Will iOS 18 receive a major AI overhaul by 6 May 2024?',\n",
-       " 'Will Ippei Mizuhara be sentenced for bank fraud by 15 May 2024?',\n",
-       " 'Will Tesla lay off nearly 2,700 workers at its Austin, Texas factory by 1 May 2024?',\n",
-       " 'Will Manchester City win the Premier League title on 11 May 2024?',\n",
-       " 'Will there be another deadly pandemic by 8 May 2024?',\n",
-       " 'Will China successfully collect samples from the far side of the Moon on 10 May 2024?',\n",
-       " \"Will the American Airlines correct their system's error of mistaking 101-year-old passenger for a baby by 7 May 2024?\",\n",
-       " 'Will the Boeing Starliner capsule successfully complete its first astronaut-crewed flight to the International Space Station by 13 May 2024?',\n",
-       " \"Will the Technics' special-edition turntable in collaboration with Lamborghini be released by 17 May 2024?\",\n",
-       " 'Will the Florida Panthers win against the Boston Bruins in the Game 3 on 17 May 2024?',\n",
-       " 'Will Harvard Yard be free from Anti-Israel protests by 2 May 2024?',\n",
-       " \"Will Samsung's latest jibe have any impact on Apple's sales by 11 May 2024?\",\n",
-       " \"Will the Miss USA organization respond to the call for 'full transparency' from contestants by 16 May 2024?\",\n",
-       " 'Will Tom Daley win a medal at the Paris Olympics 2024 by 14 May 2024?',\n",
-       " \"Will Liverpool win any more trophies in Jurgen Klopp's final season?\",\n",
-       " 'Will Liverpool win any more trophies by 2 May 2024?',\n",
-       " 'Will Caitlin Clark score more than 20 points in her next NBA game by 10 May 2024?',\n",
-       " 'Will the statues of civil rights leader Daisy Bates and singer Johnny Cash replace the Arkansas statues at the U.S Capitol by 14 May 2024?',\n",
-       " \"Will the season 6 of Netflix's Cobra Kai be released in 3 parts by 12 May 2024?\",\n",
-       " \"Will the 'Don't Say Gay' education restrictions bill be implemented in Alabama on or before 1 May 2024?\",\n",
-       " \"Will the 'lost' Gustav Klimt painting be successfully auctioned by 3 May 2024?\",\n",
-       " 'Will the Kansas City Chiefs win their next game on or before May 15, 2024?',\n",
-       " 'Will Lando Norris win another F1 race by 15 May 2024?',\n",
-       " 'Will Pennsylvania be a red state by 6 May 2024?',\n",
-       " 'Will Tesla face significant financial troubles by 11 May 2024?',\n",
-       " 'Will the BattlerGC Pro be released for the GameCube on or by 3 May 2024?']"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "winning_trades_percentage_bottom_50['title'].tolist()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 62,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def losing_percentage(q):\n",
-    "    \"\"\"Summarise, per tool, how often its calls for one market were losing votes.\n",
-    "\n",
-    "    Reads the module-level `tools` DataFrame and uses its 'prompt_request',\n",
-    "    'tool' and 'winning_vote' columns.\n",
-    "\n",
-    "    Args:\n",
-    "        q: Market question text. Rows are selected when 'prompt_request'\n",
-    "            contains this text as a literal substring.\n",
-    "\n",
-    "    Returns:\n",
-    "        DataFrame with one row per tool and the columns 'tool',\n",
-    "        'losing_percentage' (a fraction between 0 and 1, not scaled to 100)\n",
-    "        and 'num_calls', sorted by 'losing_percentage' in descending order.\n",
-    "    \"\"\"\n",
-    "    print(f\"Losing percentage for: {q}\")\n",
-    "    # regex=False: question titles contain regex metacharacters (the trailing '?',\n",
-    "    # '.', parentheses, ...) and must be matched literally.\n",
-    "    # na=False: rows without a prompt are treated as non-matching instead of\n",
-    "    # producing an NA mask that cannot be used for boolean indexing.\n",
-    "    is_match = tools['prompt_request'].str.contains(q, regex=False, na=False)\n",
-    "    vote_counts = (\n",
-    "        tools[is_match]\n",
-    "        .groupby(['tool', 'winning_vote'])\n",
-    "        .size()\n",
-    "        .unstack(fill_value=0)\n",
-    "        # Guarantee both outcome columns exist even when every matching vote\n",
-    "        # was a win (or every one a loss); otherwise the lookups below raise KeyError.\n",
-    "        .reindex(columns=[False, True], fill_value=0)\n",
-    "    )\n",
-    "    num_calls = vote_counts[False] + vote_counts[True]\n",
-    "    q_losing_perc = (vote_counts[False] / num_calls).reset_index()\n",
-    "    q_losing_perc.columns = ['tool', 'losing_percentage']\n",
-    "    # reset_index() keeps the row order of vote_counts, so positional assignment is aligned.\n",
-    "    q_losing_perc['num_calls'] = num_calls.to_numpy()\n",
-    "    q_losing_perc = q_losing_perc.sort_values(by='losing_percentage', ascending=False)\n",
-    "    return q_losing_perc"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 63,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will 'Scavengers Reign' be renewed for a second season on Netflix by 19 May 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>40.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>17.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>0.656716</td>\n",
-       "      <td>67.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.571429</td>\n",
-       "      <td>7.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.538462</td>\n",
-       "      <td>52.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>0.250000</td>\n",
-       "      <td>4.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>0.185185</td>\n",
-       "      <td>27.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "0                   prediction-offline           1.000000       40.0\n",
-       "4        prediction-request-rag-claude           1.000000       17.0\n",
-       "7            prediction-url-cot-claude           1.000000        2.0\n",
-       "2                prediction-online-sme           0.656716       67.0\n",
-       "6  prediction-request-reasoning-claude           0.571429        7.0\n",
-       "5         prediction-request-reasoning           0.538462       52.0\n",
-       "3               prediction-request-rag           0.250000        4.0\n",
-       "1                    prediction-online           0.185185       27.0"
-      ]
-     },
-     "execution_count": 63,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# have confirmed market resolution was correct\n",
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[0, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 64,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will 'Scavengers Reign' be renewed for a second season on Netflix by 19 May 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>40.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>17.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>0.656716</td>\n",
-       "      <td>67.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.571429</td>\n",
-       "      <td>7.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.538462</td>\n",
-       "      <td>52.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>0.250000</td>\n",
-       "      <td>4.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>0.185185</td>\n",
-       "      <td>27.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "0                   prediction-offline           1.000000       40.0\n",
-       "4        prediction-request-rag-claude           1.000000       17.0\n",
-       "7            prediction-url-cot-claude           1.000000        2.0\n",
-       "2                prediction-online-sme           0.656716       67.0\n",
-       "6  prediction-request-reasoning-claude           0.571429        7.0\n",
-       "5         prediction-request-reasoning           0.538462       52.0\n",
-       "3               prediction-request-rag           0.250000        4.0\n",
-       "1                    prediction-online           0.185185       27.0"
-      ]
-     },
-     "execution_count": 64,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# have confirmed currentAnswer\n",
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[0, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 65,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will Fiona Harvey officially file a lawsuit against Netflix and Richard Gadd by 17 May 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>0.977273</td>\n",
-       "      <td>44.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>0.975000</td>\n",
-       "      <td>40.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>0.677419</td>\n",
-       "      <td>31.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.534483</td>\n",
-       "      <td>58.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>0.223881</td>\n",
-       "      <td>67.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.200000</td>\n",
-       "      <td>5.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>8.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "7            prediction-url-cot-claude           1.000000        1.0\n",
-       "2                prediction-online-sme           0.977273       44.0\n",
-       "1                    prediction-online           0.975000       40.0\n",
-       "0                   prediction-offline           0.677419       31.0\n",
-       "5         prediction-request-reasoning           0.534483       58.0\n",
-       "4        prediction-request-rag-claude           0.223881       67.0\n",
-       "6  prediction-request-reasoning-claude           0.200000        5.0\n",
-       "3               prediction-request-rag           0.000000        8.0"
-      ]
-     },
-     "execution_count": 65,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# have confirmed currentAnswer\n",
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[1, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 66,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will the final report on the Baltimore bridge collapse be released by 20 May 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>claude-prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>5.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>claude-prediction-online</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>87.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>25.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>0.951220</td>\n",
-       "      <td>41.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.833333</td>\n",
-       "      <td>6.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>0.714286</td>\n",
-       "      <td>7.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.437500</td>\n",
-       "      <td>48.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>0.394366</td>\n",
-       "      <td>71.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "0            claude-prediction-offline           1.000000        5.0\n",
-       "1             claude-prediction-online           1.000000        1.0\n",
-       "2                   prediction-offline           1.000000       87.0\n",
-       "6        prediction-request-rag-claude           1.000000       25.0\n",
-       "9            prediction-url-cot-claude           1.000000        1.0\n",
-       "3                    prediction-online           0.951220       41.0\n",
-       "8  prediction-request-reasoning-claude           0.833333        6.0\n",
-       "5               prediction-request-rag           0.714286        7.0\n",
-       "7         prediction-request-reasoning           0.437500       48.0\n",
-       "4                prediction-online-sme           0.394366       71.0"
-      ]
-     },
-     "execution_count": 66,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# have confirmed currentAnswer\n",
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[2, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 67,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will the Autonomous Racing League successfully hold their second race by May 3, 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>claude-prediction-offline</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>23.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>14.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>18.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>5.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>8.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>6.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>18.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>3.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "0            claude-prediction-offline                1.0        2.0\n",
-       "1                   prediction-offline                1.0       23.0\n",
-       "2                    prediction-online                1.0       14.0\n",
-       "3                prediction-online-sme                1.0       18.0\n",
-       "4               prediction-request-rag                1.0        5.0\n",
-       "5        prediction-request-rag-claude                1.0        8.0\n",
-       "8            prediction-url-cot-claude                1.0        6.0\n",
-       "6         prediction-request-reasoning                0.0       18.0\n",
-       "7  prediction-request-reasoning-claude                0.0        3.0"
-      ]
-     },
-     "execution_count": 67,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# have confirmed currentAnswer\n",
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[3, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 72,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will the Houston area experience flooding conditions on 11 May 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>claude-prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>claude-prediction-online</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>6.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>58.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>39.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>4.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>8.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>5.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>0.754717</td>\n",
-       "      <td>53.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.369048</td>\n",
-       "      <td>84.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>0.166667</td>\n",
-       "      <td>72.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "0            claude-prediction-offline           1.000000        2.0\n",
-       "1             claude-prediction-online           1.000000        6.0\n",
-       "2                   prediction-offline           1.000000       58.0\n",
-       "4                prediction-online-sme           1.000000       39.0\n",
-       "5               prediction-request-rag           1.000000        4.0\n",
-       "8  prediction-request-reasoning-claude           1.000000        8.0\n",
-       "9            prediction-url-cot-claude           1.000000        5.0\n",
-       "6        prediction-request-rag-claude           0.754717       53.0\n",
-       "7         prediction-request-reasoning           0.369048       84.0\n",
-       "3                    prediction-online           0.166667       72.0"
-      ]
-     },
-     "execution_count": 72,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[5, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 73,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will 'Wednesday' season 2 be released on Netflix by 1 May 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>0.750000</td>\n",
-       "      <td>4.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.750000</td>\n",
-       "      <td>4.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>0.666667</td>\n",
-       "      <td>6.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>0.500000</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.400000</td>\n",
-       "      <td>5.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>claude-prediction-online</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "1                prediction-online-sme           0.750000        4.0\n",
-       "5  prediction-request-reasoning-claude           0.750000        4.0\n",
-       "2               prediction-request-rag           0.666667        6.0\n",
-       "3        prediction-request-rag-claude           0.500000        2.0\n",
-       "4         prediction-request-reasoning           0.400000        5.0\n",
-       "0             claude-prediction-online           0.000000        1.0"
-      ]
-     },
-     "execution_count": 73,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[6, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 74,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will Arsenal win against Bournemouth in the Premier League match on 12 May 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>11.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>17.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>30.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>45.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.874016</td>\n",
-       "      <td>127.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>0.250000</td>\n",
-       "      <td>4.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.000000</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "0                   prediction-offline           1.000000       11.0\n",
-       "1                    prediction-online           1.000000       17.0\n",
-       "2                prediction-online-sme           1.000000       30.0\n",
-       "4        prediction-request-rag-claude           1.000000       45.0\n",
-       "5         prediction-request-reasoning           0.874016      127.0\n",
-       "3               prediction-request-rag           0.250000        4.0\n",
-       "6  prediction-request-reasoning-claude           0.000000        2.0"
-      ]
-     },
-     "execution_count": 74,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Per-tool losing share and call count for the market whose title is stored at index label 7.\n",
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[7, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 75,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will Qualcomm release its Snapdragon X Plus laptop chip by 1 May 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>claude-prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>7.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>19.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>15.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>0.941176</td>\n",
-       "      <td>17.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>0.800000</td>\n",
-       "      <td>5.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.666667</td>\n",
-       "      <td>15.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.652174</td>\n",
-       "      <td>23.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>0.333333</td>\n",
-       "      <td>3.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "0            claude-prediction-offline           1.000000        7.0\n",
-       "1                   prediction-offline           1.000000        1.0\n",
-       "3                prediction-online-sme           1.000000       19.0\n",
-       "5        prediction-request-rag-claude           1.000000       15.0\n",
-       "4               prediction-request-rag           0.941176       17.0\n",
-       "2                    prediction-online           0.800000        5.0\n",
-       "7  prediction-request-reasoning-claude           0.666667       15.0\n",
-       "6         prediction-request-reasoning           0.652174       23.0\n",
-       "8            prediction-url-cot-claude           0.333333        3.0"
-      ]
-     },
-     "execution_count": 75,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Per-tool losing share and call count for the market whose title is stored at index label 8.\n",
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[8, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 76,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will Feyenoord's Arne Slot become the new manager of Liverpool by 1 May 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>claude-prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>4.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.916667</td>\n",
-       "      <td>12.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.900000</td>\n",
-       "      <td>10.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>0.714286</td>\n",
-       "      <td>14.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>0.666667</td>\n",
-       "      <td>9.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>0.500000</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>0.454545</td>\n",
-       "      <td>11.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "0            claude-prediction-offline           1.000000        4.0\n",
-       "1                   prediction-offline           1.000000        2.0\n",
-       "8            prediction-url-cot-claude           1.000000        2.0\n",
-       "6         prediction-request-reasoning           0.916667       12.0\n",
-       "7  prediction-request-reasoning-claude           0.900000       10.0\n",
-       "4               prediction-request-rag           0.714286       14.0\n",
-       "3                prediction-online-sme           0.666667        9.0\n",
-       "2                    prediction-online           0.500000        2.0\n",
-       "5        prediction-request-rag-claude           0.454545       11.0"
-      ]
-     },
-     "execution_count": 76,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Per-tool losing share and call count for the market whose title is stored at index label 9.\n",
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[9, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 77,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Losing percentage for: Will the FCC receive additional funding for replacing Huawei gear by 10 May 2024?\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>claude-prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>6.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>claude-prediction-online</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>3.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>36.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>50.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>0.986486</td>\n",
-       "      <td>74.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>0.947368</td>\n",
-       "      <td>19.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>0.910714</td>\n",
-       "      <td>56.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>0.777778</td>\n",
-       "      <td>9.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.465753</td>\n",
-       "      <td>73.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.071429</td>\n",
-       "      <td>14.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                  tool  losing_percentage  num_calls\n",
-       "0            claude-prediction-offline           1.000000        6.0\n",
-       "1             claude-prediction-online           1.000000        3.0\n",
-       "2                   prediction-offline           1.000000       36.0\n",
-       "6        prediction-request-rag-claude           1.000000       50.0\n",
-       "4                prediction-online-sme           0.986486       74.0\n",
-       "5               prediction-request-rag           0.947368       19.0\n",
-       "3                    prediction-online           0.910714       56.0\n",
-       "9            prediction-url-cot-claude           0.777778        9.0\n",
-       "7         prediction-request-reasoning           0.465753       73.0\n",
-       "8  prediction-request-reasoning-claude           0.071429       14.0"
-      ]
-     },
-     "execution_count": 77,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Per-tool losing share and call count for the market whose title is stored at index label 10.\n",
-    "losing_percentage(winning_trades_percentage_bottom_50.loc[10, 'title'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 98,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Distinct market titles listed in winning_trades_percentage_bottom_50.\n",
-    "all_q = winning_trades_percentage_bottom_50['title'].unique().tolist()\n",
-    "\n",
-    "# Per tool: how many calls carried winning_vote False / True, for those titles only.\n",
-    "q_losing = (\n",
-    "    tools.loc[tools['prompt_request'].isin(all_q)]\n",
-    "    .groupby(['tool'])['winning_vote']\n",
-    "    .value_counts()\n",
-    "    .unstack()\n",
-    "    .fillna(0)\n",
-    ")\n",
-    "\n",
-    "# Fraction of False votes per tool, with the total number of calls next to it.\n",
-    "q_losing_perc = (q_losing[False] / (q_losing[False] + q_losing[True])).reset_index()\n",
-    "q_losing_perc.columns = ['tool', 'losing_percentage']\n",
-    "q_losing_perc = q_losing_perc.assign(\n",
-    "    num_calls=q_losing.sum(axis=1).tolist()\n",
-    ").sort_values(by='losing_percentage', ascending=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 99,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>tool</th>\n",
-       "      <th>losing_percentage</th>\n",
-       "      <th>num_calls</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>prediction-offline-sme</td>\n",
-       "      <td>1.000000</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>prediction-request-rag-claude</td>\n",
-       "      <td>0.913007</td>\n",
-       "      <td>1184.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>prediction-offline</td>\n",
-       "      <td>0.893281</td>\n",
-       "      <td>1012.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>prediction-request-rag</td>\n",
-       "      <td>0.889881</td>\n",
-       "      <td>336.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>prediction-online-sme</td>\n",
-       "      <td>0.857143</td>\n",
-       "      <td>1722.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>prediction-online</td>\n",
-       "      <td>0.853553</td>\n",
-       "      <td>1154.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>prediction-request-reasoning</td>\n",
-       "      <td>0.847451</td>\n",
-       "      <td>2727.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>prediction-url-cot-claude</td>\n",
-       "      <td>0.846154</td>\n",
-       "      <td>130.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>claude-prediction-online</td>\n",
-       "      <td>0.735849</td>\n",
-       "      <td>53.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>prediction-request-reasoning-claude</td>\n",
-       "      <td>0.659664</td>\n",
-       "      <td>238.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>claude-prediction-offline</td>\n",
-       "      <td>0.591549</td>\n",
-       "      <td>142.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                   tool  losing_percentage  num_calls\n",
-       "3                prediction-offline-sme           1.000000        2.0\n",
-       "7         prediction-request-rag-claude           0.913007     1184.0\n",
-       "2                    prediction-offline           0.893281     1012.0\n",
-       "6                prediction-request-rag           0.889881      336.0\n",
-       "5                 prediction-online-sme           0.857143     1722.0\n",
-       "4                     prediction-online           0.853553     1154.0\n",
-       "8          prediction-request-reasoning           0.847451     2727.0\n",
-       "10            prediction-url-cot-claude           0.846154      130.0\n",
-       "1              claude-prediction-online           0.735849       53.0\n",
-       "9   prediction-request-reasoning-claude           0.659664      238.0\n",
-       "0             claude-prediction-offline           0.591549      142.0"
-      ]
-     },
-     "execution_count": 99,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "q_losing_perc"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 103,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th>confidence</th>\n",
-       "      <th>0.00</th>\n",
-       "      <th>0.10</th>\n",
-       "      <th>0.20</th>\n",
-       "      <th>0.30</th>\n",
-       "      <th>0.40</th>\n",
-       "      <th>0.50</th>\n",
-       "      <th>0.55</th>\n",
-       "      <th>0.60</th>\n",
-       "      <th>0.65</th>\n",
-       "      <th>0.70</th>\n",
-       "      <th>0.75</th>\n",
-       "      <th>0.80</th>\n",
-       "      <th>0.85</th>\n",
-       "      <th>0.90</th>\n",
-       "      <th>0.95</th>\n",
-       "      <th>0.99</th>\n",
-       "      <th>1.00</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>tool</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>claude-prediction-offline</th>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>46.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>87.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>claude-prediction-online</th>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-offline</th>\n",
-       "      <td>0.0</td>\n",
-       "      <td>267.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>13.0</td>\n",
-       "      <td>302.0</td>\n",
-       "      <td>189.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>231.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-offline-sme</th>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-online</th>\n",
-       "      <td>0.0</td>\n",
-       "      <td>22.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>43.0</td>\n",
-       "      <td>23.0</td>\n",
-       "      <td>8.0</td>\n",
-       "      <td>670.0</td>\n",
-       "      <td>99.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>76.0</td>\n",
-       "      <td>28.0</td>\n",
-       "      <td>55.0</td>\n",
-       "      <td>25.0</td>\n",
-       "      <td>11.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>20.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-online-sme</th>\n",
-       "      <td>1.0</td>\n",
-       "      <td>27.0</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>71.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>679.0</td>\n",
-       "      <td>234.0</td>\n",
-       "      <td>39.0</td>\n",
-       "      <td>149.0</td>\n",
-       "      <td>76.0</td>\n",
-       "      <td>109.0</td>\n",
-       "      <td>80.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>39.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-request-rag</th>\n",
-       "      <td>0.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>25.0</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>48.0</td>\n",
-       "      <td>11.0</td>\n",
-       "      <td>36.0</td>\n",
-       "      <td>57.0</td>\n",
-       "      <td>16.0</td>\n",
-       "      <td>11.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>20.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-request-rag-claude</th>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>32.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>175.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>513.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>209.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>40.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-request-reasoning</th>\n",
-       "      <td>0.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>103.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>58.0</td>\n",
-       "      <td>97.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>315.0</td>\n",
-       "      <td>176.0</td>\n",
-       "      <td>441.0</td>\n",
-       "      <td>317.0</td>\n",
-       "      <td>339.0</td>\n",
-       "      <td>159.0</td>\n",
-       "      <td>44.0</td>\n",
-       "      <td>58.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>97.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-request-reasoning-claude</th>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>27.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>38.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>76.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>8.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>prediction-url-cot-claude</th>\n",
-       "      <td>0.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>40.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>60.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>22.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "confidence                           0.00   0.10   0.20  0.30   0.40   0.50  \\\n",
-       "tool                                                                          \n",
-       "claude-prediction-offline             0.0    0.0    5.0  46.0    4.0    0.0   \n",
-       "claude-prediction-online              0.0    0.0    2.0  10.0    7.0    3.0   \n",
-       "prediction-offline                    0.0  267.0    2.0  13.0  302.0  189.0   \n",
-       "prediction-offline-sme                0.0    0.0    0.0   0.0    0.0    0.0   \n",
-       "prediction-online                     0.0   22.0    4.0   5.0   43.0   23.0   \n",
-       "prediction-online-sme                 1.0   27.0   10.0   0.0   71.0    2.0   \n",
-       "prediction-request-rag                0.0    3.0    2.0   0.0    4.0    4.0   \n",
-       "prediction-request-rag-claude         0.0    0.0    1.0  32.0    0.0    0.0   \n",
-       "prediction-request-reasoning          0.0    3.0  103.0   1.0   58.0   97.0   \n",
-       "prediction-request-reasoning-claude   0.0    0.0    0.0   3.0    4.0    0.0   \n",
-       "prediction-url-cot-claude             0.0    2.0    1.0   2.0    0.0    0.0   \n",
-       "\n",
-       "confidence                           0.55   0.60   0.65   0.70   0.75   0.80  \\\n",
-       "tool                                                                           \n",
-       "claude-prediction-offline             0.0   87.0    0.0    0.0    0.0    0.0   \n",
-       "claude-prediction-online              0.0   30.0    0.0    0.0    0.0    0.0   \n",
-       "prediction-offline                    0.0  231.0    3.0    0.0    0.0    0.0   \n",
-       "prediction-offline-sme                0.0    0.0    0.0    0.0    2.0    0.0   \n",
-       "prediction-online                     8.0  670.0   99.0    2.0   76.0   28.0   \n",
-       "prediction-online-sme                 0.0  679.0  234.0   39.0  149.0   76.0   \n",
-       "prediction-request-rag                0.0   25.0    5.0   48.0   11.0   36.0   \n",
-       "prediction-request-rag-claude         0.0  175.0    0.0  513.0    0.0  209.0   \n",
-       "prediction-request-reasoning          0.0  315.0  176.0  441.0  317.0  339.0   \n",
-       "prediction-request-reasoning-claude   0.0   27.0    0.0   38.0    4.0   76.0   \n",
-       "prediction-url-cot-claude             0.0   40.0    0.0   60.0    0.0   22.0   \n",
-       "\n",
-       "confidence                            0.85  0.90  0.95  0.99  1.00  \n",
-       "tool                                                                \n",
-       "claude-prediction-offline              0.0   0.0   0.0   0.0   0.0  \n",
-       "claude-prediction-online               0.0   1.0   0.0   0.0   0.0  \n",
-       "prediction-offline                     1.0   2.0   0.0   0.0   1.0  \n",
-       "prediction-offline-sme                 0.0   0.0   0.0   0.0   0.0  \n",
-       "prediction-online                     55.0  25.0  11.0   0.0  20.0  \n",
-       "prediction-online-sme                109.0  80.0   6.0   0.0  39.0  \n",
-       "prediction-request-rag                57.0  16.0  11.0   1.0  20.0  \n",
-       "prediction-request-rag-claude          3.0  40.0   3.0   0.0   0.0  \n",
-       "prediction-request-reasoning         159.0  44.0  58.0   0.0  97.0  \n",
-       "prediction-request-reasoning-claude    0.0   8.0   1.0   0.0   2.0  \n",
-       "prediction-url-cot-claude              0.0   3.0   0.0   0.0   0.0  "
-      ]
-     },
-     "execution_count": 103,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Tool calls whose prompt matches a title listed in winning_trades_percentage_bottom_50.\n",
-    "all_q = winning_trades_percentage_bottom_50['title'].unique().tolist()\n",
-    "q_losing = tools.loc[tools['prompt_request'].isin(all_q)]\n",
-    "\n",
-    "# Tools as rows, confidence values as columns, cells = number of calls (0 where absent).\n",
-    "(\n",
-    "    q_losing\n",
-    "    .groupby(['tool'])['confidence']\n",
-    "    .value_counts()\n",
-    "    .unstack()\n",
-    "    .fillna(0)\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from typing import Optional\n",
-    "\n",
-    "\n",
-    "def get_question(text: str) -> Optional[str]:\n",
-    "    \"\"\"Return the first double-quoted substring of ``text``.\n",
-    "\n",
-    "    Args:\n",
-    "        text: String expected to contain the question wrapped in double quotes.\n",
-    "\n",
-    "    Returns:\n",
-    "        The characters between the first pair of double quotes (this can be an\n",
-    "        empty string), or ``None`` when ``text`` contains no such pair.\n",
-    "    \"\"\"\n",
-    "    # Only the first quoted span is used, so stop at the first match instead\n",
-    "    # of collecting every span with ``findall``.\n",
-    "    first_quoted = re.search(r'\"([^\"]*)\"', text)\n",
-    "    return first_quoted.group(1) if first_quoted else None"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from web3 import Web3\n",
-    "from typing import Optional\n",
-    "import re\n",
-    "import pickle\n",
-    "\n",
-    "def block_number_to_timestamp(block_number: int, web3: Web3) -> str:\n",
-    "    \"\"\"Convert a block number to a UTC timestamp string.\n",
-    "\n",
-    "    Args:\n",
-    "        block_number: Number of the block to look up.\n",
-    "        web3: Client whose eth.get_block is used to fetch the block.\n",
-    "\n",
-    "    Returns:\n",
-    "        The block's timestamp field formatted as %Y-%m-%d %H:%M:%S in UTC.\n",
-    "    \"\"\"\n",
-    "    # Imported locally so the function does not rely on another cell's imports.\n",
-    "    from datetime import datetime, timezone\n",
-    "\n",
-    "    block = web3.eth.get_block(block_number)\n",
-    "    # datetime.utcfromtimestamp() is deprecated; a timezone-aware UTC datetime\n",
-    "    # formats to the same string with this pattern.\n",
-    "    timestamp = datetime.fromtimestamp(block[\"timestamp\"], tz=timezone.utc)\n",
-    "    return timestamp.strftime(\"%Y-%m-%d %H:%M:%S\")\n",
-    "\n",
-    "\n",
-    "def parallelize_timestamp_conversion(df: pd.DataFrame, function: callable) -> list:\n",
-    "    \"\"\"Apply a function to every request_block value using a thread pool.\n",
-    "\n",
-    "    Args:\n",
-    "        df: DataFrame with a request_block column.\n",
-    "        function: Callable invoked once per block number.\n",
-    "\n",
-    "    Returns:\n",
-    "        List with one result per block number, in column order.\n",
-    "    \"\"\"\n",
-    "    blocks = df[\"request_block\"].tolist()\n",
-    "    with ThreadPoolExecutor(max_workers=10) as pool:\n",
-    "        # tqdm wraps the map iterator to show progress while results are consumed\n",
-    "        progress = tqdm(pool.map(function, blocks), total=len(blocks))\n",
-    "        converted = list(progress)\n",
-    "    return converted"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def current_answer(text: str, fpmms: pd.DataFrame) -> Optional[str]:\n",
-    "    \"\"\"Get the current answer for a question.\"\"\"\n",
-    "    row = fpmms[fpmms[\"title\"] == text]\n",
-    "    if row.shape[0] == 0:\n",
-    "        return None\n",
-    "    return row[\"currentAnswer\"].values[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from functools import partial\n",
-    "from concurrent.futures import ThreadPoolExecutor\n",
-    "def weekly_analysis():\n",
-    "    \"\"\"Enrich the tools dataset with questions, answers and request timestamps.\n",
-    "\n",
-    "    Reads ../data/fpmms.parquet and ../data/tools.parquet, then:\n",
-    "      1. derives title from prompt_request and looks up currentAnswer;\n",
-    "      2. maps request_block to request_time using the pickled t_map cache,\n",
-    "         fetching blocks missing from it over RPC;\n",
-    "      3. adds request_month_year and request_month_year_week;\n",
-    "      4. overwrites tools.parquet and t_map.pkl with the updated data.\n",
-    "    \"\"\"\n",
-    "    # Imported locally so the cleanup at the end does not depend on another cell\n",
-    "    # having imported gc (calling gc.collect() without it raises NameError).\n",
-    "    import gc\n",
-    "\n",
-    "    # NOTE(review): this URL appears to embed an API key; consider loading it\n",
-    "    # from configuration or an environment variable instead of committing it.\n",
-    "    rpc = \"https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a\"\n",
-    "    web3 = Web3(Web3.HTTPProvider(rpc))\n",
-    "    # Get currentAnswer from FPMMS\n",
-    "    fpmms = pd.read_parquet('../data/fpmms.parquet')\n",
-    "    tools = pd.read_parquet('../data/tools.parquet')\n",
-    "\n",
-    "    # Get the question from the tools\n",
-    "    print(\"Getting the question and current answer for the tools\")\n",
-    "    tools[\"title\"] = tools[\"prompt_request\"].apply(get_question)\n",
-    "    tools[\"currentAnswer\"] = tools[\"title\"].apply(lambda x: current_answer(x, fpmms))\n",
-    "\n",
-    "    # Capitalise the answers (substring replacement of yes/no)\n",
-    "    tools[\"currentAnswer\"] = tools[\"currentAnswer\"].str.replace(\"yes\", \"Yes\")\n",
-    "    tools[\"currentAnswer\"] = tools[\"currentAnswer\"].str.replace(\"no\", \"No\")\n",
-    "\n",
-    "    # Convert block number to timestamp\n",
-    "    print(\"Converting block number to timestamp\")\n",
-    "    # NOTE(review): pickle can execute arbitrary code on load; only use a trusted\n",
-    "    # t_map.pkl. The context manager closes the file handle after loading.\n",
-    "    with open(\"../data/t_map.pkl\", \"rb\") as f:\n",
-    "        t_map = pickle.load(f)\n",
-    "    tools[\"request_time\"] = tools[\"request_block\"].map(t_map)\n",
-    "\n",
-    "    # Identify tools with missing request_time and fill them\n",
-    "    missing_time_indices = tools[tools[\"request_time\"].isna()].index\n",
-    "    if not missing_time_indices.empty:\n",
-    "        partial_block_number_to_timestamp = partial(\n",
-    "            block_number_to_timestamp, web3=web3\n",
-    "        )\n",
-    "        missing_timestamps = parallelize_timestamp_conversion(\n",
-    "            tools.loc[missing_time_indices], partial_block_number_to_timestamp\n",
-    "        )\n",
-    "\n",
-    "        # Update the original DataFrame with the missing timestamps\n",
-    "        for i, timestamp in zip(missing_time_indices, missing_timestamps):\n",
-    "            tools.at[i, \"request_time\"] = timestamp\n",
-    "\n",
-    "    tools[\"request_month_year\"] = pd.to_datetime(tools[\"request_time\"]).dt.strftime(\n",
-    "        \"%Y-%m\"\n",
-    "    )\n",
-    "    tools[\"request_month_year_week\"] = (\n",
-    "        pd.to_datetime(tools[\"request_time\"]).dt.to_period(\"W\").astype(str)\n",
-    "    )\n",
-    "\n",
-    "    # Save the tools data after the updates on the content\n",
-    "    tools.to_parquet('../data/tools.parquet', index=False)\n",
-    "\n",
-    "    # Update t_map with new timestamps\n",
-    "    new_timestamps = (\n",
-    "        tools[[\"request_block\", \"request_time\"]]\n",
-    "        .dropna()\n",
-    "        .set_index(\"request_block\")\n",
-    "        .to_dict()[\"request_time\"]\n",
-    "    )\n",
-    "    t_map.update(new_timestamps)\n",
-    "\n",
-    "    with open(\"../data/t_map.pkl\", \"wb\") as f:\n",
-    "        pickle.dump(t_map, f)\n",
-    "\n",
-    "    # clean and release all memory\n",
-    "    del tools\n",
-    "    del fpmms\n",
-    "    del t_map\n",
-    "    gc.collect()\n",
-    "\n",
-    "    print(\"Weekly analysis files generated and saved\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 50,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Getting the question and current answer for the tools\n",
-      "Converting block number to timestamp\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "  0%|          | 0/11690 [00:00<?, ?it/s]/var/folders/gp/02mb1d514ng739czlxw1lhh00000gn/T/ipykernel_28372/2484496282.py:9: DeprecationWarning: datetime.datetime.utcfromtimestamp() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.fromtimestamp(timestamp, datetime.UTC).\n",
-      "  timestamp = datetime.utcfromtimestamp(block[\"timestamp\"])\n",
-      "100%|██████████| 11690/11690 [01:40<00:00, 116.87it/s]\n"
-     ]
-    },
-    {
-     "ename": "NameError",
-     "evalue": "name 'gc' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[50], line 10\u001b[0m\n\u001b[1;32m      8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpathlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Path\n\u001b[1;32m      9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mfunctools\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m partial\n\u001b[0;32m---> 10\u001b[0m \u001b[43mweekly_analysis\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
-      "Cell \u001b[0;32mIn[48], line 63\u001b[0m, in \u001b[0;36mweekly_analysis\u001b[0;34m()\u001b[0m\n\u001b[1;32m     61\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m fpmms\n\u001b[1;32m     62\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m t_map\n\u001b[0;32m---> 63\u001b[0m \u001b[43mgc\u001b[49m\u001b[38;5;241m.\u001b[39mcollect()\n\u001b[1;32m     65\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWeekly analysis files generated and saved\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'gc' is not defined"
-     ]
-    }
-   ],
-   "source": [
-    "import os\n",
-    "import pickle\n",
-    "from datetime import datetime\n",
-    "from concurrent.futures import ThreadPoolExecutor\n",
-    "from tqdm import tqdm\n",
-    "from web3 import Web3\n",
-    "import pandas as pd\n",
-    "from pathlib import Path\n",
-    "from functools import partial\n",
-    "weekly_analysis()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1187"
-      ]
-     },
-     "execution_count": 51,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import gc\n",
-    "gc.collect()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "akash",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

win	tool	tool_accuracy	total_requests
0	claude-prediction-offline	66.308244	279
1	claude-prediction-online	58.914027	1105
2	prediction-offline	67.717915	2283
3	prediction-offline-sme	55.555556	18
4	prediction-online	65.459066	5631
5	prediction-online-sme	67.417656	8167
6	prediction-request-rag	64.217072	1769
7	prediction-request-rag-claude	69.554566	4490
8	prediction-request-reasoning	68.813594	9828
9	prediction-request-reasoning-claude	68.910256	2184
10	prediction-url-cot-claude	64.584980	1265
	min	max
tool
claude-prediction-offline	2024-04-23 13:09:30	2024-06-10 00:31:30
claude-prediction-online	2024-04-12 12:24:20	2024-06-09 21:41:20
prediction-offline	2024-04-12 12:20:10	2024-06-08 23:45:00
prediction-offline-sme	2024-04-16 07:58:45	2024-04-29 20:45:15
prediction-online	2024-04-16 05:52:40	2024-06-09 21:47:20
prediction-online-sme	2024-04-12 11:51:30	2024-06-10 00:06:00
prediction-request-rag	2024-04-12 11:39:40	2024-06-09 21:17:45
prediction-request-rag-claude	2024-04-12 11:14:30	2024-06-07 11:42:30
prediction-request-reasoning	2024-04-12 11:57:05	2024-06-09 21:50:45
prediction-request-reasoning-claude	2024-04-12 11:53:55	2024-06-05 05:00:10
prediction-url-cot-claude	2024-04-12 11:37:15	2024-06-05 05:21:10
	tool	losing_percentage	num_calls
0	prediction-offline	1.000000	40.0
4	prediction-request-rag-claude	1.000000	17.0
7	prediction-url-cot-claude	1.000000	2.0
2	prediction-online-sme	0.656716	67.0
6	prediction-request-reasoning-claude	0.571429	7.0
5	prediction-request-reasoning	0.538462	52.0
3	prediction-request-rag	0.250000	4.0
1	prediction-online	0.185185	27.0
	tool	losing_percentage	num_calls
0	claude-prediction-offline	1.0	2.0
1	prediction-offline	1.0	23.0
2	prediction-online	1.0	14.0
3	prediction-online-sme	1.0	18.0
4	prediction-request-rag	1.0	5.0
5	prediction-request-rag-claude	1.0	8.0
8	prediction-url-cot-claude	1.0	6.0
6	prediction-request-reasoning	0.0	18.0
7	prediction-request-reasoning-claude	0.0	3.0
	tool	losing_percentage	num_calls
1	prediction-online-sme	0.750000	4.0
5	prediction-request-reasoning-claude	0.750000	4.0
2	prediction-request-rag	0.666667	6.0
3	prediction-request-rag-claude	0.500000	2.0
4	prediction-request-reasoning	0.400000	5.0
0	claude-prediction-online	0.000000	1.0
	tool	losing_percentage	num_calls
0	prediction-offline	1.000000	11.0
1	prediction-online	1.000000	17.0
2	prediction-online-sme	1.000000	30.0
4	prediction-request-rag-claude	1.000000	45.0
5	prediction-request-reasoning	0.874016	127.0
3	prediction-request-rag	0.250000	4.0
6	prediction-request-reasoning-claude	0.000000	2.0
confidence	0.00	0.10	0.20	0.30	0.40	0.50	0.55	0.60	0.65	0.70	0.75	0.80	0.85	0.90	0.95	0.99	1.00
tool
claude-prediction-offline	0.0	0.0	5.0	46.0	4.0	0.0	0.0	87.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0
claude-prediction-online	0.0	0.0	2.0	10.0	7.0	3.0	0.0	30.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0	0.0
prediction-offline	0.0	267.0	2.0	13.0	302.0	189.0	0.0	231.0	3.0	0.0	0.0	0.0	1.0	2.0	0.0	0.0	1.0
prediction-offline-sme	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	2.0	0.0	0.0	0.0	0.0	0.0	0.0
prediction-online	0.0	22.0	4.0	5.0	43.0	23.0	8.0	670.0	99.0	2.0	76.0	28.0	55.0	25.0	11.0	0.0	20.0
prediction-online-sme	1.0	27.0	10.0	0.0	71.0	2.0	0.0	679.0	234.0	39.0	149.0	76.0	109.0	80.0	6.0	0.0	39.0
prediction-request-rag	0.0	3.0	2.0	0.0	4.0	4.0	0.0	25.0	5.0	48.0	11.0	36.0	57.0	16.0	11.0	1.0	20.0
prediction-request-rag-claude	0.0	0.0	1.0	32.0	0.0	0.0	0.0	175.0	0.0	513.0	0.0	209.0	3.0	40.0	3.0	0.0	0.0
prediction-request-reasoning	0.0	3.0	103.0	1.0	58.0	97.0	0.0	315.0	176.0	441.0	317.0	339.0	159.0	44.0	58.0	0.0	97.0
prediction-request-reasoning-claude	0.0	0.0	0.0	3.0	4.0	0.0	0.0	27.0	0.0	38.0	4.0	76.0	0.0	8.0	1.0	0.0	2.0
prediction-url-cot-claude	0.0	2.0	1.0	2.0	0.0	0.0	0.0	40.0	0.0	60.0	0.0	22.0	0.0	3.0	0.0	0.0	0.0