Spaces:

valory
/

olas-prediction-live-dashboard

Running

App Files Community

arshy committed on May 24, 2024

Commit

52a040b

1 Parent(s): 68f06cc

gradio version

Browse files

Files changed (2) hide show

requirements.txt +1 -3
test.ipynb +94 -21

requirements.txt CHANGED Viewed

@@ -2,8 +2,6 @@ pandas==2.0.1
 matplotlib
 huggingface-hub
 pyarrow
-web3
 requests
-gradio
-apscheduler
 pytz

 matplotlib
 huggingface-hub
 pyarrow
 requests
+gradio==4.13.0
 pytz

test.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -15,7 +15,7 @@
     "import datetime\n",
     "import pandas as pd\n",
     "from collections import defaultdict\n",
-    "from typing import Any, Union\n",
     "from string import Template\n",
     "from enum import Enum\n",
     "from tqdm import tqdm\n",
@@ -25,35 +25,70 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
-     "ename": "",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
-      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
-      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
-      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
      ]
     }
    ],
    "source": [
-    "tools_df = pd.read_parquet(\"./data/tools.parquet\")\n",
-    "trades_df = pd.read_parquet(\"./data/all_trades_profitability.parquet\")\n",
     "\n",
-    "tools_df['request_time'] = pd.to_datetime(tools_df['request_time'])\n",
-    "tools_df = tools_df[tools_df['request_time'].dt.year == 2024]\n",
     "\n",
-    "trades_df['creation_timestamp'] = pd.to_datetime(trades_df['creation_timestamp'])\n",
-    "trades_df = trades_df[trades_df['creation_timestamp'].dt.year == 2024]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -64,17 +99,55 @@
        "       'trade_fee_amount', 'outcomes_tokens_traded', 'current_answer',\n",
        "       'is_invalid', 'winning_trade', 'earnings', 'redeemed',\n",
        "       'redeemed_amount', 'num_mech_calls', 'mech_fee_amount', 'net_earnings',\n",
-       "       'roi'],\n",
        "      dtype='object')"
       ]
      },
-     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "trades_df.columns\n"
    ]
   },
   {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
     "import datetime\n",
     "import pandas as pd\n",
     "from collections import defaultdict\n",
+    "from typing import Any, Union, List\n",
     "from string import Template\n",
     "from enum import Enum\n",
     "from tqdm import tqdm\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "INC_TOOLS = [\n",
+    "    'prediction-online', \n",
+    "    'prediction-offline', \n",
+    "    'claude-prediction-online', \n",
+    "    'claude-prediction-offline', \n",
+    "    'prediction-offline-sme',\n",
+    "    'prediction-online-sme',\n",
+    "    'prediction-request-rag',\n",
+    "    'prediction-request-reasoning',\n",
+    "    'prediction-url-cot-claude', \n",
+    "    'prediction-request-rag-claude',\n",
+    "    'prediction-request-reasoning-claude'\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_58769/3518445359.py:5: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n",
+      "  trades_df['month_year'] = trades_df['creation_timestamp'].dt.to_period('M').astype(str)\n",
+      "/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_58769/3518445359.py:6: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n",
+      "  trades_df['month_year_week'] = trades_df['creation_timestamp'].dt.to_period('W').astype(str)\n"
      ]
     }
    ],
    "source": [
+    "def prepare_trades(trades_df: pd.DataFrame) -> pd.DataFrame:\n",
+    "    \"\"\"Prepares the trades data for analysis.\"\"\"\n",
+    "    trades_df['creation_timestamp'] = pd.to_datetime(trades_df['creation_timestamp'])\n",
+    "    trades_df['creation_timestamp'] = trades_df['creation_timestamp'].dt.tz_convert('UTC')\n",
+    "    trades_df['month_year'] = trades_df['creation_timestamp'].dt.to_period('M').astype(str)\n",
+    "    trades_df['month_year_week'] = trades_df['creation_timestamp'].dt.to_period('W').astype(str)\n",
+    "    trades_df['winning_trade'] = trades_df['winning_trade'].astype(int)\n",
+    "    return trades_df\n",
+    "\n",
+    "def prepare_data():\n",
+    "    tools_df = pd.read_parquet(\"./data/tools.parquet\")\n",
+    "    trades_df = pd.read_parquet(\"./data/all_trades_profitability.parquet\")\n",
+    "\n",
+    "    tools_df['request_time'] = pd.to_datetime(tools_df['request_time'])\n",
+    "    tools_df = tools_df[tools_df['request_time'].dt.year == 2024]\n",
     "\n",
+    "    trades_df['creation_timestamp'] = pd.to_datetime(trades_df['creation_timestamp'])\n",
+    "    trades_df = trades_df[trades_df['creation_timestamp'].dt.year == 2024]\n",
     "\n",
+    "    trades_df = prepare_trades(trades_df)\n",
+    "    return tools_df, trades_df\n",
+    "\n",
+    "tools_df, trades_df = prepare_data()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
        "       'trade_fee_amount', 'outcomes_tokens_traded', 'current_answer',\n",
        "       'is_invalid', 'winning_trade', 'earnings', 'redeemed',\n",
        "       'redeemed_amount', 'num_mech_calls', 'mech_fee_amount', 'net_earnings',\n",
+       "       'roi', 'month_year', 'month_year_week'],\n",
        "      dtype='object')"
       ]
      },
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
+    "trades_df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_error_data(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:\n",
+    "    \"\"\"Gets the error data for the given tools and calculates the error percentage.\"\"\"\n",
+    "    tools_inc = tools_df[tools_df['tool'].isin(inc_tools)]\n",
+    "    error = tools_inc.groupby(['tool', 'request_month_year_week', 'error']).size().unstack(fill_value=0).reset_index()\n",
+    "    error['error_perc'] = (error[True] / (error[False] + error[True])) * 100\n",
+    "    error['total_requests'] = error[False] + error[True]\n",
+    "    return error\n",
+    "\n",
+    "def get_error_data_overall(error_df: pd.DataFrame) -> pd.DataFrame:\n",
+    "    \"\"\"Gets the error data for the given tools and calculates the error percentage.\"\"\"\n",
+    "    error_total = error_df.groupby('request_month_year_week').agg({'total_requests': 'sum', False: 'sum', True: 'sum'}).reset_index()\n",
+    "    error_total['error_perc'] = (error_total[True] / error_total['total_requests']) * 100\n",
+    "    error_total.columns = error_total.columns.astype(str)\n",
+    "    error_total['error_perc'] = error_total['error_perc'].apply(lambda x: round(x, 4))\n",
+    "    return error_total"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "error_df = get_error_data(\n",
+    "    tools_df=tools_df,\n",
+    "    inc_tools=INC_TOOLS\n",
+    ")\n",
+    "error_overall_df = get_error_data_overall(\n",
+    "    error_df=error_df\n",
+    ")"
    ]
   },
   {