{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tool error-rate analysis\n",
    "\n",
    "Loads the tools and trades parquet exports, keeps a single calendar year, and computes the weekly error percentage of the selected prediction tools, per tool and overall.\n",
    "\n",
    "The notebook is meant to be run top to bottom on a fresh kernel (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library\n",
    "# NOTE: `datetime` is the module here; use `datetime.datetime` for the class.\n",
    "import datetime\n",
    "import time\n",
    "from collections import defaultdict\n",
    "from enum import Enum\n",
    "from pathlib import Path\n",
    "from string import Template\n",
    "from typing import Any, List, Tuple, Union\n",
    "\n",
    "# Third-party\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import requests\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory holding the parquet inputs (relative to the notebook's working directory).\n",
    "DATA_DIR = Path(\"./data\")\n",
    "# Only rows whose timestamp falls in this calendar year are analysed.\n",
    "ANALYSIS_YEAR = 2024\n",
    "\n",
    "# Tools included in the error-rate analysis.\n",
    "INC_TOOLS = [\n",
    "    'prediction-online',\n",
    "    'prediction-offline',\n",
    "    'claude-prediction-online',\n",
    "    'claude-prediction-offline',\n",
    "    'prediction-offline-sme',\n",
    "    'prediction-online-sme',\n",
    "    'prediction-request-rag',\n",
    "    'prediction-request-reasoning',\n",
    "    'prediction-url-cot-claude',\n",
    "    'prediction-request-rag-claude',\n",
    "    'prediction-request-reasoning-claude',\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and prepare data\n",
    "\n",
    "Inputs are `tools.parquet` and `all_trades_profitability.parquet` inside `DATA_DIR`. Both are restricted to `ANALYSIS_YEAR`; the trades additionally get UTC timestamps and month / week period columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prepare_trades(trades_df: pd.DataFrame) -> pd.DataFrame:\n",
    "    \"\"\"Return a prepared copy of the trades data; the input frame is not modified.\n",
    "\n",
    "    - `creation_timestamp` is parsed and expressed in UTC (tz-naive values are\n",
    "      interpreted as UTC).\n",
    "    - `month_year` / `month_year_week` hold the monthly / weekly period of the\n",
    "      UTC timestamp as strings.\n",
    "    - `winning_trade` is cast to int.\n",
    "    \"\"\"\n",
    "    # utc=True converts tz-aware input to UTC and localizes tz-naive input as UTC;\n",
    "    # a bare `.dt.tz_convert('UTC')` raises TypeError on tz-naive data.\n",
    "    creation_utc = pd.to_datetime(trades_df['creation_timestamp'], utc=True)\n",
    "    # Periods carry no timezone: strip it explicitly from the UTC values so that\n",
    "    # `to_period` does not emit its 'will drop timezone information' UserWarning.\n",
    "    creation_wall = creation_utc.dt.tz_localize(None)\n",
    "    return trades_df.assign(\n",
    "        creation_timestamp=creation_utc,\n",
    "        month_year=creation_wall.dt.to_period('M').astype(str),\n",
    "        month_year_week=creation_wall.dt.to_period('W').astype(str),\n",
    "        winning_trade=trades_df['winning_trade'].astype(int),\n",
    "    )\n",
    "\n",
    "\n",
    "def prepare_data(\n",
    "    data_dir: Union[str, Path] = DATA_DIR,\n",
    "    year: int = ANALYSIS_YEAR,\n",
    ") -> Tuple[pd.DataFrame, pd.DataFrame]:\n",
    "    \"\"\"Load the tools and trades parquet files and keep only the rows of `year`.\n",
    "\n",
    "    Args:\n",
    "        data_dir: directory containing `tools.parquet` and\n",
    "            `all_trades_profitability.parquet`.\n",
    "        year: calendar year to keep, matched against `request_time` (tools)\n",
    "            and `creation_timestamp` (trades).\n",
    "\n",
    "    Returns:\n",
    "        `(tools_df, trades_df)`; the trades frame has been passed through\n",
    "        `prepare_trades`.\n",
    "    \"\"\"\n",
    "    data_dir = Path(data_dir)\n",
    "    tools_raw = pd.read_parquet(data_dir / \"tools.parquet\")\n",
    "    trades_raw = pd.read_parquet(data_dir / \"all_trades_profitability.parquet\")\n",
    "\n",
    "    tools_df = tools_raw.assign(request_time=pd.to_datetime(tools_raw['request_time']))\n",
    "    tools_df = tools_df[tools_df['request_time'].dt.year == year]\n",
    "\n",
    "    trades_df = trades_raw.assign(\n",
    "        creation_timestamp=pd.to_datetime(trades_raw['creation_timestamp'])\n",
    "    )\n",
    "    trades_df = trades_df[trades_df['creation_timestamp'].dt.year == year]\n",
    "\n",
    "    return tools_df, prepare_trades(trades_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tools_df, trades_df = prepare_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trades_df.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Error rates\n",
    "\n",
    "Requests are counted per tool and per week, split by the `error` flag, and turned into an error percentage. `tools_df` is expected to already provide the `request_month_year_week` column and a boolean `error` column; neither is created in this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_error_data(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:\n",
    "    \"\"\"Count requests per tool and week, split by error flag, and add the error percentage.\n",
    "\n",
    "    Args:\n",
    "        tools_df: tool requests with the columns `tool`,\n",
    "            `request_month_year_week` and `error` (assumed boolean).\n",
    "        inc_tools: names of the tools to keep.\n",
    "\n",
    "    Returns:\n",
    "        One row per (tool, week) with the columns `tool`,\n",
    "        `request_month_year_week`, `False` (successful requests), `True`\n",
    "        (failed requests), `error_perc` and `total_requests`.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: if a required column is missing from `tools_df`.\n",
    "    \"\"\"\n",
    "    required = {'tool', 'request_month_year_week', 'error'}\n",
    "    missing = required - set(tools_df.columns)\n",
    "    if missing:\n",
    "        raise KeyError(f'tools_df is missing required column(s): {sorted(missing)}')\n",
    "\n",
    "    tools_inc = tools_df[tools_df['tool'].isin(inc_tools)]\n",
    "    error = (\n",
    "        tools_inc.groupby(['tool', 'request_month_year_week', 'error'])\n",
    "        .size()\n",
    "        .unstack(fill_value=0)\n",
    "        # Guarantee both outcome columns: when the data holds only successes or\n",
    "        # only failures, unstack yields a single column and error[True] /\n",
    "        # error[False] would raise KeyError.\n",
    "        .reindex(columns=[False, True], fill_value=0)\n",
    "        .reset_index()\n",
    "    )\n",
    "    total_requests = error[False] + error[True]\n",
    "    error['error_perc'] = (error[True] / total_requests) * 100\n",
    "    error['total_requests'] = total_requests\n",
    "    return error\n",
    "\n",
    "\n",
    "def get_error_data_overall(error_df: pd.DataFrame) -> pd.DataFrame:\n",
    "    \"\"\"Aggregate the per-tool error counts into one row per week across all tools.\n",
    "\n",
    "    Args:\n",
    "        error_df: output of `get_error_data`.\n",
    "\n",
    "    Returns:\n",
    "        One row per `request_month_year_week` with the summed `total_requests`,\n",
    "        the summed success / failure counts (columns renamed to the strings\n",
    "        'False' / 'True') and `error_perc` rounded to 4 decimals.\n",
    "    \"\"\"\n",
    "    error_total = (\n",
    "        error_df.groupby('request_month_year_week')\n",
    "        .agg({'total_requests': 'sum', False: 'sum', True: 'sum'})\n",
    "        .reset_index()\n",
    "    )\n",
    "    error_total['error_perc'] = (\n",
    "        error_total[True] / error_total['total_requests'] * 100\n",
    "    ).round(4)\n",
    "    # Boolean column labels become the strings 'False' / 'True'.\n",
    "    error_total.columns = error_total.columns.astype(str)\n",
    "    return error_total"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "error_df = get_error_data(\n",
    "    tools_df=tools_df,\n",
    "    inc_tools=INC_TOOLS\n",
    ")\n",
    "error_overall_df = get_error_data_overall(\n",
    "    error_df=error_df\n",
    ")\n",
    "error_overall_df.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "akash",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}