{ "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import gc\n", "import sys\n", "\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "import seaborn as sns\n", "\n", "sns.set_style(\"darkgrid\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This import previously failed with ModuleNotFoundError: `scripts` is not importable\n", "# from the notebook's own directory. Assumption to confirm: the `scripts` package sits\n", "# one level up, next to the `data/` and `tmp/` folders read below.\n", "if \"..\" not in sys.path:\n", "    sys.path.append(\"..\")\n", "\n", "from scripts.get_mech_info import update_fpmmTrades_parquet" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "fpmms = pd.read_parquet('../data/all_fpmms.parquet')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "markets = pd.read_parquet('../data/fpmms.parquet')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "trades_data = pd.read_parquet('../tmp/fpmmTrades.parquet')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "trades_filename = \"new_fpmmTrades.parquet\"" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Reuse trades_filename so the file name is defined in exactly one place.\n", "new_trades = pd.read_parquet(f\"../tmp/{trades_filename}\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "merge_df = pd.concat([trades_data, new_trades], ignore_index=True)" ] }, { "cell_type": "code", 
"execution_count": 10, "metadata": {}, "outputs": [], "source": [ "merge_df[\"fpmm.arbitrationOccurred\"] = merge_df[\"fpmm.arbitrationOccurred\"].astype(bool)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "merge_df[\"fpmm.isPendingArbitration\"] = merge_df[\"fpmm.isPendingArbitration\"].astype(bool)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Initial length before removing duplicates in fpmmTrades= 123556\n" ] } ], "source": [ "print(f\"Initial length before removing duplicates in fpmmTrades= {len(merge_df)}\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Final length after removing duplicates in fpmmTrades= 117771\n" ] } ], "source": [ "# Reassign instead of inplace=True so the de-duplication is an explicit transformation.\n", "merge_df = merge_df.drop_duplicates(\"id\")\n", "print(f\"Final length after removing duplicates in fpmmTrades= {len(merge_df)}\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "merge_df.to_parquet(\"../tmp/fpmmTrades.parquet\", index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n", " 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n", " 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n", " 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n", " 'transactionHash', 'type', 'market_creator',\n", " 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n", " 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n", " 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n", " 'fpmm.condition.id'],\n", " dtype='object')" ] }, "execution_count": 6, "metadata": 
{}, "output_type": "execute_result" } ], "source": [ "trades_data.columns" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "102664" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(trades_data)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# The original cell called max() on an undefined name (fpmmsTra) and raised NameError.\n", "# Presumably the latest stored trade timestamp was wanted - confirm the intended column.\n", "trades_data[\"creationTimestamp\"].max()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 4688 entries, 0 to 4687\n", "Data columns (total 4 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 currentAnswer 4688 non-null object\n", " 1 id 4688 non-null object\n", " 2 title 4688 non-null object\n", " 3 market_creator 4688 non-null object\n", "dtypes: object(4)\n", "memory usage: 146.6+ KB\n" ] } ], "source": [ "markets.info()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "currentAnswer\n", "No 2771\n", "Yes 1914\n", "no 1\n", "False 1\n", "IND 1\n", "Name: count, dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "markets.currentAnswer.value_counts()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
currentAnsweridtitlemarket_creator
0No0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5Will the first floating offshore wind research...quickstart
1No0x0020d13c89140b47e10db54cbd53852b90bc1391Will the Francis Scott Key Bridge in Baltimore...quickstart
2No0x003ae5e007cc38b3f86b0ed7c82f938a1285ac07Will FC Saarbrucken reach the final of the Ger...quickstart
3Yes0x004c8d4c619dc6b9caa940f5ea7ef699ae85359cWill the pro-life activists convicted for 'con...quickstart
4Yes0x005e3f7a90585acbec807425a750fbba1d0c2b5cWill Apple announce the release of a new M4 ch...quickstart
\n", "
" ], "text/plain": [ " currentAnswer id \\\n", "0 No 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 \n", "1 No 0x0020d13c89140b47e10db54cbd53852b90bc1391 \n", "2 No 0x003ae5e007cc38b3f86b0ed7c82f938a1285ac07 \n", "3 Yes 0x004c8d4c619dc6b9caa940f5ea7ef699ae85359c \n", "4 Yes 0x005e3f7a90585acbec807425a750fbba1d0c2b5c \n", "\n", " title market_creator \n", "0 Will the first floating offshore wind research... quickstart \n", "1 Will the Francis Scott Key Bridge in Baltimore... quickstart \n", "2 Will FC Saarbrucken reach the final of the Ger... quickstart \n", "3 Will the pro-life activists convicted for 'con... quickstart \n", "4 Will Apple announce the release of a new M4 ch... quickstart " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fpmms.head()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "currentAnswer\n", "No 2583\n", "Yes 1833\n", "no 1\n", "False 1\n", "IND 1\n", "Name: count, dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fpmms.currentAnswer.value_counts()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "market_creator\n", "quickstart 4252\n", "pearl 167\n", "Name: count, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fpmms.market_creator.value_counts()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 4419 entries, 0 to 4418\n", "Data columns (total 4 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 currentAnswer 4419 non-null object\n", " 1 id 4419 non-null object\n", " 2 title 4419 non-null object\n", " 3 market_creator 4419 non-null object\n", "dtypes: object(4)\n", "memory usage: 138.2+ KB\n" ] } ], "source": [ "fpmms.info()" 
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "currentAnswer\n", "No 2495\n", "Yes 1746\n", "no 1\n", "False 1\n", "IND 1\n", "Name: count, dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fpmms.currentAnswer.value_counts()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "currentAnswer\n", "No 2170\n", "Yes 1500\n", "no 1\n", "False 1\n", "IND 1\n", "Name: count, dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fpmms.currentAnswer.value_counts()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 3673 entries, 0 to 3672\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 id 3673 non-null object\n", " 1 currentAnswer 3673 non-null object\n", " 2 title 3673 non-null object\n", "dtypes: object(3)\n", "memory usage: 86.2+ KB\n" ] } ], "source": [ "fpmms.info()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "all_trades = pd.read_parquet('../data/all_trades_profitability.parquet')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "is_invalid\n", "False 23830\n", "True 3877\n", "Name: count, dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_trades.is_invalid.value_counts()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "24722" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mask = (all_trades[\"is_invalid\"] & all_trades[\"redeemed\"])\n", "filtered_trades = all_trades[~mask]\n", "len(filtered_trades)" ] 
}, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "27707" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(all_trades)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "24722" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(filtered_trades)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "winning_trade\n", "True 13133\n", "False 11589\n", "Name: count, dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_trades.winning_trade.value_counts()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# filtered_trades is a boolean-mask selection of all_trades. Setting a column on it\n", "# raised SettingWithCopyWarning, so build a new frame with .assign instead.\n", "filtered_trades = filtered_trades.assign(\n", "    creation_timestamp=pd.to_datetime(filtered_trades[\"creation_timestamp\"])\n", ")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "current_answer\n", " 1 13016\n", " 0 10814\n", "-1 892\n", "Name: count, dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_trades.current_answer.value_counts()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "203" ] }, 
"execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(list(all_trades.trader_address.unique()))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "27707" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(all_trades)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "winning_trade\n", "0 14574\n", "1 13133\n", "Name: count, dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# to_period() discards timezone information and emits a UserWarning when it does so.\n", "# Dropping the timezone explicitly (wall-clock time is preserved) yields the same periods.\n", "creation_time_naive = all_trades['creation_timestamp'].dt.tz_localize(None)\n", "all_trades['month_year'] = creation_time_naive.dt.to_period('M').astype(str)\n", "all_trades['month_year_week'] = creation_time_naive.dt.to_period('W').astype(str)\n", "all_trades['winning_trade'] = all_trades['winning_trade'].astype(int)\n", "all_trades.winning_trade.value_counts()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
month_year_weekwinning_trade
02024-04-22/2024-04-2860.465116
12024-04-29/2024-05-0553.887043
22024-05-06/2024-05-1249.626201
32024-05-13/2024-05-1947.931617
42024-05-20/2024-05-2646.209810
52024-05-27/2024-06-0241.855369
62024-06-03/2024-06-0943.714888
72024-06-10/2024-06-1646.697039
82024-06-17/2024-06-2352.762120
\n", "
" ], "text/plain": [ " month_year_week winning_trade\n", "0 2024-04-22/2024-04-28 60.465116\n", "1 2024-04-29/2024-05-05 53.887043\n", "2 2024-05-06/2024-05-12 49.626201\n", "3 2024-05-13/2024-05-19 47.931617\n", "4 2024-05-20/2024-05-26 46.209810\n", "5 2024-05-27/2024-06-02 41.855369\n", "6 2024-06-03/2024-06-09 43.714888\n", "7 2024-06-10/2024-06-16 46.697039\n", "8 2024-06-17/2024-06-23 52.762120" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Weekly win rate in percent. The per-week mean of winning_trade equals sum / count,\n", "# so one groupby replaces the two identical ones, and reset_index() already yields\n", "# the columns ['month_year_week', 'winning_trade'].\n", "winning_trades = (\n", "    all_trades.groupby('month_year_week')['winning_trade']\n", "    .mean()\n", "    .mul(100)\n", "    .reset_index()\n", ")\n", "winning_trades" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
month_year_weekwinning_trade
62024-06-03/2024-06-0943.714888
\n", "
" ], "text/plain": [ " month_year_week winning_trade\n", "6 2024-06-03/2024-06-09 43.714888" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "that_week = winning_trades[winning_trades[\"month_year_week\"]==\"2024-06-03/2024-06-09\"]\n", "that_week" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
month_year_weeksumcount
02024-04-22/2024-04-282643
12024-04-29/2024-05-0516223010
22024-05-06/2024-05-1227885618
32024-05-13/2024-05-1922714738
42024-05-20/2024-05-2619694261
52024-05-27/2024-06-0217194107
62024-06-03/2024-06-0912452848
72024-06-10/2024-06-1610252195
82024-06-17/2024-06-23468887
\n", "
" ], "text/plain": [ " month_year_week sum count\n", "0 2024-04-22/2024-04-28 26 43\n", "1 2024-04-29/2024-05-05 1622 3010\n", "2 2024-05-06/2024-05-12 2788 5618\n", "3 2024-05-13/2024-05-19 2271 4738\n", "4 2024-05-20/2024-05-26 1969 4261\n", "5 2024-05-27/2024-06-02 1719 4107\n", "6 2024-06-03/2024-06-09 1245 2848\n", "7 2024-06-10/2024-06-16 1025 2195\n", "8 2024-06-17/2024-06-23 468 887" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "winning_trades2 = all_trades.groupby(['month_year_week'])['winning_trade'].agg([\"sum\",\"count\"]).reset_index()\n", "winning_trades2" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
month_year_weeksumcountwinning_trade
62024-06-03/2024-06-091245284843.714888
\n", "
" ], "text/plain": [ " month_year_week sum count winning_trade\n", "6 2024-06-03/2024-06-09 1245 2848 43.714888" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "that_week = winning_trades2[winning_trades2[\"month_year_week\"]==\"2024-06-03/2024-06-09\"]\n", "that_week" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "INC_TOOLS = [\n", " \"prediction-online\",\n", " \"prediction-offline\",\n", " \"claude-prediction-online\",\n", " \"claude-prediction-offline\",\n", " \"prediction-offline-sme\",\n", " \"prediction-online-sme\",\n", " \"prediction-request-rag\",\n", " \"prediction-request-reasoning\",\n", " \"prediction-url-cot-claude\",\n", " \"prediction-request-rag-claude\",\n", " \"prediction-request-reasoning-claude\",\n", "]" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "tools = pd.read_parquet('../data/tools.parquet')" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 127674 entries, 0 to 127673\n", "Data columns (total 22 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 request_id 127674 non-null object \n", " 1 request_block 127674 non-null int64 \n", " 2 prompt_request 127674 non-null object \n", " 3 tool 127674 non-null object \n", " 4 nonce 127674 non-null object \n", " 5 trader_address 127674 non-null object \n", " 6 deliver_block 127674 non-null int64 \n", " 7 error 127668 non-null float64\n", " 8 error_message 19534 non-null object \n", " 9 prompt_response 120607 non-null object \n", " 10 mech_address 127674 non-null object \n", " 11 p_yes 108134 non-null float64\n", " 12 p_no 108134 non-null float64\n", " 13 confidence 108134 non-null float64\n", " 14 info_utility 108134 non-null float64\n", " 15 vote 94137 non-null object \n", " 16 win_probability 108134 non-null 
float64\n", " 17 title 118074 non-null object \n", " 18 currentAnswer 88330 non-null object \n", " 19 request_time 127674 non-null object \n", " 20 request_month_year 127674 non-null object \n", " 21 request_month_year_week 127674 non-null object \n", "dtypes: float64(6), int64(2), object(14)\n", "memory usage: 21.4+ MB\n" ] } ], "source": [ "tools.info()" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "currentAnswer\n", "No 51140\n", "Yes 37190\n", "Name: count, dtype: int64" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tools.currentAnswer.value_counts()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "127674" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(tools)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "tools_inc = tools[tools['tool'].isin(INC_TOOLS)]\n", "# .copy() makes tools_non_error an independent frame, so the column assignments below\n", "# never act on a filtered slice (the pattern that triggers SettingWithCopyWarning).\n", "tools_non_error = tools_inc[tools_inc['error'] != 1].copy()\n", "tools_non_error['currentAnswer'] = tools_non_error['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})\n", "# Keep only rows where both the market answer and the tool's vote are Yes/No.\n", "has_yes_no = tools_non_error['currentAnswer'].isin(['Yes', 'No']) & tools_non_error['vote'].isin(['Yes', 'No'])\n", "tools_non_error = tools_non_error[has_yes_no].copy()\n", "tools_non_error['win'] = (tools_non_error['currentAnswer'] == tools_non_error['vote']).astype(int)\n", "tools_non_error.columns = tools_non_error.columns.astype(str)\n", "wins = tools_non_error.groupby(['tool', 'request_month_year_week', 'win']).size().unstack().fillna(0)" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
win01
toolrequest_month_year_week
claude-prediction-offline2024-04-22/2024-04-2814.023.0
2024-04-29/2024-05-0534.099.0
2024-05-06/2024-05-1222.034.0
2024-05-13/2024-05-1940.052.0
2024-05-20/2024-05-2618.052.0
............
prediction-url-cot-claude2024-05-06/2024-05-1267.091.0
2024-05-13/2024-05-1928.043.0
2024-05-20/2024-05-2664.0145.0
2024-05-27/2024-06-0281.0112.0
2024-06-03/2024-06-097.041.0
\n", "

91 rows × 2 columns

\n", "
" ], "text/plain": [ "win 0 1\n", "tool request_month_year_week \n", "claude-prediction-offline 2024-04-22/2024-04-28 14.0 23.0\n", " 2024-04-29/2024-05-05 34.0 99.0\n", " 2024-05-06/2024-05-12 22.0 34.0\n", " 2024-05-13/2024-05-19 40.0 52.0\n", " 2024-05-20/2024-05-26 18.0 52.0\n", "... ... ...\n", "prediction-url-cot-claude 2024-05-06/2024-05-12 67.0 91.0\n", " 2024-05-13/2024-05-19 28.0 43.0\n", " 2024-05-20/2024-05-26 64.0 145.0\n", " 2024-05-27/2024-06-02 81.0 112.0\n", " 2024-06-03/2024-06-09 7.0 41.0\n", "\n", "[91 rows x 2 columns]" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wins" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "186" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "selected_traders = list(tools.trader_address.unique())\n", "len(selected_traders)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "182" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(list(tools_non_error.trader_address.unique()))" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10817" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(tools)-len(tools_inc)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11778" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tools_week = tools_non_error[tools_non_error[\"request_month_year_week\"]==\"2024-06-03/2024-06-09\"]\n", "len(tools_week)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_trades = 
all_trades.loc[all_trades[\"trader_address\"].isin(selected_traders)]\n", "len(filtered_trades)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "all_addresses = list(all_trades.trader_address.unique())" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "# Print \"found\" for every trade address that also appears among the tool traders.\n", "# A set gives constant-time membership checks instead of scanning the list each time.\n", "selected_trader_set = set(selected_traders)\n", "for address in all_addresses:\n", "    if address in selected_trader_set:\n", "        print(\"found\")" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "# Same check in the other direction: tool traders that appear among the trade addresses.\n", "all_address_set = set(all_addresses)\n", "for address in selected_traders:\n", "    if address in all_address_set:\n", "        print(\"found\")" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filtered_tools = tools[tools[\"trader_address\"].isin(all_addresses)]\n", "len(filtered_tools)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 27707.000000\n", "mean 3.912224\n", "std 4.622220\n", "min 0.000000\n", "25% 1.000000\n", "50% 2.000000\n", "75% 5.000000\n", "max 66.000000\n", "Name: num_mech_calls, dtype: float64" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_trades.num_mech_calls.describe()" ] } ], "metadata": { "kernelspec": { "display_name": "hf_dashboards", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }