gradio version
Browse files- requirements.txt +1 -3
- test.ipynb +94 -21
requirements.txt
CHANGED
@@ -2,8 +2,6 @@ pandas==2.0.1
|
|
2 |
matplotlib
|
3 |
huggingface-hub
|
4 |
pyarrow
|
5 |
-
web3
|
6 |
requests
|
7 |
-
gradio
|
8 |
-
apscheduler
|
9 |
pytz
|
|
|
2 |
matplotlib
|
3 |
huggingface-hub
|
4 |
pyarrow
|
|
|
5 |
requests
|
6 |
+
gradio==4.13.0
|
|
|
7 |
pytz
|
test.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
@@ -15,7 +15,7 @@
|
|
15 |
"import datetime\n",
|
16 |
"import pandas as pd\n",
|
17 |
"from collections import defaultdict\n",
|
18 |
-
"from typing import Any, Union\n",
|
19 |
"from string import Template\n",
|
20 |
"from enum import Enum\n",
|
21 |
"from tqdm import tqdm\n",
|
@@ -25,35 +25,70 @@
|
|
25 |
},
|
26 |
{
|
27 |
"cell_type": "code",
|
28 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
"metadata": {},
|
30 |
"outputs": [
|
31 |
{
|
32 |
-
"
|
33 |
-
"
|
34 |
-
"
|
35 |
-
|
36 |
-
"
|
37 |
-
"
|
38 |
-
"
|
39 |
-
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
|
40 |
]
|
41 |
}
|
42 |
],
|
43 |
"source": [
|
44 |
-
"
|
45 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
"\n",
|
47 |
-
"
|
48 |
-
"
|
49 |
"\n",
|
50 |
-
"trades_df
|
51 |
-
"
|
|
|
|
|
52 |
]
|
53 |
},
|
54 |
{
|
55 |
"cell_type": "code",
|
56 |
-
"execution_count":
|
57 |
"metadata": {},
|
58 |
"outputs": [
|
59 |
{
|
@@ -64,17 +99,55 @@
|
|
64 |
" 'trade_fee_amount', 'outcomes_tokens_traded', 'current_answer',\n",
|
65 |
" 'is_invalid', 'winning_trade', 'earnings', 'redeemed',\n",
|
66 |
" 'redeemed_amount', 'num_mech_calls', 'mech_fee_amount', 'net_earnings',\n",
|
67 |
-
" 'roi'],\n",
|
68 |
" dtype='object')"
|
69 |
]
|
70 |
},
|
71 |
-
"execution_count":
|
72 |
"metadata": {},
|
73 |
"output_type": "execute_result"
|
74 |
}
|
75 |
],
|
76 |
"source": [
|
77 |
-
"trades_df.columns
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
]
|
79 |
},
|
80 |
{
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 6,
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
|
|
15 |
"import datetime\n",
|
16 |
"import pandas as pd\n",
|
17 |
"from collections import defaultdict\n",
|
18 |
+
"from typing import Any, Union, List\n",
|
19 |
"from string import Template\n",
|
20 |
"from enum import Enum\n",
|
21 |
"from tqdm import tqdm\n",
|
|
|
25 |
},
|
26 |
{
|
27 |
"cell_type": "code",
|
28 |
+
"execution_count": 9,
|
29 |
+
"metadata": {},
|
30 |
+
"outputs": [],
|
31 |
+
"source": [
|
32 |
+
"INC_TOOLS = [\n",
|
33 |
+
" 'prediction-online', \n",
|
34 |
+
" 'prediction-offline', \n",
|
35 |
+
" 'claude-prediction-online', \n",
|
36 |
+
" 'claude-prediction-offline', \n",
|
37 |
+
" 'prediction-offline-sme',\n",
|
38 |
+
" 'prediction-online-sme',\n",
|
39 |
+
" 'prediction-request-rag',\n",
|
40 |
+
" 'prediction-request-reasoning',\n",
|
41 |
+
" 'prediction-url-cot-claude', \n",
|
42 |
+
" 'prediction-request-rag-claude',\n",
|
43 |
+
" 'prediction-request-reasoning-claude'\n",
|
44 |
+
"]"
|
45 |
+
]
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"cell_type": "code",
|
49 |
+
"execution_count": 2,
|
50 |
"metadata": {},
|
51 |
"outputs": [
|
52 |
{
|
53 |
+
"name": "stderr",
|
54 |
+
"output_type": "stream",
|
55 |
+
"text": [
|
56 |
+
"/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_58769/3518445359.py:5: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n",
|
57 |
+
" trades_df['month_year'] = trades_df['creation_timestamp'].dt.to_period('M').astype(str)\n",
|
58 |
+
"/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_58769/3518445359.py:6: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n",
|
59 |
+
" trades_df['month_year_week'] = trades_df['creation_timestamp'].dt.to_period('W').astype(str)\n"
|
|
|
60 |
]
|
61 |
}
|
62 |
],
|
63 |
"source": [
|
64 |
+
"def prepare_trades(trades_df: pd.DataFrame) -> pd.DataFrame:\n",
|
65 |
+
" \"\"\"Prepares the trades data for analysis.\"\"\"\n",
|
66 |
+
" trades_df['creation_timestamp'] = pd.to_datetime(trades_df['creation_timestamp'])\n",
|
67 |
+
" trades_df['creation_timestamp'] = trades_df['creation_timestamp'].dt.tz_convert('UTC')\n",
|
68 |
+
" trades_df['month_year'] = trades_df['creation_timestamp'].dt.to_period('M').astype(str)\n",
|
69 |
+
" trades_df['month_year_week'] = trades_df['creation_timestamp'].dt.to_period('W').astype(str)\n",
|
70 |
+
" trades_df['winning_trade'] = trades_df['winning_trade'].astype(int)\n",
|
71 |
+
" return trades_df\n",
|
72 |
+
"\n",
|
73 |
+
"def prepare_data():\n",
|
74 |
+
" tools_df = pd.read_parquet(\"./data/tools.parquet\")\n",
|
75 |
+
" trades_df = pd.read_parquet(\"./data/all_trades_profitability.parquet\")\n",
|
76 |
+
"\n",
|
77 |
+
" tools_df['request_time'] = pd.to_datetime(tools_df['request_time'])\n",
|
78 |
+
" tools_df = tools_df[tools_df['request_time'].dt.year == 2024]\n",
|
79 |
"\n",
|
80 |
+
" trades_df['creation_timestamp'] = pd.to_datetime(trades_df['creation_timestamp'])\n",
|
81 |
+
" trades_df = trades_df[trades_df['creation_timestamp'].dt.year == 2024]\n",
|
82 |
"\n",
|
83 |
+
" trades_df = prepare_trades(trades_df)\n",
|
84 |
+
" return tools_df, trades_df\n",
|
85 |
+
"\n",
|
86 |
+
"tools_df, trades_df = prepare_data()"
|
87 |
]
|
88 |
},
|
89 |
{
|
90 |
"cell_type": "code",
|
91 |
+
"execution_count": 4,
|
92 |
"metadata": {},
|
93 |
"outputs": [
|
94 |
{
|
|
|
99 |
" 'trade_fee_amount', 'outcomes_tokens_traded', 'current_answer',\n",
|
100 |
" 'is_invalid', 'winning_trade', 'earnings', 'redeemed',\n",
|
101 |
" 'redeemed_amount', 'num_mech_calls', 'mech_fee_amount', 'net_earnings',\n",
|
102 |
+
" 'roi', 'month_year', 'month_year_week'],\n",
|
103 |
" dtype='object')"
|
104 |
]
|
105 |
},
|
106 |
+
"execution_count": 4,
|
107 |
"metadata": {},
|
108 |
"output_type": "execute_result"
|
109 |
}
|
110 |
],
|
111 |
"source": [
|
112 |
+
"trades_df.columns"
|
113 |
+
]
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"cell_type": "code",
|
117 |
+
"execution_count": 21,
|
118 |
+
"metadata": {},
|
119 |
+
"outputs": [],
|
120 |
+
"source": [
|
121 |
+
"def get_error_data(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:\n",
|
122 |
+
" \"\"\"Gets the error data for the given tools and calculates the error percentage.\"\"\"\n",
|
123 |
+
" tools_inc = tools_df[tools_df['tool'].isin(inc_tools)]\n",
|
124 |
+
" error = tools_inc.groupby(['tool', 'request_month_year_week', 'error']).size().unstack(fill_value=0).reset_index()\n",
|
125 |
+
" error['error_perc'] = (error[True] / (error[False] + error[True])) * 100\n",
|
126 |
+
" error['total_requests'] = error[False] + error[True]\n",
|
127 |
+
" return error\n",
|
128 |
+
"\n",
|
129 |
+
"def get_error_data_overall(error_df: pd.DataFrame) -> pd.DataFrame:\n",
|
130 |
+
" \"\"\"Gets the error data for the given tools and calculates the error percentage.\"\"\"\n",
|
131 |
+
" error_total = error_df.groupby('request_month_year_week').agg({'total_requests': 'sum', False: 'sum', True: 'sum'}).reset_index()\n",
|
132 |
+
" error_total['error_perc'] = (error_total[True] / error_total['total_requests']) * 100\n",
|
133 |
+
" error_total.columns = error_total.columns.astype(str)\n",
|
134 |
+
" error_total['error_perc'] = error_total['error_perc'].apply(lambda x: round(x, 4))\n",
|
135 |
+
" return error_total"
|
136 |
+
]
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"cell_type": "code",
|
140 |
+
"execution_count": 22,
|
141 |
+
"metadata": {},
|
142 |
+
"outputs": [],
|
143 |
+
"source": [
|
144 |
+
"error_df = get_error_data(\n",
|
145 |
+
" tools_df=tools_df,\n",
|
146 |
+
" inc_tools=INC_TOOLS\n",
|
147 |
+
")\n",
|
148 |
+
"error_overall_df = get_error_data_overall(\n",
|
149 |
+
" error_df=error_df\n",
|
150 |
+
")"
|
151 |
]
|
152 |
},
|
153 |
{
|