arshy committed on
Commit
52a040b
·
1 Parent(s): 68f06cc

gradio version

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -3
  2. test.ipynb +94 -21
requirements.txt CHANGED
@@ -2,8 +2,6 @@ pandas==2.0.1
2
  matplotlib
3
  huggingface-hub
4
  pyarrow
5
- web3
6
  requests
7
- gradio
8
- apscheduler
9
  pytz
 
2
  matplotlib
3
  huggingface-hub
4
  pyarrow
 
5
  requests
6
+ gradio==4.13.0
 
7
  pytz
test.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 2,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -15,7 +15,7 @@
15
  "import datetime\n",
16
  "import pandas as pd\n",
17
  "from collections import defaultdict\n",
18
- "from typing import Any, Union\n",
19
  "from string import Template\n",
20
  "from enum import Enum\n",
21
  "from tqdm import tqdm\n",
@@ -25,35 +25,70 @@
25
  },
26
  {
27
  "cell_type": "code",
28
- "execution_count": 4,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "metadata": {},
30
  "outputs": [
31
  {
32
- "ename": "",
33
- "evalue": "",
34
- "output_type": "error",
35
- "traceback": [
36
- "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
37
- "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
38
- "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
39
- "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
40
  ]
41
  }
42
  ],
43
  "source": [
44
- "tools_df = pd.read_parquet(\"./data/tools.parquet\")\n",
45
- "trades_df = pd.read_parquet(\"./data/all_trades_profitability.parquet\")\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  "\n",
47
- "tools_df['request_time'] = pd.to_datetime(tools_df['request_time'])\n",
48
- "tools_df = tools_df[tools_df['request_time'].dt.year == 2024]\n",
49
  "\n",
50
- "trades_df['creation_timestamp'] = pd.to_datetime(trades_df['creation_timestamp'])\n",
51
- "trades_df = trades_df[trades_df['creation_timestamp'].dt.year == 2024]"
 
 
52
  ]
53
  },
54
  {
55
  "cell_type": "code",
56
- "execution_count": 5,
57
  "metadata": {},
58
  "outputs": [
59
  {
@@ -64,17 +99,55 @@
64
  " 'trade_fee_amount', 'outcomes_tokens_traded', 'current_answer',\n",
65
  " 'is_invalid', 'winning_trade', 'earnings', 'redeemed',\n",
66
  " 'redeemed_amount', 'num_mech_calls', 'mech_fee_amount', 'net_earnings',\n",
67
- " 'roi'],\n",
68
  " dtype='object')"
69
  ]
70
  },
71
- "execution_count": 5,
72
  "metadata": {},
73
  "output_type": "execute_result"
74
  }
75
  ],
76
  "source": [
77
- "trades_df.columns\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  ]
79
  },
80
  {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 6,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
15
  "import datetime\n",
16
  "import pandas as pd\n",
17
  "from collections import defaultdict\n",
18
+ "from typing import Any, Union, List\n",
19
  "from string import Template\n",
20
  "from enum import Enum\n",
21
  "from tqdm import tqdm\n",
 
25
  },
26
  {
27
  "cell_type": "code",
28
+ "execution_count": 9,
29
+ "metadata": {},
30
+ "outputs": [],
31
+ "source": [
32
+ "INC_TOOLS = [\n",
33
+ " 'prediction-online', \n",
34
+ " 'prediction-offline', \n",
35
+ " 'claude-prediction-online', \n",
36
+ " 'claude-prediction-offline', \n",
37
+ " 'prediction-offline-sme',\n",
38
+ " 'prediction-online-sme',\n",
39
+ " 'prediction-request-rag',\n",
40
+ " 'prediction-request-reasoning',\n",
41
+ " 'prediction-url-cot-claude', \n",
42
+ " 'prediction-request-rag-claude',\n",
43
+ " 'prediction-request-reasoning-claude'\n",
44
+ "]"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "execution_count": 2,
50
  "metadata": {},
51
  "outputs": [
52
  {
53
+ "name": "stderr",
54
+ "output_type": "stream",
55
+ "text": [
56
+ "/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_58769/3518445359.py:5: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n",
57
+ " trades_df['month_year'] = trades_df['creation_timestamp'].dt.to_period('M').astype(str)\n",
58
+ "/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_58769/3518445359.py:6: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n",
59
+ " trades_df['month_year_week'] = trades_df['creation_timestamp'].dt.to_period('W').astype(str)\n"
 
60
  ]
61
  }
62
  ],
63
  "source": [
64
+ "def prepare_trades(trades_df: pd.DataFrame) -> pd.DataFrame:\n",
65
+ " \"\"\"Prepares the trades data for analysis.\"\"\"\n",
66
+ " trades_df['creation_timestamp'] = pd.to_datetime(trades_df['creation_timestamp'])\n",
67
+ " trades_df['creation_timestamp'] = trades_df['creation_timestamp'].dt.tz_convert('UTC')\n",
68
+ " trades_df['month_year'] = trades_df['creation_timestamp'].dt.to_period('M').astype(str)\n",
69
+ " trades_df['month_year_week'] = trades_df['creation_timestamp'].dt.to_period('W').astype(str)\n",
70
+ " trades_df['winning_trade'] = trades_df['winning_trade'].astype(int)\n",
71
+ " return trades_df\n",
72
+ "\n",
73
+ "def prepare_data():\n",
74
+ " tools_df = pd.read_parquet(\"./data/tools.parquet\")\n",
75
+ " trades_df = pd.read_parquet(\"./data/all_trades_profitability.parquet\")\n",
76
+ "\n",
77
+ " tools_df['request_time'] = pd.to_datetime(tools_df['request_time'])\n",
78
+ " tools_df = tools_df[tools_df['request_time'].dt.year == 2024]\n",
79
  "\n",
80
+ " trades_df['creation_timestamp'] = pd.to_datetime(trades_df['creation_timestamp'])\n",
81
+ " trades_df = trades_df[trades_df['creation_timestamp'].dt.year == 2024]\n",
82
  "\n",
83
+ " trades_df = prepare_trades(trades_df)\n",
84
+ " return tools_df, trades_df\n",
85
+ "\n",
86
+ "tools_df, trades_df = prepare_data()"
87
  ]
88
  },
89
  {
90
  "cell_type": "code",
91
+ "execution_count": 4,
92
  "metadata": {},
93
  "outputs": [
94
  {
 
99
  " 'trade_fee_amount', 'outcomes_tokens_traded', 'current_answer',\n",
100
  " 'is_invalid', 'winning_trade', 'earnings', 'redeemed',\n",
101
  " 'redeemed_amount', 'num_mech_calls', 'mech_fee_amount', 'net_earnings',\n",
102
+ " 'roi', 'month_year', 'month_year_week'],\n",
103
  " dtype='object')"
104
  ]
105
  },
106
+ "execution_count": 4,
107
  "metadata": {},
108
  "output_type": "execute_result"
109
  }
110
  ],
111
  "source": [
112
+ "trades_df.columns"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": 21,
118
+ "metadata": {},
119
+ "outputs": [],
120
+ "source": [
121
+ "def get_error_data(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:\n",
122
+ " \"\"\"Gets the error data for the given tools and calculates the error percentage.\"\"\"\n",
123
+ " tools_inc = tools_df[tools_df['tool'].isin(inc_tools)]\n",
124
+ " error = tools_inc.groupby(['tool', 'request_month_year_week', 'error']).size().unstack(fill_value=0).reset_index()\n",
125
+ " error['error_perc'] = (error[True] / (error[False] + error[True])) * 100\n",
126
+ " error['total_requests'] = error[False] + error[True]\n",
127
+ " return error\n",
128
+ "\n",
129
+ "def get_error_data_overall(error_df: pd.DataFrame) -> pd.DataFrame:\n",
130
+ " \"\"\"Aggregates the per-tool error data by `request_month_year_week` and calculates the overall error percentage.\"\"\"\n",
131
+ " error_total = error_df.groupby('request_month_year_week').agg({'total_requests': 'sum', False: 'sum', True: 'sum'}).reset_index()\n",
132
+ " error_total['error_perc'] = (error_total[True] / error_total['total_requests']) * 100\n",
133
+ " error_total.columns = error_total.columns.astype(str)\n",
134
+ " error_total['error_perc'] = error_total['error_perc'].apply(lambda x: round(x, 4))\n",
135
+ " return error_total"
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "execution_count": 22,
141
+ "metadata": {},
142
+ "outputs": [],
143
+ "source": [
144
+ "error_df = get_error_data(\n",
145
+ " tools_df=tools_df,\n",
146
+ " inc_tools=INC_TOOLS\n",
147
+ ")\n",
148
+ "error_overall_df = get_error_data_overall(\n",
149
+ " error_df=error_df\n",
150
+ ")"
151
  ]
152
  },
153
  {