rosacastillo committed on
Commit
c255cf4
·
1 Parent(s): dbb4d06

added markets creator info for the tools tab

Browse files
Files changed (6) hide show
  1. app.py +18 -26
  2. scripts/markets.py +28 -0
  3. scripts/tools.py +6 -1
  4. tabs/metrics.py +7 -2
  5. tabs/tool_win.py +109 -31
  6. tabs/trades.py +0 -34
app.py CHANGED
@@ -1,8 +1,6 @@
1
  from datetime import datetime, timedelta
2
  import gradio as gr
3
- import matplotlib.pyplot as plt
4
  import pandas as pd
5
- import seaborn as sns
6
  import duckdb
7
  import logging
8
  from tabs.trades import (
@@ -11,10 +9,6 @@ from tabs.trades import (
11
  get_overall_by_market_trades,
12
  get_overall_winning_trades,
13
  get_overall_winning_by_market_trades,
14
- plot_trades_by_week,
15
- plot_trades_per_market_by_week,
16
- plot_winning_trades_by_week,
17
- plot_winning_trades_per_market_by_week,
18
  integrated_plot_trades_per_market_by_week,
19
  integrated_plot_winning_trades_per_market_by_week,
20
  )
@@ -31,24 +25,20 @@ from tabs.metrics import (
31
  )
32
 
33
  from tabs.tool_win import (
34
- get_tool_winning_rate,
35
  get_tool_winning_rate_by_market,
36
- get_overall_winning_rate,
37
- plot_tool_winnings_overall,
38
- plot_tool_winnings_by_tool,
39
  )
40
 
41
  from tabs.tool_accuracy import (
42
  plot_tools_weighted_accuracy_rotated_graph,
43
  plot_tools_accuracy_rotated_graph,
44
  compute_weighted_accuracy,
45
- plot_tools_accuracy_graph,
46
- plot_tools_weighted_accuracy_graph,
47
  )
48
 
49
  from tabs.invalid_markets import (
50
  plot_daily_dist_invalid_trades,
51
- plot_ratio_invalid_trades_per_market,
52
  plot_top_invalid_markets,
53
  plot_daily_nr_invalid_markets,
54
  )
@@ -160,9 +150,7 @@ def prepare_data():
160
  tools_df, trades_df, tools_accuracy_info, invalid_trades = get_all_data()
161
  print(trades_df.info())
162
 
163
- tools_df["request_time"] = pd.to_datetime(tools_df["request_time"])
164
- trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"])
165
-
166
  trades_df = prepare_trades(trades_df)
167
 
168
  tools_accuracy_info = compute_weighted_accuracy(tools_accuracy_info)
@@ -184,8 +172,8 @@ demo = gr.Blocks()
184
 
185
  error_df = get_error_data(tools_df=tools_df, inc_tools=INC_TOOLS)
186
  error_overall_df = get_error_data_overall(error_df=error_df)
187
- winning_rate_df = get_tool_winning_rate(tools_df=tools_df, inc_tools=INC_TOOLS)
188
- winning_rate_overall_df = get_overall_winning_rate(wins_df=winning_rate_df)
189
  trades_count_df = get_overall_trades(trades_df=trades_df)
190
  trades_winning_rate_df = get_overall_winning_trades(trades_df=trades_df)
191
  trades_by_market = get_overall_by_market_trades(trades_df=trades_df)
@@ -261,20 +249,20 @@ with demo:
261
  with gr.Row():
262
  winning_selector = gr.Dropdown(
263
  label="Select the tool metric",
264
- choices=tool_metric_choices,
265
  value=default_tool_metric,
266
  )
267
 
268
  with gr.Row():
269
  # plot_tool_metrics
270
- winning_plot = plot_tool_winnings_overall(
271
- wins_df=winning_rate_overall_df,
272
  winning_selector=default_tool_metric,
273
  )
274
 
275
  def update_tool_winnings_overall_plot(winning_selector):
276
- return plot_tool_winnings_overall(
277
- wins_df=winning_rate_overall_df, winning_selector=winning_selector
278
  )
279
 
280
  winning_selector.change(
@@ -297,12 +285,16 @@ with demo:
297
  )
298
 
299
  with gr.Row():
300
- tool_winnings_by_tool_plot = plot_tool_winnings_by_tool(
301
- wins_df=winning_rate_df, tool=INC_TOOLS[0]
 
 
302
  )
303
 
304
  def update_tool_winnings_by_tool_plot(tool):
305
- return plot_tool_winnings_by_tool(wins_df=winning_rate_df, tool=tool)
 
 
306
 
307
  sel_tool.change(
308
  update_tool_winnings_by_tool_plot,
 
1
  from datetime import datetime, timedelta
2
  import gradio as gr
 
3
  import pandas as pd
 
4
  import duckdb
5
  import logging
6
  from tabs.trades import (
 
9
  get_overall_by_market_trades,
10
  get_overall_winning_trades,
11
  get_overall_winning_by_market_trades,
 
 
 
 
12
  integrated_plot_trades_per_market_by_week,
13
  integrated_plot_winning_trades_per_market_by_week,
14
  )
 
25
  )
26
 
27
  from tabs.tool_win import (
28
+ prepare_tools,
29
  get_tool_winning_rate_by_market,
30
+ integrated_plot_tool_winnings_overall_per_market_by_week,
31
+ integrated_tool_winnings_by_tool_per_market,
 
32
  )
33
 
34
  from tabs.tool_accuracy import (
35
  plot_tools_weighted_accuracy_rotated_graph,
36
  plot_tools_accuracy_rotated_graph,
37
  compute_weighted_accuracy,
 
 
38
  )
39
 
40
  from tabs.invalid_markets import (
41
  plot_daily_dist_invalid_trades,
 
42
  plot_top_invalid_markets,
43
  plot_daily_nr_invalid_markets,
44
  )
 
150
  tools_df, trades_df, tools_accuracy_info, invalid_trades = get_all_data()
151
  print(trades_df.info())
152
 
153
+ tools_df = prepare_tools(tools_df)
 
 
154
  trades_df = prepare_trades(trades_df)
155
 
156
  tools_accuracy_info = compute_weighted_accuracy(tools_accuracy_info)
 
172
 
173
  error_df = get_error_data(tools_df=tools_df, inc_tools=INC_TOOLS)
174
  error_overall_df = get_error_data_overall(error_df=error_df)
175
+ winning_df = get_tool_winning_rate_by_market(tools_df, inc_tools=INC_TOOLS)
176
+ # preparing data for the trades graph
177
  trades_count_df = get_overall_trades(trades_df=trades_df)
178
  trades_winning_rate_df = get_overall_winning_trades(trades_df=trades_df)
179
  trades_by_market = get_overall_by_market_trades(trades_df=trades_df)
 
249
  with gr.Row():
250
  winning_selector = gr.Dropdown(
251
  label="Select the tool metric",
252
+ choices=list(tool_metric_choices.keys()),
253
  value=default_tool_metric,
254
  )
255
 
256
  with gr.Row():
257
  # plot_tool_metrics
258
+ winning_plot = integrated_plot_tool_winnings_overall_per_market_by_week(
259
+ winning_df=winning_df,
260
  winning_selector=default_tool_metric,
261
  )
262
 
263
  def update_tool_winnings_overall_plot(winning_selector):
264
+ return integrated_plot_tool_winnings_overall_per_market_by_week(
265
+ winning_df=winning_df, winning_selector=winning_selector
266
  )
267
 
268
  winning_selector.change(
 
285
  )
286
 
287
  with gr.Row():
288
+ tool_winnings_by_tool_plot = (
289
+ integrated_tool_winnings_by_tool_per_market(
290
+ wins_df=winning_df, tool=INC_TOOLS[0]
291
+ )
292
  )
293
 
294
  def update_tool_winnings_by_tool_plot(tool):
295
+ return integrated_tool_winnings_by_tool_per_market(
296
+ wins_df=winning_df, tool=tool
297
+ )
298
 
299
  sel_tool.change(
300
  update_tool_winnings_by_tool_plot,
scripts/markets.py CHANGED
@@ -250,5 +250,33 @@ def etl(filename: Optional[str] = None) -> pd.DataFrame:
250
  return fpmms
251
 
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  if __name__ == "__main__":
254
  etl("all_fpmms.parquet")
 
250
  return fpmms
251
 
252
 
253
def add_market_creator(tools: pd.DataFrame) -> pd.DataFrame:
    """Add a ``market_creator`` column to the tools dataframe.

    The market creator of a trader is taken from the first row of
    ``fpmmTrades.parquet`` (read from ``DATA_DIR``) whose ``trader_address``
    matches. Rows of ``tools`` whose trader has no trade in that file, or whose
    market creator is the empty string, are dropped from the result.

    Args:
        tools: dataframe with at least a ``trader_address`` column. As in the
            previous implementation, the ``market_creator`` column is added to
            this object in place.

    Returns:
        The filtered dataframe with the ``market_creator`` column. If
        ``fpmmTrades.parquet`` cannot be found, ``tools`` is returned unchanged
        (without the new column) so that callers which reassign the result and
        then call ``len()`` on it keep working.

    Raises:
        KeyError: if the trades file lacks the ``trader_address`` or
            ``market_creator`` columns.
    """
    trades_filename = "fpmmTrades.parquet"
    try:
        fpmms_trades = pd.read_parquet(DATA_DIR / trades_filename)
    except FileNotFoundError:
        print("Error: fpmmTrades.parquet not found. No market creator added")
        return tools

    # One lookup entry per trader: the first trade found is enough.
    first_trades = fpmms_trades.dropna(subset=["trader_address"]).drop_duplicates(
        subset="trader_address", keep="first"
    )
    creator_by_trader = first_trades.set_index("trader_address")["market_creator"]

    tools["market_creator"] = ""
    has_trades = tools["trader_address"].isin(creator_by_trader.index)

    # Report every trader for which no trade (and so no market creator) exists.
    for trader_address in tools.loc[~has_trades, "trader_address"].unique():
        print(f"ERROR getting the market creator of {trader_address}")

    # Single vectorised update instead of one boolean-mask pass per trader.
    tools.loc[has_trades, "market_creator"] = tools.loc[
        has_trades, "trader_address"
    ].map(creator_by_trader)

    # filter those tools where we don't have market creator info
    return tools.loc[tools["market_creator"] != ""]
279
+
280
+
281
  if __name__ == "__main__":
282
  etl("all_fpmms.parquet")
scripts/tools.py CHANGED
@@ -45,6 +45,7 @@ from urllib3.exceptions import (
45
  )
46
  from web3 import Web3, HTTPProvider
47
  from web3.exceptions import MismatchedABI
 
48
  from web3.types import BlockParams
49
  from concurrent.futures import ThreadPoolExecutor, as_completed
50
  from utils import (
@@ -586,7 +587,11 @@ def parse_store_json_events_parallel(
586
  contents.append(current_mech_contents)
587
 
588
  tools = pd.concat(contents, ignore_index=True)
589
- print(f"Length of the contents dataframe {len(tools)}")
 
 
 
 
590
  print(tools.info())
591
  try:
592
  if "result" in tools.columns:
 
45
  )
46
  from web3 import Web3, HTTPProvider
47
  from web3.exceptions import MismatchedABI
48
+ from markets import add_market_creator
49
  from web3.types import BlockParams
50
  from concurrent.futures import ThreadPoolExecutor, as_completed
51
  from utils import (
 
587
  contents.append(current_mech_contents)
588
 
589
  tools = pd.concat(contents, ignore_index=True)
590
+ print(f"Adding market creators info. Length of the tools file = {tools}")
591
+ tools = add_market_creator(tools)
592
+ print(
593
+ f"Length of the tools dataframe after adding market creators info= {len(tools)}"
594
+ )
595
  print(tools.info())
596
  try:
597
  if "result" in tools.columns:
tabs/metrics.py CHANGED
@@ -10,10 +10,15 @@ trade_metric_choices = [
10
  "ROI",
11
  ]
12
 
13
- tool_metric_choices = ["losses", "wins", "total_request", "win_perc"]
 
 
 
 
 
14
 
15
  default_trade_metric = "ROI"
16
- default_tool_metric = "win_perc"
17
 
18
  HEIGHT = 600
19
  WIDTH = 1000
 
10
  "ROI",
11
  ]
12
 
13
+ tool_metric_choices = {
14
+ "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %": "win_perc",
15
+ "Total Weekly Inaccurate Nr of Mech Tool Responses": "losses",
16
+ "Total Weekly Accurate Nr of Mech Tool Responses": "wins",
17
+ "Total Weekly Nr of Mech Tool Requests": "total_request",
18
+ }
19
 
20
  default_trade_metric = "ROI"
21
+ default_tool_metric = "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %"
22
 
23
  HEIGHT = 600
24
  WIDTH = 1000
tabs/tool_win.py CHANGED
@@ -1,12 +1,31 @@
1
  import pandas as pd
2
  import gradio as gr
3
  from typing import List
 
 
4
 
5
 
6
  HEIGHT = 600
7
  WIDTH = 1000
8
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def get_tool_winning_rate(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:
11
  """Gets the tool winning rate data for the given tools and calculates the winning percentage."""
12
  tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
@@ -68,7 +87,7 @@ def get_tool_winning_rate_by_market(
68
  wins["total_request"] = wins[0] + wins[1]
69
  wins.columns = wins.columns.astype(str)
70
  # Convert request_month_year_week to string and explicitly set type for Altair
71
- wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
72
  return wins
73
 
74
 
@@ -83,17 +102,6 @@ def get_overall_winning_rate(wins_df: pd.DataFrame) -> pd.DataFrame:
83
  return overall_wins
84
 
85
 
86
- def get_overall_winning_rate(wins_df: pd.DataFrame) -> pd.DataFrame:
87
- """Gets the overall winning rate data for the given tools and calculates the winning percentage."""
88
- overall_wins = (
89
- wins_df.groupby("request_month_year_week")
90
- .agg({"0": "sum", "1": "sum", "win_perc": "mean", "total_request": "sum"})
91
- .rename(columns={"0": "losses", "1": "wins"})
92
- .reset_index()
93
- )
94
- return overall_wins
95
-
96
-
97
  def get_overall_winning_rate_by_market(wins_df: pd.DataFrame) -> pd.DataFrame:
98
  """Gets the overall winning rate data for the given tools and calculates the winning percentage."""
99
  overall_wins = (
@@ -125,39 +133,68 @@ def plot_tool_winnings_overall(
125
  )
126
 
127
 
128
- def integrated_plot_tool_winnings_overall(
129
- tools_df: pd.DataFrame, winning_selector: str = "win_perc"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  ) -> gr.Plot:
131
- # TODO Pending final implementation
132
- """Plots the overall winning rate data for the given tools and calculates the winning percentage."""
133
- # adding the total
134
- wins_df_all = tools_df.copy(deep=True)
135
- wins_df_all["market_creator"] = "all"
136
 
137
- # merging both dataframes
138
- all_winning_tools = pd.concat([wins_df, wins_df_all], ignore_index=True)
139
- all_winning_tools = all_winning_tools.sort_values(
140
- by="creation_timestamp", ascending=True
 
 
 
141
  )
142
- final_df = get_overall_winning_rate_by_market(all_winning_tools)
 
 
 
 
 
 
 
143
  fig = px.bar(
144
- final_df,
145
  x="request_month_year_week",
146
- y=winning_selector,
147
  color="market_creator",
148
  barmode="group",
149
- color_discrete_sequence=["goldenrod", "darkgreen", "purple"],
 
 
 
 
150
  )
151
  fig.update_layout(
152
  xaxis_title="Week",
153
- yaxis_title="Weekly % of winning rate",
154
  legend=dict(yanchor="top", y=0.5),
155
  )
156
  fig.update_layout(width=WIDTH, height=HEIGHT)
157
  fig.update_xaxes(tickformat="%b %d\n%Y")
158
- return gr.Plot(
159
- value=fig,
160
- )
161
 
162
 
163
  def plot_tool_winnings_by_tool(wins_df: pd.DataFrame, tool: str) -> gr.BarPlot:
@@ -176,3 +213,44 @@ def plot_tool_winnings_by_tool(wins_df: pd.DataFrame, tool: str) -> gr.BarPlot:
176
  height=HEIGHT,
177
  width=WIDTH,
178
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
  import gradio as gr
3
  from typing import List
4
+ from tabs.metrics import tool_metric_choices
5
+ import plotly.express as px
6
 
7
 
8
  HEIGHT = 600
9
  WIDTH = 1000
10
 
11
 
12
+ def prepare_tools(tools: pd.DataFrame) -> pd.DataFrame:
13
+ tools["request_time"] = pd.to_datetime(tools["request_time"])
14
+ tools = tools.sort_values(by="request_time", ascending=True)
15
+
16
+ tools["request_month_year_week"] = (
17
+ pd.to_datetime(tools["request_time"]).dt.to_period("W").dt.strftime("%b-%d")
18
+ )
19
+ # preparing the tools graph
20
+ # adding the total
21
+ tools_all = tools.copy(deep=True)
22
+ tools_all["market_creator"] = "all"
23
+ # merging both dataframes
24
+ tools = pd.concat([tools, tools_all], ignore_index=True)
25
+ tools = tools.sort_values(by="request_time", ascending=True)
26
+ return tools
27
+
28
+
29
  def get_tool_winning_rate(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:
30
  """Gets the tool winning rate data for the given tools and calculates the winning percentage."""
31
  tools_inc = tools_df[tools_df["tool"].isin(inc_tools)]
 
87
  wins["total_request"] = wins[0] + wins[1]
88
  wins.columns = wins.columns.astype(str)
89
  # Convert request_month_year_week to string and explicitly set type for Altair
90
+ # wins["request_month_year_week"] = wins["request_month_year_week"].astype(str)
91
  return wins
92
 
93
 
 
102
  return overall_wins
103
 
104
 
 
 
 
 
 
 
 
 
 
 
 
105
  def get_overall_winning_rate_by_market(wins_df: pd.DataFrame) -> pd.DataFrame:
106
  """Gets the overall winning rate data for the given tools and calculates the winning percentage."""
107
  overall_wins = (
 
133
  )
134
 
135
 
136
+ def sort_key(date_str):
137
+ month, year_week = date_str.split("-")
138
+ month_order = [
139
+ "Jan",
140
+ "Feb",
141
+ "Mar",
142
+ "Apr",
143
+ "May",
144
+ "Jun",
145
+ "Jul",
146
+ "Aug",
147
+ "Sep",
148
+ "Oct",
149
+ "Nov",
150
+ "Dec",
151
+ ]
152
+ month_num = month_order.index(month) + 1
153
+ week = int(year_week)
154
+ return (week // 100, month_num, week % 100) # year, month, week
155
+
156
+
157
+ def integrated_plot_tool_winnings_overall_per_market_by_week(
158
+ winning_df: pd.DataFrame,
159
+ winning_selector: str = "Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
160
  ) -> gr.Plot:
 
 
 
 
 
161
 
162
+ # get the column name from the metric name
163
+ column_name = tool_metric_choices.get(winning_selector)
164
+
165
+ wins_df = get_overall_winning_rate_by_market(winning_df)
166
+ # Sort the unique values of request_month_year_week
167
+ sorted_categories = sorted(
168
+ wins_df["request_month_year_week"].unique(), key=sort_key
169
  )
170
+ # Create a categorical type with a specific order
171
+ wins_df["request_month_year_week"] = pd.Categorical(
172
+ wins_df["request_month_year_week"], categories=sorted_categories, ordered=True
173
+ )
174
+
175
+ # Sort the DataFrame based on the new categorical column
176
+ wins_df = wins_df.sort_values("request_month_year_week")
177
+
178
  fig = px.bar(
179
+ wins_df,
180
  x="request_month_year_week",
181
+ y=column_name,
182
  color="market_creator",
183
  barmode="group",
184
+ color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
185
+ category_orders={
186
+ "market_creator": ["pearl", "quickstart", "all"],
187
+ "request_month_year_week": sorted_categories,
188
+ },
189
  )
190
  fig.update_layout(
191
  xaxis_title="Week",
192
+ yaxis_title=winning_selector,
193
  legend=dict(yanchor="top", y=0.5),
194
  )
195
  fig.update_layout(width=WIDTH, height=HEIGHT)
196
  fig.update_xaxes(tickformat="%b %d\n%Y")
197
+ return gr.Plot(value=fig)
 
 
198
 
199
 
200
  def plot_tool_winnings_by_tool(wins_df: pd.DataFrame, tool: str) -> gr.BarPlot:
 
213
  height=HEIGHT,
214
  width=WIDTH,
215
  )
216
+
217
+
218
+ def integrated_tool_winnings_by_tool_per_market(
219
+ wins_df: pd.DataFrame, tool: str
220
+ ) -> gr.Plot:
221
+
222
+ tool_wins_df = wins_df[wins_df["tool"] == tool]
223
+ # Sort the unique values of request_month_year_week
224
+ sorted_categories = sorted(
225
+ tool_wins_df["request_month_year_week"].unique(), key=sort_key
226
+ )
227
+ # Create a categorical type with a specific order
228
+ tool_wins_df["request_month_year_week"] = pd.Categorical(
229
+ tool_wins_df["request_month_year_week"],
230
+ categories=sorted_categories,
231
+ ordered=True,
232
+ )
233
+
234
+ # Sort the DataFrame based on the new categorical column
235
+ wins_df = wins_df.sort_values("request_month_year_week")
236
+ fig = px.bar(
237
+ tool_wins_df,
238
+ x="request_month_year_week",
239
+ y="win_perc",
240
+ color="market_creator",
241
+ barmode="group",
242
+ color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
243
+ category_orders={
244
+ "market_creator": ["pearl", "quickstart", "all"],
245
+ "request_month_year_week": sorted_categories,
246
+ },
247
+ )
248
+
249
+ fig.update_layout(
250
+ xaxis_title="Week",
251
+ yaxis_title="Weekly Mean Mech Tool Accuracy as (Accurate Responses/All) %",
252
+ legend=dict(yanchor="top", y=0.5),
253
+ )
254
+ fig.update_layout(width=WIDTH, height=HEIGHT)
255
+ fig.update_xaxes(tickformat="%b %d\n%Y")
256
+ return gr.Plot(value=fig)
tabs/trades.py CHANGED
@@ -91,40 +91,6 @@ def plot_trades_by_week(trades_df: pd.DataFrame) -> gr.BarPlot:
91
  )
92
 
93
 
94
- def plot_trades_per_market_by_week(
95
- trades_df: pd.DataFrame, market_type: str
96
- ) -> gr.Plot:
97
- """Plots the trades data for the given tools and calculates the winning percentage."""
98
- assert "market_creator" in trades_df.columns
99
- # if market_type is "all then no filter is applied"
100
- if market_type == "quickstart":
101
- trades = trades_df.loc[trades_df["market_creator"] == "quickstart"]
102
- color_sequence = ["goldenrod"]
103
-
104
- elif market_type == "pearl":
105
- trades = trades_df.loc[trades_df["market_creator"] == "pearl"]
106
- color_sequence = ["purple"]
107
- else:
108
- trades = trades_df
109
- color_sequence = ["darkgreen"]
110
-
111
- fig = px.bar(
112
- trades,
113
- x="month_year_week",
114
- y="trades",
115
- color_discrete_sequence=color_sequence,
116
- title=market_type + " trades",
117
- )
118
- fig.update_layout(
119
- xaxis_title="Week",
120
- yaxis_title="Weekly nr of trades",
121
- )
122
- fig.update_xaxes(tickformat="%b %d\n%Y")
123
- return gr.Plot(
124
- value=fig,
125
- )
126
-
127
-
128
  def integrated_plot_trades_per_market_by_week(trades_df: pd.DataFrame) -> gr.Plot:
129
 
130
  # adding the total
 
91
  )
92
 
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def integrated_plot_trades_per_market_by_week(trades_df: pd.DataFrame) -> gr.Plot:
95
 
96
  # adding the total