rosacastillo committed on
Commit
061956a
·
1 Parent(s): c9a9ff5

Outliers filter and cleaning

Browse files
Files changed (2) hide show
  1. app.py +6 -0
  2. tabs/metrics.py +0 -44
app.py CHANGED
@@ -160,6 +160,12 @@ def prepare_data():
160
  )
161
  invalid_trades["creation_date"] = invalid_trades["creation_timestamp"].dt.date
162
 
 
 
 
 
 
 
163
  return tools_df, trades_df, tools_accuracy_info, invalid_trades
164
 
165
 
 
160
  )
161
  invalid_trades["creation_date"] = invalid_trades["creation_timestamp"].dt.date
162
 
163
+ # discovering outliers for ROI
164
+ outliers = trades_df.loc[trades_df["roi"] >= 1000]
165
+ if len(outliers) > 0:
166
+ outliers.to_parquet("./tmp/outliers.parquet")
167
+ trades_df = trades_df.loc[trades_df["roi"] < 1000]
168
+
169
  return tools_df, trades_df, tools_accuracy_info, invalid_trades
170
 
171
 
tabs/metrics.py CHANGED
@@ -24,49 +24,6 @@ HEIGHT = 600
24
  WIDTH = 1000
25
 
26
 
27
- def plot_trade_details(metric_name: str, trades_df: pd.DataFrame) -> gr.LinePlot:
28
- """Plots the trade details for the given trade detail."""
29
- column_name = metric_name
30
- if metric_name == "mech calls":
31
- metric_name = "mech_calls"
32
- column_name = "num_mech_calls"
33
- elif metric_name == "ROI":
34
- column_name = "roi"
35
-
36
- # this is to filter out the data before 2023-09-01
37
- trades_filtered = trades_df[trades_df["creation_timestamp"] > "2023-09-01"]
38
- trades_filtered = (
39
- trades_filtered.groupby("month_year_week")[column_name]
40
- .quantile([0.25, 0.5, 0.75])
41
- .unstack()
42
- )
43
- trades_filtered.columns = trades_filtered.columns.astype(str)
44
- trades_filtered.reset_index(inplace=True)
45
- trades_filtered.columns = [
46
- "month_year_week",
47
- "25th_percentile",
48
- "50th_percentile",
49
- "75th_percentile",
50
- ]
51
- # reformat the data as percentile, date, value
52
- trades_filtered = trades_filtered.melt(
53
- id_vars=["month_year_week"], var_name="percentile", value_name=metric_name
54
- )
55
-
56
- return gr.LinePlot(
57
- value=trades_filtered,
58
- x="month_year_week",
59
- y=metric_name,
60
- color="percentile",
61
- show_label=True,
62
- interactive=True,
63
- show_actions_button=True,
64
- tooltip=["month_year_week", "percentile", metric_name],
65
- height=HEIGHT,
66
- width=WIDTH,
67
- )
68
-
69
-
70
  def get_metrics(
71
  metric_name: str, column_name: str, market_creator: str, trades_df: pd.DataFrame
72
  ) -> pd.DataFrame:
@@ -107,7 +64,6 @@ def get_boxplot_metrics(column_name: str, trades_df: pd.DataFrame) -> pd.DataFra
107
  trades_filtered = trades_filtered[
108
  ["creation_timestamp", "month_year_week", "market_creator", column_name]
109
  ]
110
-
111
  # adding the total
112
  trades_filtered_all = trades_filtered.copy(deep=True)
113
  trades_filtered_all["market_creator"] = "all"
 
24
  WIDTH = 1000
25
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def get_metrics(
28
  metric_name: str, column_name: str, market_creator: str, trades_df: pd.DataFrame
29
  ) -> pd.DataFrame:
 
64
  trades_filtered = trades_filtered[
65
  ["creation_timestamp", "month_year_week", "market_creator", column_name]
66
  ]
 
67
  # adding the total
68
  trades_filtered_all = trades_filtered.copy(deep=True)
69
  trades_filtered_all["market_creator"] = "all"