howardroark committed on
Commit
019a614
·
1 Parent(s): 29e4361

code updates

Browse files
Files changed (2) hide show
  1. app.py +14 -2
  2. models_utils/ml_models.py +61 -21
app.py CHANGED
@@ -150,6 +150,11 @@ if tabs == "Exploratory analysis":
150
  if tabs == "Model training":
151
 
152
  st.header("Model Training")
 
 
 
 
 
153
 
154
  if 'uplift_sim' in st.session_state:
155
 
@@ -177,7 +182,7 @@ if tabs == "Model training":
177
  y_name = 'conversion' # st.selectbox('Select target variable for conversion', options=uplift_sim.target_options)
178
  model_trainer.y_name = y_name
179
  tau = model_trainer.fit_predict_classifier(params, control_name)
180
- elif model_type == 'BATE Model':
181
  y_name = 'benefit' # st.selectbox('Select target variable for benefit', options=uplift_sim.benefit_options)
182
  model_trainer.y_name = y_name
183
  tau = model_trainer.fit_predict_regressor(params, control_name)
@@ -203,6 +208,10 @@ if tabs == "Model training":
203
  if tabs == "Economic effects":
204
 
205
  st.header("Economic Effects Analysis")
 
 
 
 
206
 
207
  if 'uplift_sim' in st.session_state and 'model_trainer' in st.session_state:
208
  df_test = st.session_state.model_trainer.df_test
@@ -268,12 +277,15 @@ if tabs == "Economic effects":
268
  qini_conc_test = pd.concat([qini_conversions[discount][['S']], qini_benefits[discount][['S']]], axis=1)
269
  qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
270
  qini_conc_test.plot(ax=ax_comp, x='cate_conversion', y='cate_benefit', color=colors[i], label=f'{discount} model')
 
 
 
271
 
272
  ax_comp.legend(prop={'size': 10})
273
  ax_comp.set_xlabel('CATE Conversion')
274
  ax_comp.set_ylabel('CATE Benefit')
275
  ax_comp.set_title('CATE Benefit vs CATE Conversion')
276
  st.pyplot(fig)
277
-
278
  else:
279
  st.error("Please ensure the model is trained and the dataset is prepared.")
 
150
  if tabs == "Model training":
151
 
152
  st.header("Model Training")
153
+ st.write("""
154
+ In this section, we train a model to predict the uplift effect of different treatments on customer behavior.
155
+ We use the XGBoost algorithm to train the model. The model can be used to predict the conversion rate or the benefit per user for each treatment group.
156
+ We can also analyze the economic effects of the treatments by comparing the uplift in conversion rate and benefit per user.
157
+ """)
158
 
159
  if 'uplift_sim' in st.session_state:
160
 
 
182
  y_name = 'conversion' # st.selectbox('Select target variable for conversion', options=uplift_sim.target_options)
183
  model_trainer.y_name = y_name
184
  tau = model_trainer.fit_predict_classifier(params, control_name)
185
+ elif model_type == 'Benefit Model':
186
  y_name = 'benefit' # st.selectbox('Select target variable for benefit', options=uplift_sim.benefit_options)
187
  model_trainer.y_name = y_name
188
  tau = model_trainer.fit_predict_regressor(params, control_name)
 
208
  if tabs == "Economic effects":
209
 
210
  st.header("Economic Effects Analysis")
211
+ st.write("""
212
+ We can evaluate our models by looking at the Qini curves. We can use the CATE conversion model to evaluate the performance on both the Conversion and the Benefit as a function of the fraction of users targeted.
213
+ The Qini curve is a measure of the uplift effect of a model. It shows the difference between the uplift model and a random model.
214
+ """)
215
 
216
  if 'uplift_sim' in st.session_state and 'model_trainer' in st.session_state:
217
  df_test = st.session_state.model_trainer.df_test
 
277
  qini_conc_test = pd.concat([qini_conversions[discount][['S']], qini_benefits[discount][['S']]], axis=1)
278
  qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
279
  qini_conc_test.plot(ax=ax_comp, x='cate_conversion', y='cate_benefit', color=colors[i], label=f'{discount} model')
280
+
281
+ st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
282
+ st.write('In the last plot for example we can see that there is a region where offering 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact in overall conversion uplift while reducing our benefit loss considerably.')
283
 
284
  ax_comp.legend(prop={'size': 10})
285
  ax_comp.set_xlabel('CATE Conversion')
286
  ax_comp.set_ylabel('CATE Benefit')
287
  ax_comp.set_title('CATE Benefit vs CATE Conversion')
288
  st.pyplot(fig)
289
+
290
  else:
291
  st.error("Please ensure the model is trained and the dataset is prepared.")
models_utils/ml_models.py CHANGED
@@ -12,10 +12,13 @@ class ModelTraining:
12
  self.X_names = X_names
13
  self.df_train = None
14
  self.df_test = None
15
- self.learner_t = None
16
  self.conversion_learner_t = None
17
  self.benefit_learner_t = None
18
 
 
 
 
19
  def split_data(self, test_size, random_state):
20
  self.df_train, self.df_test = train_test_split(
21
  self.df,
@@ -24,32 +27,69 @@ class ModelTraining:
24
  )
25
 
26
  def fit_predict_classifier(self, params, control_name):
27
- self.learner_t = BaseTClassifier(XGBClassifier(**params), control_name=control_name)
28
- self.conversion_learner_t = self.learner_t
29
- return self._fit_predict()
 
 
 
 
 
30
 
31
  def fit_predict_regressor(self, params, control_name):
32
- self.learner_t = BaseTRegressor(XGBRegressor(**params), control_name=control_name)
33
- self.benefit_learner_t = self.learner_t
34
- return self._fit_predict()
35
-
36
- def _fit_predict(self):
37
- self.learner_t_tau = self.learner_t.fit_predict(
38
  X=self.df_train[self.X_names].values,
39
  treatment=self.df_train['treatment_group_key'].values,
40
  y=self.df_train[self.y_name].values
41
  )
42
- self.learner_t.feature_names = self.X_names
43
- return self.learner_t_tau
 
 
 
 
 
 
 
 
 
44
 
45
  def compute_feature_importance(self):
46
- if self.learner_t is None:
47
- raise ValueError("Model must be fitted before computing feature importances.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- return self.learner_t.get_importance(
50
- X=self.df_train[self.X_names],
51
- tau=self.learner_t_tau,
52
- features=self.X_names,
53
- normalize=True,
54
- method='auto'
55
- )
 
12
  self.X_names = X_names
13
  self.df_train = None
14
  self.df_test = None
15
+
16
  self.conversion_learner_t = None
17
  self.benefit_learner_t = None
18
 
19
+ self.conversion_learner_t_tau = None
20
+ self.benefit_learner_t_tau = None
21
+
22
  def split_data(self, test_size, random_state):
23
  self.df_train, self.df_test = train_test_split(
24
  self.df,
 
27
  )
28
 
29
  def fit_predict_classifier(self, params, control_name):
30
+ self.conversion_learner_t = BaseTClassifier(XGBClassifier(**params), control_name=control_name)
31
+ self.conversion_learner_t_tau = self.conversion_learner_t.fit_predict(
32
+ X=self.df_train[self.X_names].values,
33
+ treatment=self.df_train['treatment_group_key'].values,
34
+ y=self.df_train[self.y_name].values
35
+ )
36
+ self.conversion_learner_t.feature_names = self.X_names
37
+ return self.conversion_learner_t_tau
38
 
39
  def fit_predict_regressor(self, params, control_name):
40
+ self.benefit_learner_t = BaseTRegressor(XGBRegressor(**params), control_name=control_name)
41
+ self.benefit_learner_t_tau = self.benefit_learner_t.fit_predict(
 
 
 
 
42
  X=self.df_train[self.X_names].values,
43
  treatment=self.df_train['treatment_group_key'].values,
44
  y=self.df_train[self.y_name].values
45
  )
46
+ self.benefit_learner_t.feature_names = self.X_names
47
+ return self.benefit_learner_t_tau
48
+
49
+ # def _fit_predict(self):
50
+ # self.learner_t_tau = self.learner_t.fit_predict(
51
+ # X=self.df_train[self.X_names].values,
52
+ # treatment=self.df_train['treatment_group_key'].values,
53
+ # y=self.df_train[self.y_name].values
54
+ # )
55
+ # self.learner_t.feature_names = self.X_names
56
+ # return self.learner_t_tau
57
 
58
  def compute_feature_importance(self):
59
+
60
+ if self.y_name == 'conversion':
61
+
62
+ if self.conversion_learner_t is None:
63
+ raise ValueError("Model must be fitted before computing feature importances.")
64
+
65
+ return self.conversion_learner_t.get_importance(
66
+ X=self.df_train[self.X_names],
67
+ tau=self.conversion_learner_t_tau,
68
+ features=self.X_names,
69
+ normalize=True,
70
+ method='auto'
71
+ )
72
+
73
+ elif self.y_name == 'benefit':
74
+
75
+ if self.benefit_learner_t is None:
76
+ raise ValueError("Model must be fitted before computing feature importances.")
77
+
78
+ return self.benefit_learner_t.get_importance(
79
+ X=self.df_train[self.X_names],
80
+ tau=self.benefit_learner_t_tau,
81
+ features=self.X_names,
82
+ normalize=True,
83
+ method='auto'
84
+ )
85
+
86
+ # if self.learner_t is None:
87
+ # raise ValueError("Model must be fitted before computing feature importances.")
88
 
89
+ # return self.learner_t.get_importance(
90
+ # X=self.df_train[self.X_names],
91
+ # tau=self.learner_t_tau,
92
+ # features=self.X_names,
93
+ # normalize=True,
94
+ # method='auto'
95
+ # )