Spaces:
Sleeping
Sleeping
Commit
·
019a614
1
Parent(s):
29e4361
code updates
Browse files
- app.py +14 -2
- models_utils/ml_models.py +61 -21
app.py
CHANGED
@@ -150,6 +150,11 @@ if tabs == "Exploratory analysis":
|
|
150 |
if tabs == "Model training":
|
151 |
|
152 |
st.header("Model Training")
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
if 'uplift_sim' in st.session_state:
|
155 |
|
@@ -177,7 +182,7 @@ if tabs == "Model training":
|
|
177 |
y_name = 'conversion' # st.selectbox('Select target variable for conversion', options=uplift_sim.target_options)
|
178 |
model_trainer.y_name = y_name
|
179 |
tau = model_trainer.fit_predict_classifier(params, control_name)
|
180 |
-
elif model_type == '
|
181 |
y_name = 'benefit' # st.selectbox('Select target variable for benefit', options=uplift_sim.benefit_options)
|
182 |
model_trainer.y_name = y_name
|
183 |
tau = model_trainer.fit_predict_regressor(params, control_name)
|
@@ -203,6 +208,10 @@ if tabs == "Model training":
|
|
203 |
if tabs == "Economic effects":
|
204 |
|
205 |
st.header("Economic Effects Analysis")
|
|
|
|
|
|
|
|
|
206 |
|
207 |
if 'uplift_sim' in st.session_state and 'model_trainer' in st.session_state:
|
208 |
df_test = st.session_state.model_trainer.df_test
|
@@ -268,12 +277,15 @@ if tabs == "Economic effects":
|
|
268 |
qini_conc_test = pd.concat([qini_conversions[discount][['S']], qini_benefits[discount][['S']]], axis=1)
|
269 |
qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
|
270 |
qini_conc_test.plot(ax=ax_comp, x='cate_conversion', y='cate_benefit', color=colors[i], label=f'{discount} model')
|
|
|
|
|
|
|
271 |
|
272 |
ax_comp.legend(prop={'size': 10})
|
273 |
ax_comp.set_xlabel('CATE Conversion')
|
274 |
ax_comp.set_ylabel('CATE Benefit')
|
275 |
ax_comp.set_title('CATE Benefit vs CATE Conversion')
|
276 |
st.pyplot(fig)
|
277 |
-
|
278 |
else:
|
279 |
st.error("Please ensure the model is trained and the dataset is prepared.")
|
|
|
150 |
if tabs == "Model training":
|
151 |
|
152 |
st.header("Model Training")
|
153 |
+
st.write("""
|
154 |
+
In this section, we train a model to predict the uplift effect of different treatments on customer behavior.
|
155 |
+
We use the XGBoost algorithm to train the model. The model can be used to predict the conversion rate or the benefit per user for each treatment group.
|
156 |
+
We can also analyze the economic effects of the treatments by comparing the uplift in conversion rate and benefit per user.
|
157 |
+
""")
|
158 |
|
159 |
if 'uplift_sim' in st.session_state:
|
160 |
|
|
|
182 |
y_name = 'conversion' # st.selectbox('Select target variable for conversion', options=uplift_sim.target_options)
|
183 |
model_trainer.y_name = y_name
|
184 |
tau = model_trainer.fit_predict_classifier(params, control_name)
|
185 |
+
elif model_type == 'Benefit Model':
|
186 |
y_name = 'benefit' # st.selectbox('Select target variable for benefit', options=uplift_sim.benefit_options)
|
187 |
model_trainer.y_name = y_name
|
188 |
tau = model_trainer.fit_predict_regressor(params, control_name)
|
|
|
208 |
if tabs == "Economic effects":
|
209 |
|
210 |
st.header("Economic Effects Analysis")
|
211 |
+
st.write("""
|
212 |
+
We can evaluate our models by looking at the Qini curves. We can use the CATE conversion model to evaluate the performance on both the Conversion and the Benefit as a function of the fraction of users targeted.
|
213 |
+
The Qini curve is a measure of the uplift effect of a model. It shows the difference between the uplift model and a random model.
|
214 |
+
""")
|
215 |
|
216 |
if 'uplift_sim' in st.session_state and 'model_trainer' in st.session_state:
|
217 |
df_test = st.session_state.model_trainer.df_test
|
|
|
277 |
qini_conc_test = pd.concat([qini_conversions[discount][['S']], qini_benefits[discount][['S']]], axis=1)
|
278 |
qini_conc_test.columns = ['cate_conversion', 'cate_benefit']
|
279 |
qini_conc_test.plot(ax=ax_comp, x='cate_conversion', y='cate_benefit', color=colors[i], label=f'{discount} model')
|
280 |
+
|
281 |
+
st.write('To simplify the comparison, we can plot the CATE Benefit as a function of the CATE conversion.')
|
282 |
+
st.write('In the last plot for example we can see that there is a region where offering 15% discount to a targeted group of users is more efficient than giving 10% to everyone. We can obtain the same impact in overall conversion uplift while reducing our benefit loss considerably.')
|
283 |
|
284 |
ax_comp.legend(prop={'size': 10})
|
285 |
ax_comp.set_xlabel('CATE Conversion')
|
286 |
ax_comp.set_ylabel('CATE Benefit')
|
287 |
ax_comp.set_title('CATE Benefit vs CATE Conversion')
|
288 |
st.pyplot(fig)
|
289 |
+
|
290 |
else:
|
291 |
st.error("Please ensure the model is trained and the dataset is prepared.")
|
models_utils/ml_models.py
CHANGED
@@ -12,10 +12,13 @@ class ModelTraining:
|
|
12 |
self.X_names = X_names
|
13 |
self.df_train = None
|
14 |
self.df_test = None
|
15 |
-
|
16 |
self.conversion_learner_t = None
|
17 |
self.benefit_learner_t = None
|
18 |
|
|
|
|
|
|
|
19 |
def split_data(self, test_size, random_state):
|
20 |
self.df_train, self.df_test = train_test_split(
|
21 |
self.df,
|
@@ -24,32 +27,69 @@ class ModelTraining:
|
|
24 |
)
|
25 |
|
26 |
def fit_predict_classifier(self, params, control_name):
|
27 |
-
self.
|
28 |
-
self.
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
def fit_predict_regressor(self, params, control_name):
|
32 |
-
self.
|
33 |
-
self.
|
34 |
-
return self._fit_predict()
|
35 |
-
|
36 |
-
def _fit_predict(self):
|
37 |
-
self.learner_t_tau = self.learner_t.fit_predict(
|
38 |
X=self.df_train[self.X_names].values,
|
39 |
treatment=self.df_train['treatment_group_key'].values,
|
40 |
y=self.df_train[self.y_name].values
|
41 |
)
|
42 |
-
self.
|
43 |
-
return self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
def compute_feature_importance(self):
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
return self.learner_t.get_importance(
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
)
|
|
|
12 |
self.X_names = X_names
|
13 |
self.df_train = None
|
14 |
self.df_test = None
|
15 |
+
|
16 |
self.conversion_learner_t = None
|
17 |
self.benefit_learner_t = None
|
18 |
|
19 |
+
self.conversion_learner_t_tau = None
|
20 |
+
self.benefit_learner_t_tau = None
|
21 |
+
|
22 |
def split_data(self, test_size, random_state):
|
23 |
self.df_train, self.df_test = train_test_split(
|
24 |
self.df,
|
|
|
27 |
)
|
28 |
|
29 |
def fit_predict_classifier(self, params, control_name):
|
30 |
+
self.conversion_learner_t = BaseTClassifier(XGBClassifier(**params), control_name=control_name)
|
31 |
+
self.conversion_learner_t_tau = self.conversion_learner_t.fit_predict(
|
32 |
+
X=self.df_train[self.X_names].values,
|
33 |
+
treatment=self.df_train['treatment_group_key'].values,
|
34 |
+
y=self.df_train[self.y_name].values
|
35 |
+
)
|
36 |
+
self.conversion_learner_t.feature_names = self.X_names
|
37 |
+
return self.conversion_learner_t_tau
|
38 |
|
39 |
def fit_predict_regressor(self, params, control_name):
|
40 |
+
self.benefit_learner_t = BaseTRegressor(XGBRegressor(**params), control_name=control_name)
|
41 |
+
self.benefit_learner_t_tau = self.benefit_learner_t.fit_predict(
|
|
|
|
|
|
|
|
|
42 |
X=self.df_train[self.X_names].values,
|
43 |
treatment=self.df_train['treatment_group_key'].values,
|
44 |
y=self.df_train[self.y_name].values
|
45 |
)
|
46 |
+
self.benefit_learner_t.feature_names = self.X_names
|
47 |
+
return self.benefit_learner_t_tau
|
48 |
+
|
49 |
+
# def _fit_predict(self):
|
50 |
+
# self.learner_t_tau = self.learner_t.fit_predict(
|
51 |
+
# X=self.df_train[self.X_names].values,
|
52 |
+
# treatment=self.df_train['treatment_group_key'].values,
|
53 |
+
# y=self.df_train[self.y_name].values
|
54 |
+
# )
|
55 |
+
# self.learner_t.feature_names = self.X_names
|
56 |
+
# return self.learner_t_tau
|
57 |
|
58 |
def compute_feature_importance(self):
|
59 |
+
|
60 |
+
if self.y_name == 'conversion':
|
61 |
+
|
62 |
+
if self.conversion_learner_t is None:
|
63 |
+
raise ValueError("Model must be fitted before computing feature importances.")
|
64 |
+
|
65 |
+
return self.conversion_learner_t.get_importance(
|
66 |
+
X=self.df_train[self.X_names],
|
67 |
+
tau=self.conversion_learner_t_tau,
|
68 |
+
features=self.X_names,
|
69 |
+
normalize=True,
|
70 |
+
method='auto'
|
71 |
+
)
|
72 |
+
|
73 |
+
elif self.y_name == 'benefit':
|
74 |
+
|
75 |
+
if self.benefit_learner_t is None:
|
76 |
+
raise ValueError("Model must be fitted before computing feature importances.")
|
77 |
+
|
78 |
+
return self.benefit_learner_t.get_importance(
|
79 |
+
X=self.df_train[self.X_names],
|
80 |
+
tau=self.benefit_learner_t_tau,
|
81 |
+
features=self.X_names,
|
82 |
+
normalize=True,
|
83 |
+
method='auto'
|
84 |
+
)
|
85 |
+
|
86 |
+
# if self.learner_t is None:
|
87 |
+
# raise ValueError("Model must be fitted before computing feature importances.")
|
88 |
|
89 |
+
# return self.learner_t.get_importance(
|
90 |
+
# X=self.df_train[self.X_names],
|
91 |
+
# tau=self.learner_t_tau,
|
92 |
+
# features=self.X_names,
|
93 |
+
# normalize=True,
|
94 |
+
# method='auto'
|
95 |
+
# )
|