import pandas as pd
import numpy as np
import pyrsm as rsm
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
# Load the organic control data.
cg_organic_control = pd.read_parquet("cg_organic_control.parquet").reset_index(drop=True)

# Load the ad treatment data.
cg_ad_treatment = pd.read_parquet("cg_ad_treatment.parquet").reset_index(drop=True)

# Load the ad random data.
cg_ad_random = pd.read_parquet("cg_ad_random.parquet")

# a. Add "ad" to cg_ad_random and set its value to 1 for all rows
cg_ad_random["ad"] = 1

# b. Add "ad" to cg_organic_control and set its value to 0 for all rows
cg_organic_control["ad"] = 0

# c. Create a stacked dataset by combining cg_ad_random and cg_organic_control.
# ignore_index=True gives the stacked frame a fresh, unique RangeIndex; the two
# inputs may otherwise contribute overlapping row labels, which makes later
# index-aligned column assignments ambiguous.
cg_rct_stacked = pd.concat(
    [cg_ad_random, cg_organic_control], axis=0, ignore_index=True
)

# Numeric copy of the response: 1 for "yes", 0 for "no", NaN for anything else.
cg_rct_stacked["converted_yes"] = rsm.ifelse(
    cg_rct_stacked.converted == "yes",
    1,
    rsm.ifelse(cg_rct_stacked.converted == "no", 0, np.nan),
)

# d. Create a training variable (30% held out, stratified on response and ad;
# fixed random_state so the split is reproducible).
cg_rct_stacked["training"] = rsm.model.make_train(
    data=cg_rct_stacked,
    test_size=0.3,
    strat_var=["converted", "ad"],
    random_state=1234,
)
# Explanatory variables shared by every model below.
evar = [
    "GameLevel",
    "NumGameDays",
    "NumGameDays4Plus",
    "NumInGameMessagesSent",
    "NumFriends",
    "NumFriendRequestIgnored",
    "NumSpaceHeroBadges",
    "AcquiredSpaceship",
    "AcquiredIonWeapon",
    "TimesLostSpaceship",
    "TimesKilled",
    "TimesCaptain",
    "TimesNavigator",
    "PurchasedCoinPackSmall",
    "PurchasedCoinPackLarge",
    "NumAdsClicked",
    "DaysUser",
    "UserConsole",
    "UserHasOldOS",
]
# Logistic regression fitted on the training rows that received the ad.
lr_treatment = rsm.model.logistic(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 1")},
    rvar="converted",
    lev="yes",
    evar=evar,
)

# Logistic regression fitted on the training rows that did not receive the ad.
lr_control = rsm.model.logistic(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 0")},
    rvar="converted",
    lev="yes",
    evar=evar,
)

# Score every row (training and test) with both models.
cg_rct_stacked["pred_treatment"] = lr_treatment.predict(cg_rct_stacked)["prediction"]
cg_rct_stacked["pred_control"] = lr_control.predict(cg_rct_stacked)["prediction"]

# Uplift score: treatment-model prediction minus control-model prediction.
cg_rct_stacked["uplift_score"] = (
    cg_rct_stacked.pred_treatment - cg_rct_stacked.pred_control
)

# Uplift table on the hold-out rows, using 20 quantile bins of the uplift score.
uplift_tab = rsm.uplift_tab(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "uplift_score",
    "ad",
    1,
    qnt=20,
)
# Incremental uplift curve for the logistic uplift score on the hold-out rows.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "uplift_score",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gains curves of Uplift Model", fontsize=18)
plt.show()
# Machine Learning for Uplift
# Column
# Tab 1
# Row
# Uplift and Propensity using Logistic Regression
# Column
# Uplift plot for the logistic uplift score on the hold-out rows.
fig, ax = plt.subplots(figsize=(10, 6))  # Adjust the figure size here
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "uplift_score",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gains plot of Uplift Model", fontsize=18)

plt.show()
# fig = rsm.uplift_plot(
# cg_rct_stacked.query("training == 0"), "converted", "yes", "uplift_score", "ad", 1, qnt = 20
# )
# Column
# Same uplift table as for the uplift score, but ranking hold-out rows by the
# treatment model's prediction (the propensity model).
propensity_tab = rsm.uplift_tab(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "pred_treatment",
    "ad",
    1,
    qnt=20,
)

# Incremental uplift curve for the propensity model.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "pred_treatment",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gains curves of Propensity Model", fontsize=18)
plt.show()

# Uplift plot for the propensity model. The plot is drawn on `ax`, so the
# return value is not needed and `fig` keeps referring to the Figure.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "pred_treatment",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gains plot of Propensity Model", fontsize=18)
plt.show()
# Column
# Incremental uplift curves for the propensity and uplift rankings, drawn on
# the same axes for comparison (hold-out rows only).
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    ["pred_treatment", "uplift_score"],
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title(
    "Compare Gain Curves Uplift model and Propensity model", fontsize=18
)
plt.show()

# Uplift plots for the same two rankings.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    ["pred_treatment", "uplift_score"],
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title(
    "Compare Gain Plot Uplift model and Propensity model", fontsize=18
)
plt.show()
# Column
# Tab 2
# Row
# Neural Network Model
# Column
# Neural network (MLP) fitted on the training rows that received the ad.
clf_treatment = rsm.model.mlp(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 1")},
    rvar="converted",
    lev="yes",
    evar=evar,
    hidden_layer_sizes=(4, 2),
    alpha=0.0001,
)

# Neural network (MLP) fitted on the training rows that did not receive the ad.
# NOTE(review): alpha is 1 here but 0.0001 for the treatment model — presumably
# the result of separate tuning; confirm this difference is intentional.
clf_control = rsm.model.mlp(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 0")},
    rvar="converted",
    lev="yes",
    evar=evar,
    hidden_layer_sizes=(4, 2),
    alpha=1,
)

# Score every row (training and test) with both networks.
cg_rct_stacked["pred_treatment_nn"] = clf_treatment.predict(cg_rct_stacked)["prediction"]
cg_rct_stacked["pred_control_nn"] = clf_control.predict(cg_rct_stacked)["prediction"]

# Uplift score: treatment-model prediction minus control-model prediction.
cg_rct_stacked["uplift_score_nn"] = (
    cg_rct_stacked.pred_treatment_nn - cg_rct_stacked.pred_control_nn
)
# Incremental uplift curve for the neural-network uplift score (hold-out rows).
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "uplift_score_nn",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gain curves - Uplift plot", fontsize=18)
plt.show()

# Uplift plot for the neural-network uplift score.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "uplift_score_nn",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gain Plot - Uplift plot", fontsize=18)
plt.show()
# Column
# Incremental uplift curve when hold-out rows are ranked by the neural-network
# treatment model's prediction (propensity model).
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "pred_treatment_nn",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gain curves - Propensity Model", fontsize=18)

plt.show()

# Uplift plot for the same propensity ranking.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "pred_treatment_nn",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gain plot - Propensity Model", fontsize=18)

plt.show()
# Column
# Incremental uplift curves for the neural-network propensity and uplift
# rankings, drawn on the same axes for comparison (hold-out rows only).
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    ["pred_treatment_nn", "uplift_score_nn"],
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title(
    "Gain curves - Comparision Uplift and Propensity Model", fontsize=18
)

plt.show()

# Uplift plots for the same two rankings. `ax` is passed explicitly, as in the
# other plots, so the bars and the title are guaranteed to share the axes
# created above rather than relying on matplotlib's "current axes".
fig, ax = plt.subplots(figsize=(10, 6))
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    ["pred_treatment_nn", "uplift_score_nn"],
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title(
    "Gain plot - Comparision Uplift and Propensity Model", fontsize=18
)

plt.show()
# Column
# Tab 3
# Row
# Random Forest Model
# Column
# Random forest fitted on the training rows that received the ad.
rf_treatment = rsm.model.rforest(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 1")},
    rvar="converted",
    lev="yes",
    evar=evar,
    max_features=0.25,
    n_estimators=1000,
)

# Random forest fitted on the training rows that did not receive the ad.
rf_control = rsm.model.rforest(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 0")},
    rvar="converted",
    lev="yes",
    evar=evar,
    max_features=0.25,
    n_estimators=1000,
)

# Predictions: score every row (training and test) with both forests.
cg_rct_stacked["pred_treatment_rf"] = rf_treatment.predict(cg_rct_stacked)["prediction"]
cg_rct_stacked["pred_control_rf"] = rf_control.predict(cg_rct_stacked)["prediction"]

# Uplift score: treatment-model prediction minus control-model prediction.
cg_rct_stacked["uplift_score_rf"] = (
    cg_rct_stacked.pred_treatment_rf - cg_rct_stacked.pred_control_rf
)
# Incremental uplift curve for the random-forest uplift score (hold-out rows).
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "uplift_score_rf",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gain Curves - Uplift Model", fontsize=18)

plt.show()

# Uplift plot for the random-forest uplift score.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "uplift_score_rf",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gain Plot - Uplift Model", fontsize=18)

plt.show()
# Column
# Incremental uplift curve when hold-out rows are ranked by the random-forest
# treatment model's prediction (propensity model).
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "pred_treatment_rf",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gain Curves - Propensity Model", fontsize=18)

plt.show()

# Uplift plot for the same propensity ranking.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "pred_treatment_rf",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gain Plot - Propensity Model", fontsize=18)

plt.show()
# Column
# Incremental uplift curves for the random-forest propensity and uplift
# rankings, drawn on the same axes for comparison (hold-out rows only).
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    ["pred_treatment_rf", "uplift_score_rf"],
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title(
    "Gain curves - Comparision Uplift and Propensity Model", fontsize=18
)

plt.show()

# Uplift plots for the same two rankings. The plot is drawn on `ax`, so the
# return value is not needed and `fig` keeps referring to the Figure.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    ["pred_treatment_rf", "uplift_score_rf"],
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title(
    "Gain plot - Comparision Uplift and Propensity Model", fontsize=18
)
plt.show()