Machine Learning for Uplift

Column

Tab 1

Row

Uplift and Propensity using Logistic Regression

Column

import pandas as pd
import numpy as np
import pyrsm as rsm
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt

# Load the organic control data; the stored index is replaced with 0..n-1.
cg_organic_control = pd.read_parquet("cg_organic_control.parquet").reset_index(drop=True)

# Load the ad treatment data (not referenced again in this section).
cg_ad_treatment = pd.read_parquet("cg_ad_treatment.parquet").reset_index(drop=True)

# Load the ad random data, resetting the index like the other two loads.
cg_ad_random = pd.read_parquet("cg_ad_random.parquet").reset_index(drop=True)

# a. Add "ad" to cg_ad_random and set its value to 1 for all rows
cg_ad_random["ad"] = 1

# b. Add "ad" to cg_organic_control and set its value to 0 for all rows
cg_organic_control["ad"] = 0

# c. Create a stacked dataset by combining cg_ad_random and cg_organic_control.
# ignore_index=True gives the stacked frame unique row labels 0..n-1. Without
# it the labels of the two inputs repeat, and pandas aligns on row labels when
# a prediction Series is later assigned back as a new column.
cg_rct_stacked = pd.concat(
    [cg_ad_random, cg_organic_control], axis=0, ignore_index=True
)

# Numeric copy of the response: 1 for "yes", 0 for "no", NaN for anything else.
cg_rct_stacked["converted_yes"] = rsm.ifelse(
    cg_rct_stacked.converted == "yes",
    1,
    rsm.ifelse(cg_rct_stacked.converted == "no", 0, np.nan),
)

# d. Create a training variable (test_size=0.3, stratified on "converted" and
# "ad", fixed random_state). The code below fits models on rows where
# training == 1 and evaluates them on rows where training == 0.
cg_rct_stacked["training"] = rsm.model.make_train(
    data=cg_rct_stacked,
    test_size=0.3,
    strat_var=["converted", "ad"],
    random_state=1234,
)

# Explanatory variables: the column names handed to each model as `evar`.
evar = [
    "GameLevel",
    "NumGameDays",
    "NumGameDays4Plus",
    "NumInGameMessagesSent",
    "NumFriends",
    "NumFriendRequestIgnored",
    "NumSpaceHeroBadges",
    "AcquiredSpaceship",
    "AcquiredIonWeapon",
    "TimesLostSpaceship",
    "TimesKilled",
    "TimesCaptain",
    "TimesNavigator",
    "PurchasedCoinPackSmall",
    "PurchasedCoinPackLarge",
    "NumAdsClicked",
    "DaysUser",
    "UserConsole",
    "UserHasOldOS",
]

# Fit two separate logistic regressions on the training rows: one on the rows
# with ad == 1 and one on the rows with ad == 0. Both model converted == "yes"
# using the same explanatory variables.
lr_treatment = rsm.model.logistic(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 1")},
    rvar="converted",
    lev="yes",
    evar=evar,
)
lr_control = rsm.model.logistic(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 0")},
    rvar="converted",
    lev="yes",
    evar=evar,
)

# Score every row of the stacked data (training and hold-out) with each model.
for score_col, fitted in (
    ("pred_treatment", lr_treatment),
    ("pred_control", lr_control),
):
    cg_rct_stacked[score_col] = fitted.predict(cg_rct_stacked)["prediction"]

# Uplift score = treatment-model prediction minus control-model prediction.
cg_rct_stacked["uplift_score"] = (
    cg_rct_stacked["pred_treatment"] - cg_rct_stacked["pred_control"]
)

# Evaluate the logistic-regression uplift score on the hold-out rows
# (training == 0). qnt=20 is presumably the number of quantile bins -- confirm
# against the pyrsm documentation.
uplift_tab = rsm.uplift_tab(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "uplift_score",
    "ad",
    1,
    qnt=20,
)

# Draw the two uplift figures in turn, each on its own 10x6 figure.
for draw, heading in (
    (rsm.inc_uplift_plot, "Gains curves of Uplift Model"),
    (rsm.uplift_plot, "Gains plot of Uplift Model"),
):
    fig, ax = plt.subplots(figsize=(10, 6))
    draw(
        cg_rct_stacked.query("training == 0"),
        "converted",
        "yes",
        "uplift_score",
        "ad",
        1,
        qnt=20,
        ax=ax,
    )
    _ = ax.set_title(heading, fontsize=18)
    plt.show()

Column

# Evaluate the propensity score (the treatment-model prediction,
# "pred_treatment") on the hold-out rows (training == 0), using the same
# table and plot helpers as for the uplift score.
propensity_tab = rsm.uplift_tab(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "pred_treatment",
    "ad",
    1,
    qnt=20,
)

fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "pred_treatment",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gains curves of Propensity Model", fontsize=18)
plt.show()

fig, ax = plt.subplots(figsize=(10, 6))
# The return value is deliberately not assigned, so `fig` keeps referring to
# the Figure created by plt.subplots above.
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    "pred_treatment",
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title("Gains plot of Propensity Model", fontsize=18)
# Show this figure explicitly, like every other figure in the script, rather
# than leaving it open until the next plt.show() call.
plt.show()

Column

# Compare the logistic-regression propensity score ("pred_treatment") and
# uplift score ("uplift_score") in the same figure, on the hold-out rows.
for draw, heading in (
    (
        rsm.inc_uplift_plot,
        "Compare Gain Curves Uplift model and Propensity model",
    ),
    (
        rsm.uplift_plot,
        "Compare Gain Plot Uplift model and Propensity model",
    ),
):
    fig, ax = plt.subplots(figsize=(10, 6))
    draw(
        cg_rct_stacked.query("training == 0"),
        "converted",
        "yes",
        ["pred_treatment", "uplift_score"],
        "ad",
        1,
        qnt=20,
        ax=ax,
    )
    _ = ax.set_title(heading, fontsize=18)
    plt.show()

Column

Tab 2

Row

Neural Network Model

Column

# Neural-network (rsm.model.mlp) version of the uplift setup: one network is
# fitted on the training rows with ad == 1, another on those with ad == 0.
# Both use hidden_layer_sizes (4, 2); they differ only in alpha (0.0001 vs 1).
clf_treatment = rsm.model.mlp(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 1")},
    rvar="converted",
    lev="yes",
    evar=evar,
    hidden_layer_sizes=(4, 2),
    alpha=0.0001,
)
clf_control = rsm.model.mlp(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 0")},
    rvar="converted",
    lev="yes",
    evar=evar,
    hidden_layer_sizes=(4, 2),
    alpha=1,
)

# Score every row of the stacked data with each network.
for score_col, fitted in (
    ("pred_treatment_nn", clf_treatment),
    ("pred_control_nn", clf_control),
):
    cg_rct_stacked[score_col] = fitted.predict(cg_rct_stacked)["prediction"]

# Uplift score = treatment-network prediction minus control-network prediction.
cg_rct_stacked["uplift_score_nn"] = (
    cg_rct_stacked["pred_treatment_nn"] - cg_rct_stacked["pred_control_nn"]
)

# Plot the neural-network uplift score ("uplift_score_nn") on the hold-out
# rows (training == 0), one figure per plotting helper.
for draw, heading in (
    (rsm.inc_uplift_plot, "Gain curves - Uplift plot"),
    (rsm.uplift_plot, "Gain Plot - Uplift plot"),
):
    fig, ax = plt.subplots(figsize=(10, 6))
    draw(
        cg_rct_stacked.query("training == 0"),
        "converted",
        "yes",
        "uplift_score_nn",
        "ad",
        1,
        qnt=20,
        ax=ax,
    )
    _ = ax.set_title(heading, fontsize=18)
    plt.show()

Column

# Plot the neural-network propensity score ("pred_treatment_nn") on the
# hold-out rows (training == 0), one figure per plotting helper.
for draw, heading in (
    (rsm.inc_uplift_plot, "Gain curves - Propensity Model"),
    (rsm.uplift_plot, "Gain plot - Propensity Model"),
):
    fig, ax = plt.subplots(figsize=(10, 6))
    draw(
        cg_rct_stacked.query("training == 0"),
        "converted",
        "yes",
        "pred_treatment_nn",
        "ad",
        1,
        qnt=20,
        ax=ax,
    )
    _ = ax.set_title(heading, fontsize=18)

    plt.show()

Column

# Compare the neural-network propensity score ("pred_treatment_nn") and uplift
# score ("uplift_score_nn") in the same figure, on the hold-out rows.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    ["pred_treatment_nn", "uplift_score_nn"],
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title('Gain curves - Comparision Uplift and Propensity Model', fontsize=18)

plt.show()

fig, ax = plt.subplots(figsize=(10, 6))
# ax=ax directs the plot onto the axes created just above, so the title set
# below is applied to the axes that actually hold the plot.
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    ["pred_treatment_nn", "uplift_score_nn"],
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title('Gain plot - Comparision Uplift and Propensity Model', fontsize=18)

plt.show()

Column

Tab 3

Row

Random Forest Model

Column

# Random-forest (rsm.model.rforest) version of the uplift setup. Both forests
# share every setting; only the training subset (ad == 1 vs ad == 0) differs.
rf_settings = dict(
    rvar="converted",
    lev="yes",
    evar=evar,
    max_features=0.25,
    n_estimators=1000,
)

rf_treatment = rsm.model.rforest(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 1")},
    **rf_settings,
)
rf_control = rsm.model.rforest(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 0")},
    **rf_settings,
)

# Predictions: score every row of the stacked data with each forest.
for score_col, fitted in (
    ("pred_treatment_rf", rf_treatment),
    ("pred_control_rf", rf_control),
):
    cg_rct_stacked[score_col] = fitted.predict(cg_rct_stacked)["prediction"]

# Uplift score = treatment-forest prediction minus control-forest prediction.
cg_rct_stacked["uplift_score_rf"] = (
    cg_rct_stacked["pred_treatment_rf"] - cg_rct_stacked["pred_control_rf"]
)

# Plot the random-forest uplift score ("uplift_score_rf") on the hold-out
# rows (training == 0), one figure per plotting helper.
for draw, heading in (
    (rsm.inc_uplift_plot, "Gain Curves - Uplift Model"),
    (rsm.uplift_plot, "Gain Plot - Uplift Model"),
):
    fig, ax = plt.subplots(figsize=(10, 6))
    draw(
        cg_rct_stacked.query("training == 0"),
        "converted",
        "yes",
        "uplift_score_rf",
        "ad",
        1,
        qnt=20,
        ax=ax,
    )
    _ = ax.set_title(heading, fontsize=18)

    plt.show()

Column

# Plot the random-forest propensity score ("pred_treatment_rf") on the
# hold-out rows (training == 0), one figure per plotting helper.
for draw, heading in (
    (rsm.inc_uplift_plot, "Gain Curves - Propensity Model"),
    (rsm.uplift_plot, "Gain Plot - Propensity Model"),
):
    fig, ax = plt.subplots(figsize=(10, 6))
    draw(
        cg_rct_stacked.query("training == 0"),
        "converted",
        "yes",
        "pred_treatment_rf",
        "ad",
        1,
        qnt=20,
        ax=ax,
    )
    _ = ax.set_title(heading, fontsize=18)

    plt.show()

Column

# Compare the random-forest propensity score ("pred_treatment_rf") and uplift
# score ("uplift_score_rf") in the same figure, on the hold-out rows.
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    ["pred_treatment_rf", "uplift_score_rf"],
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title('Gain curves - Comparision Uplift and Propensity Model', fontsize=18)

plt.show()

fig, ax = plt.subplots(figsize=(10, 6))
# The return value is deliberately not assigned, so `fig` keeps referring to
# the Figure created by plt.subplots rather than to whatever the plot helper
# returns.
rsm.uplift_plot(
    cg_rct_stacked.query("training == 0"),
    "converted",
    "yes",
    ["pred_treatment_rf", "uplift_score_rf"],
    "ad",
    1,
    qnt=20,
    ax=ax,
)
_ = ax.set_title('Gain plot - Comparision Uplift and Propensity Model', fontsize=18)
plt.show()