# Uplift model for the ad experiment: stack the randomly-advertised group and
# the organic control group, fit one logistic regression per group on the
# training rows, score every row with both models, and evaluate the
# difference in predictions (the uplift score) on the hold-out rows.

import pandas as pd
import numpy as np
import pyrsm as rsm
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt

# Load the organic control data
cg_organic_control = pd.read_parquet("cg_organic_control.parquet").reset_index(
    drop=True
)

# Load the treatment data
cg_ad_treatment = pd.read_parquet("cg_ad_treatment.parquet").reset_index(drop=True)

# Load the ad random data
cg_ad_random = pd.read_parquet("cg_ad_random.parquet")

# a. Add "ad" to cg_ad_random and set its value to 1 for all rows
cg_ad_random["ad"] = 1

# b. Add "ad" to cg_organic_control and set its value to 0 for all rows
cg_organic_control["ad"] = 0

# c. Create a stacked dataset by combining cg_ad_random and cg_organic_control.
#    ignore_index=True renumbers the rows 0..n-1 so the stacked frame cannot
#    carry repeated index labels from the two source frames; the columns added
#    below are assigned back onto this frame and rely on a clean index.
cg_rct_stacked = pd.concat(
    [cg_ad_random, cg_organic_control], axis=0, ignore_index=True
)

# Numeric version of the response: 1 for "yes", 0 for "no", NaN otherwise
cg_rct_stacked["converted_yes"] = rsm.ifelse(
    cg_rct_stacked.converted == "yes",
    1,
    rsm.ifelse(cg_rct_stacked.converted == "no", 0, np.nan),
)

# d. Create a training variable (30% test size, stratified on "converted" and
#    "ad", fixed random_state for reproducibility). Rows with training == 1 are
#    used for fitting below; rows with training == 0 are used for evaluation.
cg_rct_stacked["training"] = rsm.model.make_train(
    data=cg_rct_stacked,
    test_size=0.3,
    strat_var=["converted", "ad"],
    random_state=1234,
)

# Assign variables to evar
evar = [
    "GameLevel",
    "NumGameDays",
    "NumGameDays4Plus",
    "NumInGameMessagesSent",
    "NumFriends",
    "NumFriendRequestIgnored",
    "NumSpaceHeroBadges",
    "AcquiredSpaceship",
    "AcquiredIonWeapon",
    "TimesLostSpaceship",
    "TimesKilled",
    "TimesCaptain",
    "TimesNavigator",
    "PurchasedCoinPackSmall",
    "PurchasedCoinPackLarge",
    "NumAdsClicked",
    "DaysUser",
    "UserConsole",
    "UserHasOldOS",
]

# Logistic regression fitted on the training rows that received the ad
lr_treatment = rsm.model.logistic(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 1")},
    rvar="converted",
    lev="yes",
    evar=evar,
)

# Logistic regression fitted on the training rows that did not receive the ad
lr_control = rsm.model.logistic(
    data={"cg_rct_stacked": cg_rct_stacked.query("training == 1 & ad == 0")},
    rvar="converted",
    lev="yes",
    evar=evar,
)

# Score every row (training and test, both groups) with both models
cg_rct_stacked["pred_treatment"] = lr_treatment.predict(cg_rct_stacked)["prediction"]
cg_rct_stacked["pred_control"] = lr_control.predict(cg_rct_stacked)["prediction"]

# Uplift score: treatment-model prediction minus control-model prediction
cg_rct_stacked["uplift_score"] = (
    cg_rct_stacked.pred_treatment - cg_rct_stacked.pred_control
)

# Hold-out rows, filtered once so the table and the plot use the same data
cg_rct_test = cg_rct_stacked.query("training == 0")

# Uplift table on the hold-out rows, split into 20 quantile bins
uplift_tab = rsm.uplift_tab(
    cg_rct_test, "converted", "yes", "uplift_score", "ad", 1, qnt=20
)

# Incremental uplift plot on the same hold-out rows
fig, ax = plt.subplots(figsize=(10, 6))
rsm.inc_uplift_plot(
    cg_rct_test, "converted", "yes", "uplift_score", "ad", 1, qnt=20, ax=ax
)
_ = ax.set_title("Gains curves of Uplift Model", fontsize=18)
plt.show()