S-mobile Customer Churn

# to render w/ a different product type:
# quarto render dashboard.qmd -P product_type:Premium
import pandas as pd
# from helpers import color_palette, plpipot_churn_reason, plot_contract_type, table_purchases
from matplotlib import rcParams
from itables import show
import matplotlib.pyplot as plt
import seaborn as sns

rcParams.update({"savefig.bbox": "tight"})

churn = pd.read_parquet('s_mobile.parquet')
churn.drop('customer', axis=1, errors='ignore', inplace=True)

Churn

Row

dict(
  icon = "archive",
  color = "light",
  value = "69,000"
)
{'icon': 'archive', 'color': 'light', 'value': '69,000'}
dict(
  icon = "graph-down",
  color = "light",
  value = "20,100"
)
{'icon': 'graph-down', 'color': 'light', 'value': '20,100'}
dict(
  icon = "graph-down",
  color = "light",
  value = "48,900"
)
{'icon': 'graph-down', 'color': 'light', 'value': '48,900'}
dict(
  icon = "bullseye",
  color = "warning",
  value = "20%"
)
{'icon': 'bullseye', 'color': 'warning', 'value': '20%'}

Row

plt.figure(figsize=(10, 6))
sns.boxplot(x='churn', y='custcare', data=churn, palette = ['#ff9999', '#66b3ff'])
_ =plt.title('Customer Care Calls by Churn Status')
_ =plt.xlabel('Churn')
_ =plt.ylabel('Customer Care Calls')
plt.show()
/var/folders/28/cfl1_cfs3bb536qkz8wkys_w0000gn/T/ipykernel_85583/423151031.py:2: FutureWarning:



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

plt.figure(figsize=(10, 6))
sns.countplot(x='refurb', hue='churn', data=churn, palette = ['#ff9999', '#66b3ff'])
_ =plt.title('Churn Rate by Refurbished Status')
_ =plt.xlabel('Refurbished Status')
_ =plt.ylabel('Count')
plt.show()

Row

plt.figure(figsize=(10, 6))
mapping_dict = {'yes': 1, 'no': 0}

churn_num = churn[['changer', 'changem', 'revenue', 'mou', 'overage', 'roam', 'conference', 'months', 'uniqsubs', 'custcare', 'retcalls', 'dropvce', 'eqpdays']]
for col in churn_num.columns:
    if churn_num[col].dtype == 'object':  # only apply map to object columns
        churn_num[col] = churn_num[col].map(mapping_dict)

# Now, convert the remaining non-numeric values to NaN
for col in churn_num.columns:
    churn_num[col] = pd.to_numeric(churn_num[col], errors='coerce')

corr = churn_num.corr()
sns.heatmap(corr, annot=True, cmap='coolwarm', center=0)
_ =plt.title('Correlation Heatmap')
plt.show()
/var/folders/28/cfl1_cfs3bb536qkz8wkys_w0000gn/T/ipykernel_85583/2138873192.py:11: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

plt.figure(figsize=(10, 6))
sns.scatterplot(x='mou', y='revenue', hue='churn', data=churn, palette=['#ff9999', '#66b3ff'])
_ =plt.title('Revenue vs. Minutes of Use by Churn Status')
_ =plt.xlabel('Minutes of Use')
_ =plt.ylabel('Revenue')
plt.show()

Row

# Calculate summary statistics
total_customers = churn.shape[0]
churned_customers = churn[churn['churn'] == 1].shape[0]
non_churned_customers = churn[churn['churn'] == 0].shape[0]
churned_percentage = (churned_customers / total_customers) * 100
non_churned_percentage = (non_churned_customers / total_customers) * 100

# Revenue and usage metrics
revenue_stats = churn['revenue'].describe()
mou_stats = churn['mou'].describe()
overage_stats = churn['overage'].describe()
roam_stats = churn['roam'].describe()
conference_stats = churn['conference'].describe()

# Customer service interaction
custcare_stats = churn['custcare'].describe()
retcalls_stats = churn['retcalls'].describe()
dropvce_stats = churn['dropvce'].describe()

# Device and service attributes
refurbished_percentage = (churn['refurb'] == 1).mean() * 100
smartphone_percentage = (churn['smartphone'] == 1).mean() * 100
highcreditr_percentage = (churn['highcreditr'] == 1).mean() * 100
mcycle_percentage = (churn['mcycle'] == 1).mean() * 100
car_percentage = (churn['car'] == 1).mean() * 100
travel_percentage = (churn['travel'] == 1).mean() * 100

# Regional and demographic insights
region_distribution = churn['region'].value_counts(normalize=True) * 100
occupation_distribution = churn['occupation'].value_counts(normalize=True) * 100

# Create a summary statistics table
summary_table = pd.DataFrame({
    'Metric': [
        'Total Customers', 'Churned Customers', 'Non-Churned Customers', 'Churned Percentage', 'Non-Churned Percentage',
        'Mean Revenue', 'Median Revenue', 'Revenue Std Dev',
        'Mean MOU', 'Median MOU', 'MOU Std Dev',
        'Mean Overage', 'Median Overage', 'Overage Std Dev',
        'Mean Roaming Calls', 'Median Roaming Calls', 'Roaming Calls Std Dev',
        'Mean Conference Calls', 'Median Conference Calls', 'Conference Calls Std Dev',
        'Mean Customer Care Calls', 'Median Customer Care Calls', 'Customer Care Calls Std Dev',
        'Mean Retention Calls', 'Median Retention Calls', 'Retention Calls Std Dev',
        'Mean Dropped Calls', 'Median Dropped Calls', 'Dropped Calls Std Dev',
        'Percentage with Refurbished Devices', 'Percentage with Smartphones',
        'Percentage with High Credit Rating', 'Percentage with Motorcycles',
        'Percentage with Cars', 'Percentage with International Travel',
    ],
    'Value': [
        total_customers, churned_customers, non_churned_customers, churned_percentage, non_churned_percentage,
        revenue_stats['mean'], revenue_stats['50%'], revenue_stats['std'],
        mou_stats['mean'], mou_stats['50%'], mou_stats['std'],
        overage_stats['mean'], overage_stats['50%'], overage_stats['std'],
        roam_stats['mean'], roam_stats['50%'], roam_stats['std'],
        conference_stats['mean'], conference_stats['50%'], conference_stats['std'],
        custcare_stats['mean'], custcare_stats['50%'], custcare_stats['std'],
        retcalls_stats['mean'], retcalls_stats['50%'], retcalls_stats['std'],
        dropvce_stats['mean'], dropvce_stats['50%'], dropvce_stats['std'],
        refurbished_percentage, smartphone_percentage,
        highcreditr_percentage, mcycle_percentage,
        car_percentage, travel_percentage,
    ]
})

summary_table
Metric Value
0 Total Customers 69000.000000
1 Churned Customers 0.000000
2 Non-Churned Customers 0.000000
3 Churned Percentage 0.000000
4 Non-Churned Percentage 0.000000
5 Mean Revenue 56.959203
6 Median Revenue 46.000000
7 Revenue Std Dev 43.439200
8 Mean MOU 525.822290
9 Median MOU 366.000000
10 MOU Std Dev 530.024730
11 Mean Overage 64.076725
12 Median Overage 40.000000
13 Overage Std Dev 68.671857
14 Mean Roaming Calls 1.195493
15 Median Roaming Calls 0.000000
16 Roaming Calls Std Dev 9.083502
17 Mean Conference Calls 0.276275
18 Median Conference Calls 0.000000
19 Conference Calls Std Dev 1.173325
20 Mean Customer Care Calls 2.377652
21 Median Customer Care Calls 0.000000
22 Customer Care Calls Std Dev 4.253015
23 Mean Retention Calls 0.035290
24 Median Retention Calls 0.000000
25 Retention Calls Std Dev 0.201412
26 Mean Dropped Calls 2.986913
27 Median Dropped Calls 2.000000
28 Dropped Calls Std Dev 4.539006
29 Percentage with Refurbished Devices 0.000000
30 Percentage with Smartphones 0.000000
31 Percentage with High Credit Rating 0.000000
32 Percentage with Motorcycles 0.000000
33 Percentage with Cars 0.000000
34 Percentage with International Travel 0.000000

Data

show(churn)
churn changer changem revenue mou overage roam conference months uniqsubs custcare retcalls dropvce eqpdays refurb smartphone highcreditr mcycle car travel region occupation training representative
Loading ITables v2.0.1 from the internet... (need help?)
# show(churn[churn['product'] == product_type]
#   .groupby(['contract', 'state'])
#   .size()
#   .reset_index(name='count'))