# to render w/ a different product type:
# quarto render dashboard.qmd -P product_type:Premium
S-mobile Customer Churn
import pandas as pd
# from helpers import color_palette, plpipot_churn_reason, plot_contract_type, table_purchases
from matplotlib import rcParams
from itables import show
import matplotlib.pyplot as plt
import seaborn as sns
"savefig.bbox": "tight"})
rcParams.update({
= pd.read_parquet('s_mobile.parquet')
churn 'customer', axis=1, errors='ignore', inplace=True) churn.drop(
Churn
Row
dict(
= "archive",
icon = "light",
color = "69,000"
value )
{'icon': 'archive', 'color': 'light', 'value': '69,000'}
dict(
= "graph-down",
icon = "light",
color = "20,100"
value )
{'icon': 'graph-down', 'color': 'light', 'value': '20,100'}
dict(
= "graph-down",
icon = "light",
color = "48,900"
value )
{'icon': 'graph-down', 'color': 'light', 'value': '48,900'}
dict(
= "bullseye",
icon = "warning",
color = "20%"
value )
{'icon': 'bullseye', 'color': 'warning', 'value': '20%'}
Row
=(10, 6))
plt.figure(figsize='churn', y='custcare', data=churn, palette = ['#ff9999', '#66b3ff'])
sns.boxplot(x=plt.title('Customer Care Calls by Churn Status')
_ =plt.xlabel('Churn')
_ =plt.ylabel('Customer Care Calls')
_ plt.show()
/var/folders/28/cfl1_cfs3bb536qkz8wkys_w0000gn/T/ipykernel_85583/423151031.py:2: FutureWarning:
Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.
=(10, 6))
plt.figure(figsize='refurb', hue='churn', data=churn, palette = ['#ff9999', '#66b3ff'])
sns.countplot(x=plt.title('Churn Rate by Refurbished Status')
_ =plt.xlabel('Refurbished Status')
_ =plt.ylabel('Count')
_ plt.show()
Row
=(10, 6))
plt.figure(figsize= {'yes': 1, 'no': 0}
mapping_dict
= churn[['changer', 'changem', 'revenue', 'mou', 'overage', 'roam', 'conference', 'months', 'uniqsubs', 'custcare', 'retcalls', 'dropvce', 'eqpdays']]
churn_num for col in churn_num.columns:
if churn_num[col].dtype == 'object': # only apply map to object columns
= churn_num[col].map(mapping_dict)
churn_num[col]
# Now, convert the remaining non-numeric values to NaN
for col in churn_num.columns:
= pd.to_numeric(churn_num[col], errors='coerce')
churn_num[col]
= churn_num.corr()
corr =True, cmap='coolwarm', center=0)
sns.heatmap(corr, annot=plt.title('Correlation Heatmap')
_ plt.show()
/var/folders/28/cfl1_cfs3bb536qkz8wkys_w0000gn/T/ipykernel_85583/2138873192.py:11: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
=(10, 6))
plt.figure(figsize='mou', y='revenue', hue='churn', data=churn, palette=['#ff9999', '#66b3ff'])
sns.scatterplot(x=plt.title('Revenue vs. Minutes of Use by Churn Status')
_ =plt.xlabel('Minutes of Use')
_ =plt.ylabel('Revenue')
_ plt.show()
Row
# Calculate summary statistics
= churn.shape[0]
total_customers = churn[churn['churn'] == 1].shape[0]
churned_customers = churn[churn['churn'] == 0].shape[0]
non_churned_customers = (churned_customers / total_customers) * 100
churned_percentage = (non_churned_customers / total_customers) * 100
non_churned_percentage
# Revenue and usage metrics
= churn['revenue'].describe()
revenue_stats = churn['mou'].describe()
mou_stats = churn['overage'].describe()
overage_stats = churn['roam'].describe()
roam_stats = churn['conference'].describe()
conference_stats
# Customer service interaction
= churn['custcare'].describe()
custcare_stats = churn['retcalls'].describe()
retcalls_stats = churn['dropvce'].describe()
dropvce_stats
# Device and service attributes
= (churn['refurb'] == 1).mean() * 100
refurbished_percentage = (churn['smartphone'] == 1).mean() * 100
smartphone_percentage = (churn['highcreditr'] == 1).mean() * 100
highcreditr_percentage = (churn['mcycle'] == 1).mean() * 100
mcycle_percentage = (churn['car'] == 1).mean() * 100
car_percentage = (churn['travel'] == 1).mean() * 100
travel_percentage
# Regional and demographic insights
= churn['region'].value_counts(normalize=True) * 100
region_distribution = churn['occupation'].value_counts(normalize=True) * 100
occupation_distribution
# Create a summary statistics table
= pd.DataFrame({
summary_table 'Metric': [
'Total Customers', 'Churned Customers', 'Non-Churned Customers', 'Churned Percentage', 'Non-Churned Percentage',
'Mean Revenue', 'Median Revenue', 'Revenue Std Dev',
'Mean MOU', 'Median MOU', 'MOU Std Dev',
'Mean Overage', 'Median Overage', 'Overage Std Dev',
'Mean Roaming Calls', 'Median Roaming Calls', 'Roaming Calls Std Dev',
'Mean Conference Calls', 'Median Conference Calls', 'Conference Calls Std Dev',
'Mean Customer Care Calls', 'Median Customer Care Calls', 'Customer Care Calls Std Dev',
'Mean Retention Calls', 'Median Retention Calls', 'Retention Calls Std Dev',
'Mean Dropped Calls', 'Median Dropped Calls', 'Dropped Calls Std Dev',
'Percentage with Refurbished Devices', 'Percentage with Smartphones',
'Percentage with High Credit Rating', 'Percentage with Motorcycles',
'Percentage with Cars', 'Percentage with International Travel',
],'Value': [
total_customers, churned_customers, non_churned_customers, churned_percentage, non_churned_percentage,'mean'], revenue_stats['50%'], revenue_stats['std'],
revenue_stats['mean'], mou_stats['50%'], mou_stats['std'],
mou_stats['mean'], overage_stats['50%'], overage_stats['std'],
overage_stats['mean'], roam_stats['50%'], roam_stats['std'],
roam_stats['mean'], conference_stats['50%'], conference_stats['std'],
conference_stats['mean'], custcare_stats['50%'], custcare_stats['std'],
custcare_stats['mean'], retcalls_stats['50%'], retcalls_stats['std'],
retcalls_stats['mean'], dropvce_stats['50%'], dropvce_stats['std'],
dropvce_stats[
refurbished_percentage, smartphone_percentage,
highcreditr_percentage, mcycle_percentage,
car_percentage, travel_percentage,
]
})
summary_table
Metric | Value | |
---|---|---|
0 | Total Customers | 69000.000000 |
1 | Churned Customers | 0.000000 |
2 | Non-Churned Customers | 0.000000 |
3 | Churned Percentage | 0.000000 |
4 | Non-Churned Percentage | 0.000000 |
5 | Mean Revenue | 56.959203 |
6 | Median Revenue | 46.000000 |
7 | Revenue Std Dev | 43.439200 |
8 | Mean MOU | 525.822290 |
9 | Median MOU | 366.000000 |
10 | MOU Std Dev | 530.024730 |
11 | Mean Overage | 64.076725 |
12 | Median Overage | 40.000000 |
13 | Overage Std Dev | 68.671857 |
14 | Mean Roaming Calls | 1.195493 |
15 | Median Roaming Calls | 0.000000 |
16 | Roaming Calls Std Dev | 9.083502 |
17 | Mean Conference Calls | 0.276275 |
18 | Median Conference Calls | 0.000000 |
19 | Conference Calls Std Dev | 1.173325 |
20 | Mean Customer Care Calls | 2.377652 |
21 | Median Customer Care Calls | 0.000000 |
22 | Customer Care Calls Std Dev | 4.253015 |
23 | Mean Retention Calls | 0.035290 |
24 | Median Retention Calls | 0.000000 |
25 | Retention Calls Std Dev | 0.201412 |
26 | Mean Dropped Calls | 2.986913 |
27 | Median Dropped Calls | 2.000000 |
28 | Dropped Calls Std Dev | 4.539006 |
29 | Percentage with Refurbished Devices | 0.000000 |
30 | Percentage with Smartphones | 0.000000 |
31 | Percentage with High Credit Rating | 0.000000 |
32 | Percentage with Motorcycles | 0.000000 |
33 | Percentage with Cars | 0.000000 |
34 | Percentage with International Travel | 0.000000 |
Data
show(churn)
churn | changer | changem | revenue | mou | overage | roam | conference | months | uniqsubs | custcare | retcalls | dropvce | eqpdays | refurb | smartphone | highcreditr | mcycle | car | travel | region | occupation | training | representative |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Loading ITables v2.0.1 from the internet...
(need help?) |
# show(churn[churn['product'] == product_type]
# .groupby(['contract', 'state'])
# .size()
# .reset_index(name='count'))