# plot attributes plt.figure(figsize=(12,8)) plt.title('All Subscriptions', fontsize='xx-large') # y-axis plt.ylabel('Survival Rate', fontsize='x-large') plt.ylim((0.0, 1.0)) plt.yticks(np.arange(0.0,1.0,0.05)) plt.axhline(0.5, color='red', alpha=0.75, linestyle=':') # median line in red # x-axis plt.xlabel('Timeline (Days)', fontsize='x-large') plt.xticks(range(0,800,30)) plt.axvline(30, color='gray', alpha=0.5, linestyle=':') # 30-day gray dashed line plt.axvline(180, color='gray', alpha=0.5, linestyle=':') # 30-day gray dashed line plt.axvline(365, color='gray', alpha=0.5, linestyle=':') # 1-year gray dashed line plt.axvline(365*2, color='gray', alpha=0.5, linestyle=':') # 2-year gray dashed line kmf.plot_survival_function()
%sql -- identify registration channel, initial payment method and initial payment plan days by subscription DROP VIEW IF EXISTS subscription_attributes; CREATE TEMP VIEW subscription_attributes AS WITH transaction_attributes AS ( -- get payment method and plan days for each subscriber's transaction date SELECT a.msno, a.trans_at, FIRST(b.payment_method_id) as payment_method_id, FIRST(b.payment_plan_days) as payment_plan_days FROM ( -- base transaction dates SELECT msno, transaction_date as trans_at, MAX(membership_expire_date) as expires_at FROM kkbox.transactions GROUP BY msno, transaction_date ) a INNER JOIN kkbox.transactions b ON a.msno=b.msno AND a.trans_at=b.transaction_date AND a.expires_at=b.membership_expire_date WHERE b.payment_plan_days > 0 GROUP BY a.msno, a.trans_at ) SELECT m.*, n.payment_method_id as init_payment_method_id, n.payment_plan_days as init_payment_plan_days, COALESCE(CAST(o.registered_via as String), 'Unknown') as registered_via FROM kkbox.subscriptions m INNER JOIN transaction_attributes n ON m.msno=n.msno AND -- match on customer m.starts_at=n.trans_at -- and transaction date at start of transaction LEFT OUTER JOIN kkbox.members o -- membership info (assume stable across subscription) ON m.msno=o.msno ORDER BY m.subscription_id
# eliminate nominal channels channels_pd = subscriptions_pd[~subscriptions_pd['registered_via'].isin(['10','13','16'])] # configure the plot plt.figure(figsize=(12,8)) ax = plt.subplot(111) plt.title('By Registration Channel', fontsize='xx-large') # configure the x-axis plt.xlabel('Timeline (Days)', fontsize='x-large') plt.xticks(range(0,800,30)) # configure the y-axis plt.ylabel('Survival Rate', fontsize='x-large') plt.ylim((0.0, 1.0)) plt.yticks(np.arange(0.0,1.0,0.05)) # graph each curve on the plot for name, grouped_pd in channels_pd.groupby('registered_via'): kmf = KaplanMeierFitter(alpha=0.05) kmf.fit( grouped_pd['duration_days'], grouped_pd['churned'], label='Channel {0}'.format(name) ) kmf.plot(ax=ax)
With our subscription data prepared, we can now begin examining patterns of customer attrition observed with the KKBox music service. In this notebook, we'll get oriented to general patterns of customer dropout in preparation for the more detailed work taking place in the next notebook.
Last refresh: Never