-
Notifications
You must be signed in to change notification settings - Fork 2
/
sbg.py
134 lines (96 loc) · 4.05 KB
/
sbg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# Credit to https://github.com/jdmaturen for this implementation
"""
Implementation of the shifted beta geometric (sBG) model from "How to Project Customer Retention" (Fader and Hardie 2006)
http://www.brucehardie.com/papers/021/sbg_2006-05-30.pdf
And valuation calculations from:
Customer-Base Valuation in a Contractual Setting: The Perils of Ignoring Heterogeneity" (Fader and Hardie 2009)
https://marketing.wharton.upenn.edu/files/?whdmsaction=public:main.file&fileID=336
Apache 2 License
"""
from math import log
import numpy as np
from scipy.optimize import minimize
from scipy.special import hyp2f1
__author__ = 'JD Maturen'
def generate_probabilities(alpha, beta, x):
"""Generate probabilities in one pass for all t in x"""
p = [alpha / (alpha + beta)]
for t in xrange(1, x):
pt = (beta + t - 1) / (alpha + beta + t) * p[t-1]
p.append(pt)
return p
def probability(alpha, beta, t):
"""Probability function P"""
if t == 0:
return alpha / (alpha + beta)
return (beta + t - 1) / (alpha + beta + t) * probability(alpha, beta, t-1)
def survivor(probabilities, t):
"""Survivor function S"""
s = 1 - probabilities[0]
for x in xrange(1, t + 1):
s = s - probabilities[x]
return s
def log_likelihood(alpha, beta, data, survivors=None):
"""Function to maximize to obtain ideal alpha and beta parameters"""
if alpha <= 0 or beta <= 0:
return -1000
if survivors is None:
survivors = survivor_rates(data)
probabilities = generate_probabilities(alpha, beta, len(data))
final_survivor_likelihood = survivor(probabilities, len(data) - 1)
return sum([s * log(probabilities[t]) for t, s in enumerate(survivors)]) + data[-1] * log(final_survivor_likelihood)
def log_likelihood_multi_cohort(alpha, beta, data):
"""Function to maximize to obtain ideal alpha and beta parameters using data across multiple (contiguous) cohorts.
`data` must be a list of cohorts each with an absolute number per observed time unit."""
if alpha <= 0 or beta <= 0:
return -1000
probabilities = generate_probabilities(alpha, beta, len(data[0]))
cohorts = len(data)
total = 0
for i, cohort in enumerate(data):
total += sum([(cohort[j]-cohort[j+1])*log(probabilities[j]) for j in xrange(len(cohort)-1)])
total += cohort[-1] * log(survivor(probabilities, cohorts - i - 1))
return total
def survivor_rates(data):
s = []
for i, x in enumerate(data):
if i == 0:
s.append(1 - data[0])
else:
s.append(data[i-1] - data[i])
return s
def maximize(data):
survivors = survivor_rates(data)
func = lambda x: -log_likelihood(x[0], x[1], data, survivors)
x0 = np.array([100., 100.])
res = minimize(func, x0, method='nelder-mead', options={'xtol': 1e-8})
return res
def maximize_multi_cohort(data):
func = lambda x: -log_likelihood_multi_cohort(x[0], x[1], data)
x0 = np.array([1., 1.])
res = minimize(func, x0, method='nelder-mead', options={'xtol': 1e-8})
return res
def predicted_retention(alpha, beta, t):
"""Predicted retention probability at t. Function 8 in the paper"""
return (beta + t) / (alpha + beta + t)
def predicted_survival(alpha, beta, x):
"""Predicted survival probability, i.e. percentage of customers retained, for all t in x.
Function 1 in the paper"""
s = [predicted_retention(alpha, beta, 0)]
for t in xrange(1, x):
s.append(predicted_retention(alpha, beta, t) * s[t-1])
return s
def fit(data):
res = maximize(data)
if res.status != 0:
raise Exception(res.message)
return res.x
def fit_multi_cohort(data):
res = maximize_multi_cohort(data)
if res.status != 0:
raise Exception(res.message)
return res.x
def derl(alpha, beta, d, n):
"""discounted expected residual lifetime from "Customer-Base Valuation in a Contractual Setting: The Perils of
Ignoring Heterogeneity" (Fader and Hardie 2009)"""
return predicted_retention(alpha, beta, n) * hyp2f1(1, beta + n + 1, alpha + beta + n + 1, 1 / (1 + d))