gamma and lognormal
kanodiaayush committed Dec 14, 2023
1 parent 24cb867 commit f1e98d5
Showing 6 changed files with 325 additions and 40 deletions.
31 changes: 20 additions & 11 deletions bemb/model/bayesian_coefficient.py
@@ -66,7 +66,7 @@ def __init__(self,
If a tensor with shape (num_classes, dim) is supplied, this amounts to specifying a different
prior variance for each entry in the coefficient.
Defaults to 1.0.
distribution (str, optional): the distribution of the coefficient. Currently we support 'gaussian' and 'gamma'.
distribution (str, optional): the distribution of the coefficient. Currently we support 'gaussian', 'gamma' and 'lognormal'.
Defaults to 'gaussian'.
"""
super(BayesianCoefficient, self).__init__()
@@ -75,7 +75,7 @@ def __init__(self,

self.variation = variation

assert distribution in ['gaussian', 'gamma'], f'Unsupported distribution {distribution}'
assert distribution in ['gaussian', 'gamma', 'lognormal'], f'Unsupported distribution {distribution}'
if distribution == 'gamma':
'''
assert not obs2prior, 'Gamma distribution is not supported for obs2prior at present.'
@@ -84,13 +84,15 @@ def __init__(self,
assert mean > 0, 'Gamma distribution requires mean > 0'
assert variance > 0, 'Gamma distribution requires variance > 0'
# shape (concentration) is mean^2/variance, rate is mean/variance for Gamma distribution.
'''
shape = prior_mean ** 2 / prior_variance
rate = prior_mean / prior_variance
# prior_mean stores ln(shape) for gamma
prior_mean = np.log(shape)
# prior_variance stores rate for gamma
prior_variance = rate
'''
prior_mean = np.log(prior_mean)
# prior_variance is kept as-is: it is used directly as the rate of the gamma prior.

self.distribution = distribution
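
For reference, a minimal sketch (not part of the diff; values assumed) of the prior convention this change settles on: the prior_mean argument is treated as the gamma concentration (shape) and stored as its log, while prior_variance is used directly as the rate, matching the log_prior code further down.

import numpy as np
import torch
from torch.distributions import Gamma

prior_mean, prior_variance = 2.0, 4.0        # assumed values: concentration=2, rate=4
stored_mean = np.log(prior_mean)             # what the module stores internally
prior = Gamma(concentration=torch.exp(torch.tensor(stored_mean)),
              rate=torch.tensor(prior_variance))
print(prior.mean)                            # concentration / rate = 0.5
print(prior.log_prob(torch.tensor(0.5)))     # log-density used in the prior term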

@@ -141,13 +143,13 @@ def __init__(self,
num_classes, dim) * self.prior_variance)

# create variational distribution.
if self.distribution == 'gaussian':
if self.distribution == 'gaussian' or self.distribution == 'lognormal':
self.variational_mean_flexible = nn.Parameter(
torch.randn(num_classes, dim), requires_grad=True)
# TODO(kanodiaayush): initialize the gamma distribution variational mean in a more principled way.
elif self.distribution == 'gamma':
# initialize using uniform distribution between 0.5 and 1.5
# for a gamma distribution, we store the concentration as log(concentration) = variational_mean_flexible
# for a gamma distribution, we store the concentration (shape) as log(concentration) = variational_mean_flexible
self.variational_mean_flexible = nn.Parameter(
torch.rand(num_classes, dim) + 0.5, requires_grad=True)

@@ -200,7 +202,7 @@ def variational_mean(self) -> torch.Tensor:

if self.distribution == 'gamma':
# M = torch.pow(M, 2) + 0.000001
M = M.exp() / self.variational_logstd.exp()
M = (torch.minimum((M.exp() + 0.000001), torch.tensor(1e3))) / self.variational_logstd.exp()

if self.is_H and (self.H_zero_mask is not None):
# a H-variable with zero-entry restriction.
@@ -255,13 +257,19 @@ def log_prior(self,
else:
mu = self.prior_zero_mean

if self.distribution == 'gaussian':
if self.distribution == 'gaussian' or self.distribution == 'lognormal':
out = LowRankMultivariateNormal(loc=mu,
cov_factor=self.prior_cov_factor,
cov_diag=self.prior_cov_diag).log_prob(sample)
elif self.distribution == 'gamma':
concentration = torch.exp(mu)
rate = self.prior_variance
'''
print(concentration)
print('concentration')
print(rate)
print('rate')
'''
out = Gamma(concentration=concentration,
rate=rate).log_prob(sample)
# sum over the last dimension
@@ -315,14 +323,15 @@ def rsample(self, num_seeds: int = 1) -> Union[torch.Tensor, Tuple[torch.Tensor]
def variational_distribution(self) -> Union[LowRankMultivariateNormal, Gamma]:
"""Constructs the current variational distribution of the coefficient from current variational mean and covariance.
"""
if self.distribution == 'gaussian':
if self.distribution == 'gaussian' or self.distribution == 'lognormal':
return LowRankMultivariateNormal(loc=self.variational_mean,
cov_factor=self.variational_cov_factor,
cov_diag=torch.exp(self.variational_logstd))
elif self.distribution == 'gamma':
# for a gamma distribution, we store the concentration as log(concentration) = variational_mean_flexible
assert self.variational_mean_fixed is None, 'Gamma distribution does not support fixed mean'
concentration = self.variational_mean_flexible.exp()
concentration = self.variational_mean_flexible.exp() + 0.000001
concentration = torch.minimum(concentration, torch.tensor(1e3))
# for gamma distribution, we store the rate as log(rate) = variational_logstd
rate = torch.exp(self.variational_logstd)
return Gamma(concentration=concentration, rate=rate)
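A hedged sketch (shapes assumed, not part of the diff) of the gamma variational parameterization used in this file: variational_mean_flexible stores log(concentration), variational_logstd stores log(rate), and the concentration is shifted by 1e-6 and capped at 1e3 before the distribution is built.

import torch
from torch.distributions import Gamma

variational_mean_flexible = torch.randn(5, 3)   # stores log(concentration)
variational_logstd = torch.zeros(5, 3)          # stores log(rate)

concentration = variational_mean_flexible.exp() + 0.000001
concentration = torch.minimum(concentration, torch.tensor(1e3))
rate = variational_logstd.exp()

q = Gamma(concentration=concentration, rate=rate)
sample = q.rsample()                            # reparameterized sample, shape (5, 3)
mean = concentration / rate                     # mirrors the variational_mean property
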
16 changes: 14 additions & 2 deletions bemb/model/bemb.py
@@ -399,6 +399,7 @@ def __init__(self,
if (not self.obs2prior_dict[coef_name]) and (H_zero_mask is not None):
raise ValueError(f'You specified H_zero_mask for {coef_name}, but obs2prior is False for this coefficient.')

print(coef_name)
coef_dict[coef_name] = BayesianCoefficient(variation=variation,
num_classes=variation_to_num_classes[variation],
obs2prior=self.obs2prior_dict[coef_name],
@@ -695,19 +696,28 @@ def sample_coefficient_dictionary(self, num_seeds: int, deterministic: bool = Fa
sample_dict = dict()
for coef_name, coef in self.coef_dict.items():
if deterministic:
sample_dict[coef_name] = coef.variational_distribution.mean.unsqueeze(dim=0) # (1, num_*, dim)
s = coef.variational_distribution.mean.unsqueeze(dim=0) # (1, num_*, dim)
if coef.distribution == 'lognormal':
s = torch.exp(s)
sample_dict[coef_name] = s
if coef.obs2prior:
sample_dict[coef_name + '.H'] = coef.prior_H.variational_distribution.mean.unsqueeze(dim=0) # (1, num_*, dim)
else:
s = coef.rsample(num_seeds)
if coef.obs2prior:
# sample both obs2prior weight and realization of variable.
assert isinstance(s, tuple) and len(s) == 2
sample_dict[coef_name] = s[0]
if coef.distribution == 'lognormal':
ss = torch.exp(s[0])
else:
ss = s[0]
sample_dict[coef_name] = ss
sample_dict[coef_name + '.H'] = s[1]
else:
# only sample the realization of variable.
assert torch.is_tensor(s)
if coef.distribution == 'lognormal':
s = torch.exp(s)
sample_dict[coef_name] = s
return sample_dict
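
The lognormal convention introduced above keeps the variational distribution Gaussian in log-space and exponentiates samples before they enter the utility; a minimal sketch with assumed shapes (not part of the diff):

import torch
from torch.distributions import LowRankMultivariateNormal

loc = torch.zeros(4, 2)                  # (num_classes, dim), assumed
cov_factor = torch.zeros(4, 2, 1)        # rank-1 factor, assumed zero here
cov_diag = torch.ones(4, 2)
q = LowRankMultivariateNormal(loc=loc, cov_factor=cov_factor, cov_diag=cov_diag)

s = q.rsample(torch.Size([10]))          # (10, 4, 2) log-space samples
coef = torch.exp(s)                      # positive, lognormal-distributed coefficients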

Expand Down Expand Up @@ -1005,6 +1015,8 @@ def reshape_observable(obs, name):
R, P, I, num_obs, latent_dim)
coef_sample_1 = coef_sample_1.view(
R, P, I, num_obs, latent_dim)
# coef_sample_0 = torch.exp(coef_sample_0)
# coef_sample_1 = torch.exp(coef_sample_1)
# compute the factorized coefficient with shape (R, P, I, O).
coef = (coef_sample_0 * coef_sample_1).sum(dim=-1)
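
For context, the factorized coefficient computed above is an inner product over the latent dimension; a toy sketch with assumed sizes (not part of the diff):

import torch

R, P, I, num_obs, latent_dim = 2, 3, 5, 1, 4                 # assumed sizes
coef_sample_0 = torch.randn(R, P, I, num_obs, latent_dim)    # e.g., theta_user
coef_sample_1 = torch.randn(R, P, I, num_obs, latent_dim)    # e.g., alpha_item
coef = (coef_sample_0 * coef_sample_1).sum(dim=-1)           # (R, P, I, num_obs)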

52 changes: 49 additions & 3 deletions bemb/model/bemb_chunked.py
@@ -47,6 +47,7 @@ def __init__(self,
num_items: int,
pred_item: bool,
num_classes: int = 2,
coef_dist_dict: Dict[str, str] = {'default' : 'gaussian'},
H_zero_mask_dict: Optional[Dict[str, torch.BoolTensor]] = None,
prior_mean: Union[float, Dict[str, float]] = 0.0,
prior_variance: Union[float, Dict[str, float]] = 1.0,
@@ -75,6 +76,14 @@ def __init__(self,
lambda_item + theta_user * alpha_item + gamma_user * beta_item * price_obs
See the doc-string of parse_utility for an example.
coef_dist_dict (Dict[str, str]): a dictionary mapping coefficient name to coefficient distribution name.
The coefficient distribution name can be one of the following:
1. 'gaussian'
2. 'gamma' - obs2prior is not supported for gamma coefficients
If a coefficient does not appear in the dictionary, it is assigned the distribution specified
by the 'default' key; unless overridden, that default is 'gaussian'.
For coefficients with gamma distributions, the prior mean and variance MUST be specified via the prior_mean and prior_variance arguments when obs2prior is False for that coefficient; when obs2prior is True, prior_variance is still required.
obs2prior_dict (Dict[str, bool]): a dictionary maps coefficient name (e.g., 'lambda_item')
to a boolean indicating if observable (e.g., item_obs) enters the prior of the coefficient.
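
A hypothetical coef_dist_dict illustrating the fallback rule documented above (the coefficient names are examples only, not from this repository):

coef_dist_dict = {
    'default': 'gaussian',      # used for any coefficient not listed below
    'beta_item': 'gamma',       # positive coefficient; prior_mean/prior_variance required
    'gamma_user': 'lognormal',  # positive coefficient via an exponentiated Gaussian
}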
@@ -119,6 +128,8 @@ def __init__(self,
If no `prior_mean['default']` is provided, the default prior mean will be 0.0 for those coefficients
not in the prior_mean.keys().
For coefficients with gamma distributions, prior_mean specifies the shape parameter of the gamma prior.
Defaults to 0.0.
prior_variance (Union[float, Dict[str, float], Dict[str, torch.Tensor]]): the variance of prior distribution
@@ -138,6 +149,8 @@ def __init__(self,
If no `prior_variance['default']` is provided, the default prior variance will be 1.0 for those coefficients
not in the prior_variance.keys().
For coefficients with gamma distributions, prior_variance specifies the rate parameter of the gamma prior.
Defaults to 1.0, which means all priors have identity matrix as the covariance matrix.
num_users (int, optional): number of users, required only if coefficient or observable
@@ -172,6 +185,7 @@ def __init__(self,
self.utility_formula = utility_formula
self.obs2prior_dict = obs2prior_dict
self.coef_dim_dict = coef_dim_dict
self.coef_dist_dict = coef_dist_dict
if H_zero_mask_dict is not None:
self.H_zero_mask_dict = H_zero_mask_dict
else:
@@ -285,6 +299,23 @@ def __init__(self,
for additive_term in self.formula:
for coef_name in additive_term['coefficient']:
variation = coef_name.split('_')[-1]

if coef_name not in self.coef_dist_dict.keys():
if 'default' in self.coef_dist_dict.keys():
self.coef_dist_dict[coef_name] = self.coef_dist_dict['default']
else:
warnings.warn(f"You provided a dictionary of coef_dist_dict, but coefficient {coef_name} is not a key in it. Supply a value for 'default' in the coef_dist_dict dictionary to use that as default value (e.g., coef_dist_dict['default'] = 'gaussian'); now using distribution='gaussian' since this is not supplied.")
self.coef_dist_dict[coef_name] = 'gaussian'

'''
elif self.coef_dist_dict[coef_name] == 'gamma':
if not self.obs2prior_dict[coef_name]:
assert isinstance(self.prior_mean, dict) and coef_name in self.prior_mean.keys(), \
f"Prior mean for {coef_name} needs to be provided because it's posterior is estimated as a gamma distribution."
assert isinstance(self.prior_variance, dict) and coef_name in self.prior_variance.keys(), \
f"Prior variance for {coef_name} needs to be provided because it's posterior is estimated as a gamma distribution."
'''

if isinstance(self.prior_mean, dict):
# the user didn't specify prior mean for this coefficient.
if coef_name not in self.prior_mean.keys():
@@ -326,6 +357,8 @@ def __init__(self,
for ii in range(chunk_sizes[0]):
bayesian_coefs_inner = []
for jj in range(chunk_sizes[1]):
if self.coef_dist_dict[coef_name] == 'gamma' and not self.obs2prior_dict[coef_name]:
assert mean > 0, 'shape of gamma distribution specified as prior_mean needs to be > 0'
bayesian_coefs_inner.append(BayesianCoefficient(variation=variation,
num_classes=variation_to_num_classes[variation],
obs2prior=self.obs2prior_dict[coef_name],
@@ -334,8 +367,8 @@ def __init__(self,
prior_mean=mean,
prior_variance=s2,
H_zero_mask=H_zero_mask,
is_H=False
)
is_H=False,
distribution=self.coef_dist_dict[coef_name]),
)
bayesian_coefs_inner = nn.ModuleList(bayesian_coefs_inner)
bayesian_coefs.append(bayesian_coefs_inner)
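
The nested lists above follow the standard nested nn.ModuleList pattern, so every chunk's coefficient registers its parameters with the parent module; a generic sketch (nn.Linear stands in for BayesianCoefficient):

import torch.nn as nn

chunk_sizes = (2, 4)                                   # assumed
grid = []
for ii in range(chunk_sizes[0]):
    row = [nn.Linear(3, 3) for _ in range(chunk_sizes[1])]
    grid.append(nn.ModuleList(row))
grid = nn.ModuleList(grid)                             # grid[ii][jj] is one coefficient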
@@ -655,7 +688,10 @@ def sample_coefficient_dictionary(self, num_seeds: int, deterministic: bool=Fals
for ii, bayesian_coeffs_inner in enumerate(bayesian_coeffs):
# inner_list = []
for jj, coef in enumerate(bayesian_coeffs_inner):
this_sample[:, :, :, ii, jj] = coef.variational_distribution.mean.unsqueeze(dim=0) # (1, num_*, dim)
s = coef.variational_distribution.mean.unsqueeze(dim=0) # (1, num_*, dim)
if coef.distribution == 'lognormal':
s = s.exp()
this_sample[:, :, :, ii, jj] = s
# inner_list.append(coef.variational_distribution.mean.unsqueeze(dim=0)) # (1, num_*, dim)
# inner_list.append(coef.variational_distribution.mean.unsqueeze(dim=0)) # (1, num_*, dim)
# outer_list.append(inner_list)
@@ -680,13 +716,19 @@ def sample_coefficient_dictionary(self, num_seeds: int, deterministic: bool=Fals
if coef.obs2prior:
# sample both obs2prior weight and realization of variable.
assert isinstance(s, tuple) and len(s) == 2
if coef.distribution == 'lognormal':
ss = torch.exp(s[0])
else:
ss = s[0]
this_sample[:, :, :, ii, jj] = ss
this_sample_H[:, :, :, ii, jj] = s[1]
# inner_list.append(s[0])
# inner_list_H.append(s[1])
else:
# only sample the realization of variable.
assert torch.is_tensor(s)
if coef.distribution == 'lognormal':
s = torch.exp(s)
this_sample[:, :, :, ii, jj] = s
# inner_list.append(s)
# outer_list.append(inner_list)
@@ -1004,6 +1046,7 @@ def reshape_observable(obs, name):
sample_dict[coef_name], coef_name)
assert coef_sample.shape == (R, total_computation, 1)
additive_term = coef_sample.view(R, total_computation)
additive_term *= term['sign']

# Type II: factorized coefficient, e.g., <theta_user, lambda_item>.
elif len(term['coefficient']) == 2 and term['observable'] is None:
@@ -1019,6 +1062,7 @@ def reshape_observable(obs, name):
R, total_computation, positive_integer)

additive_term = (coef_sample_0 * coef_sample_1).sum(dim=-1)
additive_term *= term['sign']

# Type III: single coefficient multiplied by observable, e.g., theta_user * x_obs_item.
elif len(term['coefficient']) == 1 and term['observable'] is not None:
@@ -1038,6 +1082,7 @@ def reshape_observable(obs, name):
price_coeffs = obs_coeff

additive_term = (coef_sample * obs).sum(dim=-1)
additive_term *= term['sign']

# Type IV: factorized coefficient multiplied by observable.
# e.g., gamma_user * beta_item * price_obs.
@@ -1071,6 +1116,7 @@ def reshape_observable(obs, name):
price_coeffs = obs_coeff

additive_term = (coef * obs).sum(dim=-1)
additive_term *= term['sign']

else:
raise ValueError(f'Undefined term type: {term}')
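
Each utility term is now scaled by term['sign'] after it is computed. A toy illustration, assuming the formula parser stores +1.0 or -1.0 as the sign (how the sign is parsed is not shown in this diff):

import torch

term = {'sign': -1.0}                         # hypothetical parsed term
coef_sample = torch.ones(3, 5)                # (R, total_computation), assumed
obs = torch.full((3, 5), 2.0)
additive_term = coef_sample * obs
additive_term = additive_term * term['sign']  # flips this term's contribution to utility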