Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace GPy with Gpytorch in TSEMO #94

Merged
merged 1 commit into from
Feb 26, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 87 additions & 112 deletions summit/strategies/tsemo.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,10 @@ def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwarg
next_experiments : :class:`~summit.utils.data.DataSet`
A Dataset object with the suggested experiments
"""
from GPy.models import GPRegression as gpr
from GPy.core.parameterization.priors import LogGaussian
from GPy.kern import Exponential, Matern32, Matern52, RBF
import pyrff
from pymoo.algorithms.nsga2 import NSGA2
from pymoo.optimize import minimize
from pymoo.factory import get_termination
import pyrff

# Suggest lhs initial design or append new experiments to previous experiments
if prev_res is None:
Expand Down Expand Up @@ -197,122 +194,39 @@ def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwarg
outputs - self.output_mean.to_numpy()
) / self.output_std.to_numpy()

# Set up models
input_dim = self.kern_dim
self.models = {
v.name: gpr(
# train and sample
n_outputs = len(self.domain.output_variables)
train_results = [0] * n_outputs
rmse_train_spectral = np.zeros(n_outputs)
for i, v in enumerate(self.domain.output_variables):
# Training
train_results[i] = self._train_sample(
v.name,
inputs_scaled.to_numpy(),
outputs_scaled[[v.name]].to_numpy(),
kernel=self.kernel(input_dim=input_dim, ARD=True),
)
for v in self.domain.output_variables
}

output_dim = len(self.domain.output_variables)
rmse_train = np.zeros(output_dim)
rmse_train_spectral = np.zeros(output_dim)
lengthscales = [None for _ in range(output_dim)]
variances = [None for _ in range(output_dim)]
noises = [None for _ in range(output_dim)]
rffs = [None for _ in range(output_dim)]
i = 0
num_restarts = kwargs.get(
"num_restarts", 100
) # This is a kwarg solely for debugging
self.logger.debug(
f"Fitting models (number of optimization restarts={num_restarts})\n"
)
for name, model in self.models.items():
# Constrain hyperparameters
model.kern.lengthscale.constrain_bounded(
np.sqrt(1e-3), np.sqrt(1e3), warning=False
)
model.kern.lengthscale.set_prior(LogGaussian(0, 10), warning=False)
model.kern.variance.constrain_bounded(
np.sqrt(1e-3), np.sqrt(1e3), warning=False
)
model.kern.variance.set_prior(LogGaussian(-6, 10), warning=False)
model.Gaussian_noise.constrain_bounded(np.exp(-6), 1, warning=False)

# Train model
model.optimize_restarts(
num_restarts=num_restarts, max_iters=10000, parallel=True, verbose=False
)

# self.logger.info model hyperparameters
lengthscales[i] = model.kern.lengthscale.values
variances[i] = model.kern.variance.values[0]
noises[i] = model.Gaussian_noise.variance.values[0]
self.logger.debug(f"Model {name} lengthscales: {lengthscales[i]}")
self.logger.debug(f"Model {name} variance: {variances[i]}")
self.logger.debug(f"Model {name} noise: {noises[i]}")

# Model validation
rmse_train[i] = rmse(
model.predict(inputs_scaled.to_numpy())[0],
outputs_scaled[[name]].to_numpy(),
mean=self.output_mean[name].values[0],
std=self.output_std[name].values[0],
)
self.logger.debug(f"RMSE train {name} = {rmse_train[i].round(2)}")

# Spectral sampling
self.logger.debug(
f"Spectral sampling {name} with {self.n_spectral_points} spectral points."
n_retries=self.n_retries,
)
if type(model.kern) == Exponential:
matern_nu = 1
elif type(model.kern) == Matern32:
matern_nu = 3
elif type(model.kern) == Matern52:
matern_nu = 5
elif type(model.kern) == RBF:
matern_nu = np.inf
else:
raise TypeError(
"Spectral sample currently only works with Matern type kernels, including RBF."
)

for _ in range(self.n_retries):
try:
rffs[i] = pyrff.sample_rff(
lengthscales=lengthscales[i],
scaling=np.sqrt(variances[i]),
noise=noises[i],
kernel_nu=matern_nu,
X=inputs_scaled.to_numpy(),
Y=outputs_scaled[[name]].to_numpy()[:, 0],
M=self.n_spectral_points,
)
break
except np.linalg.LinAlgError as e:
self.logger.error(e)
except ValueError as e:
self.logger.error(e)
if rffs[i] is None:
raise RuntimeError(
f"Spectral sampling failed after {self.n_retries} retries."
)
sample_f = lambda x: np.atleast_2d(rffs[i](x)).T

# Evaluate spectral samples
rff = train_results[i]["rff"]
sample_f = lambda x: np.atleast_2d(rff(x)).T
rmse_train_spectral[i] = rmse(
sample_f(inputs_scaled.to_numpy()),
outputs_scaled[[name]].to_numpy(),
mean=self.output_mean[name].values[0],
std=self.output_std[name].values[0],
outputs_scaled[[v.name]].to_numpy(),
mean=self.output_mean[v.name].values[0],
std=self.output_std[v.name].values[0],
)
self.logger.debug(
f"RMSE train spectral {name} = {rmse_train_spectral[i].round(2)}"
f"RMSE train spectral {v.name} = {rmse_train_spectral[i].round(2)}"
)

i += 1

# Save spectral samples
rffs = [train_result["rff"] for train_result in train_results]
dp_results = get_summit_config_path() / "tsemo" / str(self.uuid_val)
os.makedirs(dp_results, exist_ok=True)
pyrff.save_rffs(rffs, pathlib.Path(dp_results, "models.h5"))

# NSGAII internal optimisation
# NSGAII internal optimisation with spectral samples
self.logger.info("Optimizing models using NSGAII.")
optimizer = NSGA2(pop_size=self.pop_size)
problem = TSEMOInternalWrapper(
Expand Down Expand Up @@ -352,20 +266,81 @@ def suggest_experiments(self, num_experiments, prev_res: DataSet = None, **kwarg

# Add model hyperparameters as metadata columns
self.iterations += 1
i = 0
for name, model in self.models.items():
result[(f"{name}_variance", "METADATA")] = variances[i]
result[(f"{name}_noise", "METADATA")] = noises[i]
for var, l in zip(self.domain.input_variables, lengthscales[i]):
for res in train_results:
name = res["model_name"]
result[(f"{name}_variance", "METADATA")] = res["outputscale"]
result[(f"{name}_noise", "METADATA")] = res["noise"]
for var, l in zip(self.domain.input_variables, res["lengthscales"]):
result[(f"{name}_{var.name}_lengthscale", "METADATA")] = l
result[("iterations", "METADATA")] = self.iterations
i += 1
result[("iterations", "METADATA")] = self.iterations
return result
else:
self.logger.warning("No suggestions found.")
self.iterations += 1
return None

def _train_sample(self, model_name, X, y, **kwargs):
"""Train model and take spectral samples"""
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_model
from gpytorch.mlls.exact_marginal_log_likelihood import (
ExactMarginalLogLikelihood,
)
import pyrff
import torch

# Convert to tensors
X = torch.from_numpy(X)
y = torch.from_numpy(y)

# Train the model
model = SingleTaskGP(X, y)
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_model(mll)

# self.logger.info model hyperparameters
lengthscales = model.covar_module.base_kernel.lengthscale.detach()[0].numpy()
outputscale = model.covar_module.outputscale.detach().numpy()
noise = model.likelihood.noise_covar.noise.detach().numpy()[0]
self.logger.debug(f"Model {model_name} lengthscales: {lengthscales}")
self.logger.debug(f"Model {model_name} variance: {outputscale}")
self.logger.debug(f"Model {model_name} noise: {noise}")

# Spectral sampling
n_spectral_points = kwargs.get("n_spectral_points", 1500)
n_retries = kwargs.get("n_retries", 10)
self.logger.debug(
f"Spectral sampling {model_name} with {n_spectral_points} spectral points."
)
rff = None
nu = model.covar_module.base_kernel.nu
for _ in range(n_retries):
try:
rff = pyrff.sample_rff(
lengthscales=lengthscales,
scaling=np.sqrt(outputscale),
noise=noise,
kernel_nu=nu,
X=X.numpy(),
Y=y[:, 0].numpy(),
M=n_spectral_points,
)
break
except np.linalg.LinAlgError as e:
self.logger.error(e)
except ValueError as e:
self.logger.error(e)
if rff is None:
raise RuntimeError(f"Spectral sampling failed after {n_retries} retries.")

return dict(
model_name=model_name,
rff=rff,
lengthscales=lengthscales,
outputscale=outputscale,
noise=noise,
)

def reset(self):
"""Reset TSEMO state"""
self.all_experiments = None
Expand Down