debugging discrepancy
daneschi committed Oct 30, 2023
1 parent 658f5ef commit 038415f
Showing 6 changed files with 129 additions and 38 deletions.
36 changes: 27 additions & 9 deletions linfa/discrepancy.py
@@ -28,6 +28,7 @@ def __init__(self, model_name,
self.output_size = output_size
self.model_name = model_name
self.model_folder = model_folder
self.is_trained = False

# Assign LF model
self.lf_model = lf_model
@@ -46,7 +47,7 @@ def __init__(self, model_name,
self.var_out_std = torch.std(var_grid_out)

# Create surrogate
self.surrogate = FNN(input_size, output_size, arch=dnn_arch, device=self.device) if surrogate is None else surrogate
self.surrogate = FNN(input_size, output_size, arch=dnn_arch, device=self.device, init_zero=True) if surrogate is None else surrogate

def surrogate_save(self):
"""Save surrogate model to [self.name].sur and [self.name].npz
@@ -67,17 +68,28 @@ def surrogate_load(self):
# Read back the state dictionary from file
self.surrogate.load_state_dict(torch.load(self.model_folder + self.model_name + '.sur'))

def update(self, batch_x, max_iters=10000, lr=0.01, lr_exp=0.999, record_interval=500, store=True, reg=False):
def update(self, batch_x, max_iters=10000, lr=0.01, lr_exp=0.999, record_interval=50, store=True, reg=False, reg_penalty=0.0001):
"""Train surrogate model with pre-grid.
"""
print('')
print('--- Training model discrepancy')
print('')

self.is_trained = True

# LF model output at the current batch
y = self.lf_model(batch_x)

# Standardize inputs and outputs
var_grid = (self.var_grid_in - self.var_in_avg) / self.var_in_std
var_out = (self.var_grid_out - self.var_out_avg) / self.var_out_std
# The output is the discrepancy
var_out = self.var_grid_out - torch.mean(y,dim=1).unsqueeze(1)
# Update output stats
# self.var_out_avg = torch.mean(var_out)
# self.var_out_std = torch.std(var_out)
# Rescale outputs
# var_out = (var_out - self.var_out_avg) / self.var_out_std

# Set optimizer and scheduler
optimizer = torch.optim.RMSprop(self.surrogate.parameters(), lr=lr)
@@ -86,22 +98,21 @@ def update(self, batch_x, max_iters=10000, lr=0.01, lr_exp=0.999, record_interva
for i in range(max_iters):
# Set surrogate in training mode
self.surrogate.train()
# Surrogate returns a table with rows as batches and columns as variables considered
y = self.lf_model(batch_x)
# Surrogate returns a table with rows as batches and columns as variables considered
disc = self.surrogate(var_grid)

# Compute loss averaged over batches/variables
# Mean over the columns (batches)
# Mean over the rows (variables)
# Also we need to account for the number of repeated observations
loss = torch.tensor(0.0)
loss = torch.tensor(0.0)
# Loop over the number of observations
for loopA in range(var_out.size(1)):
loss += torch.mean(torch.mean((y + disc - var_out[:,loopA].unsqueeze(1)) ** 2,dim=1),dim=0)
loss += torch.sum((disc.flatten() - var_out[:,loopA]) ** 2)
if reg:
reg_loss = 0
for param in self.surrogate.parameters():
reg_loss += torch.abs(param).sum() * 0.0001
reg_loss += torch.abs(param).sum() * reg_penalty
loss += reg_loss
optimizer.zero_grad()
loss.backward()
@@ -129,7 +140,14 @@ def forward(self, var):
Value of the surrogate at x.
"""
return self.surrogate((var - self.var_in_avg) / self.var_in_std) * self.var_out_std + self.var_out_avg
# res = self.surrogate((var - self.var_in_avg) / self.var_in_std) * self.var_out_std + self.var_out_avg
res = self.surrogate((var - self.var_in_avg) / self.var_in_std)
# res = self.surrogate(var)
if not(self.is_trained):
zero_res = torch.zeros_like(res)
return zero_res
else:
return res

def test_surrogate():

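For context, a minimal sketch of what the revised update() now fits: the surrogate is trained on the residual between the observation grid and the mean low-fidelity output at the current parameter batch, with standardized inputs and a sum-of-squares loss over repeated observations. All tensors and sizes below are illustrative stand-ins, not objects from the module.

import torch
import torch.nn as nn

# Illustrative stand-ins: 10 T,P grid points, 250 posterior samples, 3 repeated observations.
n_grid, n_batch, n_obs, n_in = 10, 250, 3, 2
var_grid_in = torch.rand(n_grid, n_in)                    # variable grid (T,P pairs)
var_grid_out = torch.rand(n_grid, n_obs)                  # repeated observations on the grid
y = torch.rand(n_grid, n_batch)                           # stands in for self.lf_model(batch_x)
surrogate = nn.Sequential(nn.Linear(n_in, 16), nn.ReLU(), nn.Linear(16, 1))  # stands in for the FNN

# Standardize inputs, then take the data-minus-mean-LF residual as the training target.
var_grid = (var_grid_in - torch.mean(var_grid_in)) / torch.std(var_grid_in)
target = var_grid_out - torch.mean(y, dim=1).unsqueeze(1)

disc = surrogate(var_grid)                                # (n_grid, 1) discrepancy prediction
loss = torch.tensor(0.0)
for loopA in range(target.size(1)):                       # sum squared error over repeated observations
    loss += torch.sum((disc.flatten() - target[:, loopA]) ** 2)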
2 changes: 1 addition & 1 deletion linfa/mlp.py
@@ -5,7 +5,7 @@
class FNN(nn.Module):
"""Fully Connected Neural Network"""

def __init__(self, input_size, output_size, arch=None, activation='relu', device='cpu'):
def __init__(self, input_size, output_size, arch=None, activation='relu', device='cpu',init_zero=False):
"""
Args:
input_size (int): Input size for FNN
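The diff only exposes the new init_zero flag in the FNN constructor; its body is not shown. A plausible reading, consistent with the untrained discrepancy returning zero, is that it zero-initializes the final linear layer. The helper below is an assumption for illustration, not the actual implementation.

import torch
import torch.nn as nn

def zero_output_layer(net: nn.Module):
    # Hypothetical helper: zero the last Linear layer so the untrained network returns 0.
    last_linear = [m for m in net.modules() if isinstance(m, nn.Linear)][-1]
    nn.init.zeros_(last_linear.weight)
    nn.init.zeros_(last_linear.bias)

net = nn.Sequential(nn.Linear(2, 64), nn.ReLU(), nn.Linear(64, 1))
zero_output_layer(net)
print(net(torch.rand(5, 2)))   # all zeros before any training step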
28 changes: 19 additions & 9 deletions linfa/models/discrepancy_models.py
@@ -16,10 +16,10 @@ def __init__(self, var_inputs):
## model constants
self.RConst = torch.tensor(8.314) # universal gas constant (J/ mol/ K)
self.data = None # dataset of model
self.stdRatio = 0.05 # standard deviation ratio
self.defOut = self.solve_lf(self.defParams)
self.stdRatio = 0.001 # standard deviation ratio
self.defOut = self.solve_t(self.defParams)

def solve_lf(self, cal_inputs):
def solve_t(self, cal_inputs):

num_batch = len(cal_inputs)
num_vars = len(self.var_in)
@@ -44,24 +44,33 @@ def solve_lf(self, cal_inputs):
return cov_frac

def solve_true(self, cal_inputs):


num_batch = len(cal_inputs)
num_vars = len(self.var_in)

# unpack variable input
T, P = torch.chunk(self.var_in, chunks = 2, dim = 1)
T = T.repeat(1, num_batch)
P = P.repeat(1, num_batch)

# unpack ground truth calibration input (adsorption site 1)
p01Const, e1Const = torch.chunk(cal_inputs, chunks = 2, dim = 1)
p01Const = p01Const.repeat(1, num_vars).t()
e1Const = e1Const.repeat(1, num_vars).t()

# specify adsorption site two parameters
p02Const, e2Const = torch.tensor(5000.0), torch.tensor(-22000.0)
p02Const, e2Const, lambda1Const, lambda2Const = torch.tensor(5000.0), torch.tensor(-22000.0), torch.tensor(1.0), torch.tensor(0.5)
p02Const = p02Const.repeat(1, num_vars).t()
e2Const = e2Const.repeat(1, num_vars).t()

# compute equilibrium constant of site one
k1Const = 1.0/p01Const * torch.exp(-e1Const / self.RConst / T)

# compute equilibrium constant of site two
k2Const = 1.0/p02Const * torch.exp(-e2Const/ self.RConst/T)
k2Const = 1.0/p02Const * torch.exp(-e2Const / self.RConst / T)

# compute surface coverage fraction for two adsorption sites with different equilibrium conditions
cov_frac = k1Const*P/(1 + k1Const*P) + k2Const*P/(1 + k2Const*P)
cov_frac = lambda1Const * (k1Const*P/(1 + k1Const*P)) + lambda2Const * (k2Const*P/(1 + k2Const*P))

# Return
return cov_frac
@@ -73,7 +82,7 @@ def genDataFile(self, dataFileNamePrefix='observations', use_true_model=True, st
if(use_true_model):
def_out = self.solve_true(self.defParams)
else:
def_out = self.solve_lf(self.defParams)
def_out = self.solve_t(self.defParams)

# get standard deviation
stdDev = self.stdRatio * torch.abs(def_out)
@@ -82,7 +91,8 @@ def genDataFile(self, dataFileNamePrefix='observations', use_true_model=True, st
coverage = def_out.repeat(1,num_observations)

for loopA in range(num_observations):
coverage[:,loopA] = coverage[:,loopA] + (stdDev * torch.normal(torch.tensor(0.0), torch.tensor(1.0),size=(len(coverage),1))).flatten()
noise = torch.randn((len(coverage),1))*stdDev
coverage[:,loopA] = coverage[:,loopA] + noise.flatten()

if store:

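As a standalone check of the weighted two-site form now used in solve_true, the coverage fraction is a sum of two Langmuir isotherms, lambda1*k1*P/(1 + k1*P) + lambda2*k2*P/(1 + k2*P), with each k_i = exp(-e_i/(R*T))/p0_i. Only p02, e2, lambda1, lambda2 and R come from the diff; the site-1 parameters and the T, P values below are illustrative.

import torch

R = torch.tensor(8.314)                                   # universal gas constant (J/mol/K)
T, P = torch.tensor(300.0), torch.tensor(1.0)             # illustrative temperature and pressure
p01, e1 = torch.tensor(1000.0), torch.tensor(-21000.0)    # hypothetical site-1 calibration inputs
p02, e2 = torch.tensor(5000.0), torch.tensor(-22000.0)    # site-2 constants from the diff
lam1, lam2 = torch.tensor(1.0), torch.tensor(0.5)         # site weights from the diff

k1 = 1.0 / p01 * torch.exp(-e1 / (R * T))                 # equilibrium constant, site 1
k2 = 1.0 / p02 * torch.exp(-e2 / (R * T))                 # equilibrium constant, site 2
cov_frac = lam1 * k1 * P / (1 + k1 * P) + lam2 * k2 * P / (1 + k2 * P)
print(float(cov_frac))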
4 changes: 3 additions & 1 deletion linfa/plot_res.py
@@ -20,7 +20,8 @@ def plot_log(log_file,out_dir,fig_format='png',use_dark_mode=False):

# loss profile
plt.figure(figsize=(2,2))
plt.semilogy(log_data[:,1],log_data[:,2],'b-')
# plt.semilogy(log_data[:,1],log_data[:,2],'b-')
plt.plot(log_data[:,1],log_data[:,2],'b-')
plt.xlabel('Iterations',fontsize=fs)
plt.ylabel('log loss',fontsize=fs)
plt.gca().tick_params(axis='both', labelsize=fs)
@@ -41,6 +42,7 @@ def plot_params(param_data,LL_data,idx1,idx2,out_dir,out_info,fig_format='png',u
# Plot figure
plt.figure(figsize=(3,2))
plt.scatter(param_data[:,idx1],param_data[:,idx2],s=1.5,lw=0,marker='o',c=np.exp(dent_data))
# plt.scatter(param_data[:,idx1],param_data[:,idx2],s=1.5,lw=0,marker='o')
plt.colorbar()
plt.gca().xaxis.set_major_formatter(mtick.FormatStrFormatter('%.1f'))
plt.gca().yaxis.set_major_formatter(mtick.FormatStrFormatter('%.1e'))
56 changes: 46 additions & 10 deletions linfa/run_experiment.py
@@ -92,7 +92,9 @@ def run(self):
if (self.surrogate_type == 'surrogate'):
not_found = not os.path.exists(self.name + ".sur") or not os.path.exists(self.name + ".npz")
elif(self.surrogate_type == 'discrepancy'):
not_found = not os.path.exists(self.name + ".sur")
# !!! Temporary - CHECK !!!
not_found = False
# not_found = not os.path.exists(self.name + ".sur")
else:
print('Invalid type of surrogate model')
exit(-1)
@@ -107,7 +109,7 @@
# Save a copy of the data in the result folder so it is handy
if hasattr(self.model,'data'):
# np.savetxt(self.output_dir + '/' + self.name + '_data', self.model.data, newline="\n")
np.save(self.output_dir + '/' + self.name + '_data', self.model.data)
np.savetxt(self.output_dir + '/' + self.name + '_data', self.model.data)

# setup device
torch.manual_seed(self.seed)
@@ -224,7 +226,7 @@ def train(self, nf, optimizer, iteration, log, sampling=True, t=1):
# Save log profile
np.savetxt(self.output_dir + '/' + self.log_file, np.array(log), newline="\n")

# Save transformed samples
# Save normalized domain samples
np.savetxt(self.output_dir + '/' + self.name + '_samples_' + str(iteration), xkk.data.clone().cpu().numpy(), newline="\n")

# Save samples in the original space
@@ -243,18 +245,43 @@ def train(self, nf, optimizer, iteration, log, sampling=True, t=1):

# Save model outputs at the samples - If a model is defined
if self.transform:
stds = torch.abs(self.model.defOut).to(self.device) * self.model.stdRatio
o00 = torch.randn(x00.size(0), self.model.data.shape[0]).to(self.device)
noise = o00*stds.repeat(o00.size(0),1)
if(self.surrogate_type == 'surrogate'):
# Define noise when we use NoFAS
stds = torch.abs(self.model.defOut).to(self.device) * self.model.stdRatio
o00 = torch.randn(x00.size(0), self.model.data.shape[0]).to(self.device)
noise = o00*stds.repeat(o00.size(0),1)
# Compute outputs
if self.surrogate:
np.savetxt(self.output_dir + '/' + self.name + '_outputs_' + str(iteration), (self.surrogate.forward(xkk) + noise).data.cpu().numpy(), newline="\n")
else:
np.savetxt(self.output_dir + '/' + self.name + '_outputs_' + str(iteration), (self.model.solve_t(self.transform.forward(xkk)) + noise).data.cpu().numpy(), newline="\n")
elif(self.surrogate_type == 'discrepancy'):
# Define noise when we use NoFAS
stds = torch.abs(self.model.defOut).to(self.device) * self.model.stdRatio
# Noise is rows: number of T,P pairs, columns: number of batches
o00 = torch.randn(self.model.data.shape[0], x00.size(0)).to(self.device)
noise = o00*stds.repeat(1,x00.size(0))
# Print lf outputs
model_out = self.model.solve_t(self.transform.forward(xkk))
np.savetxt(self.output_dir + '/' + self.name + '_outputs_lf_' + str(iteration), model_out.data.cpu().numpy(), newline="\n")
# LF model, plus discrepancy, plus noise
model_out = self.model.solve_t(self.transform.forward(xkk)) + self.surrogate.forward(self.model.var_in) + noise
np.savetxt(self.output_dir + '/' + self.name + '_outputs_' + str(iteration), model_out.data.cpu().numpy(), newline="\n")
if(self.surrogate is None):
# This needs to have as many rows as T,P pairs
# and as many columns as batches
model_out_noise = model_out + noise
np.savetxt(self.output_dir + '/' + self.name + '_outputs_lf+noise_' + str(iteration), model_out_noise.data.cpu().numpy(), newline="\n")
else:
discr_out = self.surrogate.forward(self.model.var_in)
# CHECK COMPATIBILITY !!!
model_out_lf_discr = model_out + discr_out
model_out_lf_discr_noise = model_out + discr_out + noise
# Save model outputs
# For discrepancy we have
# Rows: number of variable pairs
# Columns: number of batches
np.savetxt(self.output_dir + '/' + self.name + '_outputs_discr_' + str(iteration), discr_out.data.cpu().numpy(), newline="\n")
np.savetxt(self.output_dir + '/' + self.name + '_outputs_lf+discr_' + str(iteration), model_out_lf_discr.data.cpu().numpy(), newline="\n")
np.savetxt(self.output_dir + '/' + self.name + '_outputs_lf+discr+noise_' + str(iteration), model_out_lf_discr_noise.data.cpu().numpy(), newline="\n")
else:
print('Invalid type of surrogate model')
exit(-1)
Expand All @@ -276,8 +303,17 @@ def train(self, nf, optimizer, iteration, log, sampling=True, t=1):
print('Invalid type of surrogate model')
exit(-1)
if(go_on):
xk0 = xk[:self.true_data_num, :].data.clone()
self.surrogate.update(xk0, max_iters=self.surr_upd_it)
if(self.surrogate_type == 'surrogate'):
xk0 = xk[:self.true_data_num, :].data.clone()
self.surrogate.update(xk0, max_iters=self.surr_upd_it)
elif(self.surrogate_type == 'discrepancy'):
xk0 = xk.data.clone()
self.surrogate.update(xk0, max_iters=self.surr_upd_it, reg=False, reg_penalty=0.0001)
else:
print('Invalid type of surrogate model')
exit(-1)
# Update Surrogate Model


# Free energy bound
loss = (- torch.sum(sum_log_abs_det_jacobians, 1) - t * self.model_logdensity(xk)).mean()
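The key change in the discrepancy branch above is the orientation of the noise: rows index the T,P observation grid and columns index the normalizing-flow batch, so the noise adds elementwise to model_out. A minimal shape sketch; all sizes and tensors are illustrative stand-ins for model.defOut, model.stdRatio, and the sampled batch.

import torch

n_grid, n_batch, std_ratio = 10, 250, 0.001           # illustrative sizes; stdRatio as in the model
def_out = torch.rand(n_grid, 1)                        # stands in for model.defOut (one column)
stds = torch.abs(def_out) * std_ratio                  # per-observation standard deviations
o00 = torch.randn(n_grid, n_batch)                     # rows: T,P pairs, columns: batches
noise = o00 * stds.repeat(1, n_batch)                  # (n_grid, n_batch), same shape as model_out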
41 changes: 33 additions & 8 deletions linfa/transform.py
@@ -1,6 +1,31 @@
import math
import torch
import numpy as np
from functools import partial

# Identity
def id_fun(x):
return x
def id_jac(x):
return torch.zeros_like(x)

# Tanh
def tanh_fun(x,m1,t1,m2,t2):
return torch.tanh((x - m1) / t1 * 3.0) * t2 + m2
def tanh_jac(x,m1,t1,m2,t2):
return torch.log(1.0 - torch.tanh((x - m1) / t1 * 3.0) ** 2) + np.log(t2) + np.log(3.0) - np.log(t1)

# Linear
def lin_fun(x,a,b,c,d):
return (x - a) / (b - a) * (d - c) + c
def lin_jac(x,a,b,c,d):
return torch.tensor([np.log(d - c) - np.log(b - a)]).repeat(x.size(0), 1)

# Exponential
def exp_fun(x,a,b,c,d):
return torch.exp((x - a) / (b - a) * (np.log(d) - np.log(c)) + np.log(c))
def exp_jac(x,a,b,c,d):
return (x - a) / (b - a) * (np.log(d) - np.log(c)) + np.log(c) + np.log(np.log(d) - np.log(c)) - np.log(b - a)

class Transformation(torch.nn.Module):

@@ -11,21 +36,21 @@ def __init__(self, func_info=None):
self.n = len(func_info)
for func, a, b, c, d in func_info:
if func == "identity":
self.funcs.append(lambda x: x)
self.log_jacob.append(lambda x: torch.zeros_like(x))
self.funcs.append(id_fun)
self.log_jacob.append(id_jac)
elif func == "tanh":
m1 = (a + b) / 2
t1 = (b - a) / 2
m2 = (c + d) / 2
t2 = (d - c) / 2
self.funcs.append(lambda x: torch.tanh((x - m1) / (b - a) * 6.0) * t2 + m2)
self.log_jacob.append(lambda x: torch.log(1.0 - torch.tanh((x - m1) / (b - a) * 6.0) ** 2) + np.log(t2) + np.log(3.0) - np.log(t1))
self.funcs.append(partial(tanh_fun,m1=m1,t1=t1,m2=m2,t2=t2))
self.log_jacob.append(partial(tanh_jac,m1=m1,t1=t1,m2=m2,t2=t2))
elif func == "linear":
self.funcs.append(lambda x: (x - a) / (b - a) * (d - c) + c)
self.log_jacob.append(lambda x: torch.tensor([np.log(d - c) - np.log(b - a)]).repeat(x.size(0), 1))
self.funcs.append(partial(lin_fun,a=a,b=b,c=c,d=d))
self.log_jacob.append(partial(lin_jac,a=a,b=b,c=c,d=d))
elif func == "exp":
self.funcs.append(lambda x: torch.exp((x - a) / (b - a) * (np.log(d) - np.log(c)) + np.log(c)))
self.log_jacob.append(lambda x: (x - a) / (b - a) * (np.log(d) - np.log(c)) + np.log(c) + np.log(np.log(d) - np.log(c)) - np.log(b - a))
self.funcs.append(partial(exp_fun,a=a,b=b,c=c,d=d))
self.log_jacob.append(partial(exp_jac,a=a,b=b,c=c,d=d))

def forward(self, z):
"""
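A likely reason for replacing the in-loop lambdas with module-level functions bound through functools.partial is Python's late binding of closure variables: every lambda created inside the for loop over func_info would see the final a, b, c, d, so only the last transformation would behave correctly. partial freezes the argument values when each callable is built, and unlike a lambda the result can also be pickled. A minimal sketch of the pitfall, with illustrative values only:

from functools import partial

def scale(x, a):
    return x * a

# Late binding: all three lambdas share the loop variable 'a' and end up using a = 3.
funcs_lambda = [lambda x: x * a for a in (1, 2, 3)]
print([f(10) for f in funcs_lambda])   # [30, 30, 30]

# partial captures the value of 'a' at construction time, one value per callable.
funcs_partial = [partial(scale, a=a) for a in (1, 2, 3)]
print([f(10) for f in funcs_partial])  # [10, 20, 30]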
