Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
Maximilian Pichler authored and Maximilian Pichler committed Mar 30, 2020
1 parent d36f3eb commit f6f2625
Show file tree
Hide file tree
Showing 11 changed files with 294 additions and 84 deletions.
4 changes: 4 additions & 0 deletions sjSDM/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@ S3method(summary,sjSDM_DNN)
S3method(summary,sjSDM_cv)
S3method(summary,sjSDM_model)
export("%>%")
export(DNN)
export(add_legend)
export(add_species_arrows)
export(biotic_struct)
export(compile)
export(curve_text)
export(fit)
Expand All @@ -46,6 +48,7 @@ export(gpuInfo)
export(install_sjSDM)
export(is_torch_available)
export(layer_dense)
export(linear)
export(optimizer_RMSprop)
export(optimizer_adamax)
export(plotAssociations)
Expand All @@ -55,6 +58,7 @@ export(sjSDM)
export(sjSDM_DNN)
export(sjSDM_cv)
export(sjSDM_model)
export(spatialXY)
export(useCPU)
export(useGPU)
importFrom(magrittr,"%>%")
Expand Down
125 changes: 125 additions & 0 deletions sjSDM/R/sjSDM_configs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#' linear
#'
#' create linear env covariates
#' @param data matrix of environmental predictors
#' @param formula formula object for predictors
#' @param lambda lambda penality
#' @param alpha weighting between LASSO and ridge
#' @export
linear = function(data = NULL, formula = NULL, lambda = 0.0, alpha = 0.5) {
if(is.data.frame(data)) {

if(!is.null(formula)){
mf = match.call()
m = match("formula", names(mf))
formula = stats::as.formula(mf[m]$formula)
X = stats::model.matrix(formula, data)
} else {
formula = stats::as.formula("~.")
X = stats::model.matrix(formula, data)
}

} else {

if(!is.null(formula)) {
mf = match.call()
m = match("formula", names(mf))
formula = stats::as.formula(mf[m]$formula)
X = data.frame(data)
X = stats::model.matrix(formula, X)
} else {
formula = stats::as.formula("~.")
X = stats::model.matrix(formula,data.frame(data))
}
}
out = list()
out$formula = formula
out$X = X
out$l1_coef = (1-alpha)*lambda
out$l2_coef = alpha*lambda
class(out) = "envLinear"
return(out)
}

#' env DNN
#'
#' create deep neural network
#' @param data matrix of environmental predictors
#' @param formula formula object for predictors
#' @param hidden hidden units in layers, length of hidden correspond to number of layers
#' @param activation activation functions for layer, must be of same length as hidden
#' @param lambda lambda penality
#' @param alpha weighting between LASSO and ridge
#' @export
DNN = function(data = NULL, formula = NULL, hidden = c(10L, 10L, 10L), activation = "relu", lambda = 0.0, alpha = 0.5) {
if(is.data.frame(data)) {

if(!is.null(formula)){
mf = match.call()
m = match("formula", names(mf))
formula = stats::as.formula(mf[m]$formula)
X = stats::model.matrix(formula, data)
} else {
formula = stats::as.formula("~.")
X = stats::model.matrix(formula, data)
}

} else {

if(!is.null(formula)) {
mf = match.call()
m = match("formula", names(mf))
formula = stats::as.formula(mf[m]$formula)
X = data.frame(data)
X = stats::model.matrix(formula, X)
} else {
formula = stats::as.formula("~.")
X = stats::model.matrix(formula,data.frame(data))
}
}
out = list()
out$formula = formula
out$X = X
out$l1_coef = (1-alpha)*lambda
out$l2_coef = alpha*lambda
out$hidden = as.integer(hidden)
if(length(activation) != activation) activation = rep(activation, length(hidden))
out$activation = activation
class(out) = "envDNN"
return(out)
}

#' biotic_struct
#'
#' define biotic interaction structur
#' @param df degree of freedom for covariance parametrization, if \code{NULL} df is set to \code{ncol(Y)/2}
#' @param link link function, probit, logit or linear
#' @param lambda lambda penality
#' @param alpha weighting between LASSO and ridge
#' @param on_diag regularization on diagonals
#' @export
biotic_struct = function(df = NULL, lambda = 0.0, alpha = 0.5, on_diag = TRUE) {
out = list()
out$l1_cov = (1-alpha)*lambda
out$l2_cov = alpha*lambda
if(!is.null(df)) out$df = as.integer(df)
out$on_diag = on_diag
class(out) = "biotic"
return(out)
}



#' spatial
#'
#' define spatial structure, not yet supported
#' @export
spatialXY = function() {
out = list()
class(out) = "spatialXY"
return(out)
}




121 changes: 56 additions & 65 deletions sjSDM/R/sjSDM_linear.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,10 @@
#'
#' @description fast and accurate joint species model
#'
#' @param X matrix of environmental predictors
#' @param Y matrix of species occurences/responses
#' @param formula formula object for predictors
#' @param df degree of freedom for covariance parametrization, if \code{NULL} df is set to \code{ncol(Y)/2}
#' @param l1_coefs strength of lasso regularization on environmental coefficients: \code{l1_coefs * sum(abs(coefs))}
#' @param l2_coefs strength of ridge regularization on environmental coefficients: \code{l2_coefs * sum(coefs^2)}`
#' @param l1_cov strength of lasso regulIarization on covariances in species-species association matrix
#' @param l2_cov strength of ridge regularization on covariances in species-species association matrix
#' @param on_diag regularization on diagonals in association matrix or not, default TRUE
#' @param env matrix of environmental predictors, object of type \code{\link{linear}} or \code{\link{DNN}}
#' @param biotic defines biotic (species-species associations) structure, object of type \code{\link{biotic_struct}}
#' @param spatial defines spatial structure, object of type \code{\link{spatialXY}}
#' @param iter number of fitting iterations
#' @param step_size batch size for stochastic gradient descent, if \code{NULL} then step_size is set to: \code{step_size = 0.1*nrow(X)}
#' @param learning_rate learning rate for Adamax optimizer
Expand All @@ -36,106 +31,102 @@
#' @seealso \code{\link{print.sjSDM}}, \code{\link{predict.sjSDM}}, \code{\link{coef.sjSDM}}, \code{\link{summary.sjSDM}}, \code{\link{getCov}}, \code{\link{simulate.sjSDM}}, \code{\link{getSe}}
#' @author Maximilian Pichler
#' @export
sjSDM = function(X = NULL, Y = NULL, formula = NULL, df = NULL, l1_coefs = 0.0, l2_coefs = 0.0,
l1_cov = 0.0, l2_cov = 0.0,on_diag = TRUE, iter = 50L, step_size = NULL,learning_rate = 0.01, se = FALSE, link = c("probit", "logit", "linear"),sampling = 100L,
parallel = 0L, device = "cpu", dtype = "float32") {
sjSDM = function(Y = NULL,
env = NULL,
biotic = biotic_struct(),
spatial = NULL,
iter = 50L,
step_size = NULL,
learning_rate = 0.01,
se = FALSE,
link = c("probit", "logit", "linear"),
sampling = 100L,
parallel = 0L,
device = "cpu",
dtype = "float32") {
stopifnot(
is.matrix(X) || is.data.frame(X),
is.matrix(Y),
df > 0,
l1_coefs >= 0,
l2_coefs >= 0,
l1_cov >= 0,
l2_cov >= 0,
!is.null(Y),
iter >= 0,
learning_rate >= 0
)

if(reticulate::py_is_null_xptr(fa)) .onLoad()
check_modul()

out = list()

if(is.numeric(device)) device = as.integer(device)

if(device == "gpu") device = 0L

if(is.data.frame(X)) {

if(!is.null(formula)){
mf = match.call()
m = match("formula", names(mf))
formula = stats::as.formula(mf[m]$formula)
X = stats::model.matrix(formula, X)
} else {
formula = stats::as.formula("~.")
X = stats::model.matrix(formula, X)
}

} else {

if(!is.null(formula)) {
mf = match.call()
m = match("formula", names(mf))
formula = stats::as.formula(mf[m]$formula)
X = data.frame(X)
X = stats::model.matrix(formula, X)
} else {
formula = stats::as.formula("~.")
X = stats::model.matrix(formula,data.frame(X))
}
}
if(is.matrix(env)) env = linear(data = env)

link = match.arg(link)


out$formula = formula
out$names = colnames(X)
out$formula = env$formula
out$names = colnames(env$X)
out$species = colnames(Y)
out$cl = match.call()
link = match.arg(link)



### settings ##
if(is.null(df)) df = as.integer(floor(ncol(Y) / 2))
if(is.null(biotic$df)) biotic$df = as.integer(floor(ncol(Y) / 2))
if(is.null(step_size)) step_size = as.integer(floor(nrow(X) * 0.1))
else step_size = as.integer(step_size)

#.onLoad()

# if(any(sapply(out$names, function(n) stringr::str_detect(stringr::str_to_lower(n), "intercept")))) intercept = FALSE
output = ncol(Y)
input = ncol(X)
input = ncol(env$X)

out$get_model = function(){
model = fa$Model_base(input, device = device, dtype = dtype)

if(inherits(env, "envDNN")) {
for(i in 1:length(env$hidden))
model$add_layer(fa$layers$Layer_dense(hidden = env$hidden[i],
bias = FALSE,
l1 = env$l1_coef,
l2 = env$l2_coef,
activation = env$activation[i],
device = device,
dtype = dtype))
}
model$add_layer(fa$layers$Layer_dense(hidden = output,
bias = FALSE,
l1 = l1_coefs,
l2 = l2_coefs,
l1 = env$l1_coef,
l2 = env$l2_coef,
activation = NULL,
device = device,
dtype = dtype))
model$build(df = df, l1 = l1_cov, l2 = l2_cov, reg_on_Diag = on_diag,optimizer = fa$optimizer_adamax(lr = learning_rate, weight_decay = 0.01), link = link)


model$build(df = biotic$df,
l1 = biotic$l1_cov,
l2 = biotic$l2_cov,
reg_on_Diag = biotic$on_diag,
optimizer = fa$optimizer_adamax(lr = learning_rate, weight_decay = 0.01),
link = link)
return(model)
}
model = out$get_model()

time = system.time({model$fit(X, Y, batch_size = step_size, epochs = as.integer(iter), parallel = parallel, sampling = as.integer(sampling))})[3]
time = system.time({model$fit(env$X, Y, batch_size = step_size, epochs = as.integer(iter), parallel = parallel, sampling = as.integer(sampling))})[3]

out$logLik = model$logLik(X, Y,batch_size = step_size,parallel = parallel)
if(se) try({ out$se = t(abind::abind(model$se(X, Y, batch_size = step_size, parallel = parallel),along=0L)) })
out$logLik = model$logLik(env$X, Y,batch_size = step_size,parallel = parallel)
if(se && !inherits(env, "envDNN")) try({ out$se = t(abind::abind(model$se(X, Y, batch_size = step_size, parallel = parallel),along=0L)) })

out$model = model
out$settings = list( df = df, l1_coefs = l1_coefs, l2_coefs = l2_coefs,
l1_cov = l1_cov, l2_cov = l2_cov, iter = iter,
step_size = step_size,learning_rate = learning_rate,
parallel = parallel,device = device, dtype = dtype)
out$settings = list(biotic = biotic, env = env, spatial = spatial,iter = iter,
step_size = step_size,learning_rate = learning_rate,
parallel = parallel,device = device, dtype = dtype)
out$time = time
out$data = list(X = X, Y = Y)
out$data = list(X = env$X, Y = Y)
out$sessionInfo = utils::sessionInfo()
out$weights = model$weights_numpy
out$sigma = model$get_sigma_numpy()
out$history = model$history
torch$cuda$empty_cache()
class(out) = "sjSDM"
if(inherits(env, "envLinear")) class(out) = "sjSDM"
else class(out) = "sjSDM_DNN"
return(out)
}

Expand Down
6 changes: 5 additions & 1 deletion sjSDM/R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,11 @@ is_linux = function() {
identical(tolower(Sys.info()[["sysname"]]), "linux")
}


#' check modul
#' check if modul is loaded
check_modul = function(){
if(reticulate::py_is_null_xptr(fa)) .onLoad()
}

#' @importFrom magrittr %>%
#' @export
Expand Down
6 changes: 6 additions & 0 deletions sjSDM/inst/python/sjSDM_py/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,12 @@ def build(self, df=None, optimizer=None, l1=0.0, l2=0.0,
if optimizer != None:
self.optimizer = optimizer(params = params)



# def fill_lower_tril(self, sigma):
# xc = torch.cat([sigma[self.r_dim:], sigma.flip(dims=[0])])
# y = xc.view(self.r_dim, self.r_dim)
# return torch.tril(y)
def fit(self, X=None, Y=None, batch_size=25, epochs=100, sampling=100, parallel=0):
"""fit model
Expand Down
25 changes: 25 additions & 0 deletions sjSDM/man/DNN.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f6f2625

Please sign in to comment.