init

#16 #15
TheoreticalEcology · Mar 30, 2020 · f6f2625 · f6f2625
1 parent d36f3eb
commit f6f2625
Show file tree

Hide file tree

Showing 11 changed files with 294 additions and 84 deletions.
diff --git a/sjSDM/NAMESPACE b/sjSDM/NAMESPACE
@@ -34,8 +34,10 @@ S3method(summary,sjSDM_DNN)
 S3method(summary,sjSDM_cv)
 S3method(summary,sjSDM_model)
 export("%>%")
+export(DNN)
 export(add_legend)
 export(add_species_arrows)
+export(biotic_struct)
 export(compile)
 export(curve_text)
 export(fit)
@@ -46,6 +48,7 @@ export(gpuInfo)
 export(install_sjSDM)
 export(is_torch_available)
 export(layer_dense)
+export(linear)
 export(optimizer_RMSprop)
 export(optimizer_adamax)
 export(plotAssociations)
@@ -55,6 +58,7 @@ export(sjSDM)
 export(sjSDM_DNN)
 export(sjSDM_cv)
 export(sjSDM_model)
+export(spatialXY)
 export(useCPU)
 export(useGPU)
 importFrom(magrittr,"%>%")

diff --git a/sjSDM/R/sjSDM_configs.R b/sjSDM/R/sjSDM_configs.R
@@ -0,0 +1,125 @@
+#' linear
+#' 
+#' create linear env covariates
+#' @param data matrix of environmental predictors
+#' @param formula formula object for predictors
+#' @param lambda lambda penality
+#' @param alpha weighting between LASSO and ridge
+#' @export
+linear = function(data = NULL, formula = NULL, lambda = 0.0, alpha = 0.5) {
+  if(is.data.frame(data)) {
+
+    if(!is.null(formula)){
+      mf = match.call()
+      m = match("formula", names(mf))
+      formula = stats::as.formula(mf[m]$formula)
+      X = stats::model.matrix(formula, data)
+    } else {
+      formula = stats::as.formula("~.")
+      X = stats::model.matrix(formula, data)
+    }
+
+  } else {
+
+    if(!is.null(formula)) {
+      mf = match.call()
+      m = match("formula", names(mf))
+      formula = stats::as.formula(mf[m]$formula)
+      X = data.frame(data)
+      X = stats::model.matrix(formula, X)
+    } else {
+      formula = stats::as.formula("~.")
+      X = stats::model.matrix(formula,data.frame(data))
+    }
+  }
+  out = list()
+  out$formula = formula
+  out$X = X
+  out$l1_coef = (1-alpha)*lambda
+  out$l2_coef = alpha*lambda
+  class(out) = "envLinear"
+  return(out)
+}
+
+#' env DNN
+#' 
+#' create deep neural network 
+#' @param data matrix of environmental predictors
+#' @param formula formula object for predictors
+#' @param hidden hidden units in layers, length of hidden correspond to number of layers
+#' @param activation activation functions for layer, must be of same length as hidden
+#' @param lambda lambda penality
+#' @param alpha weighting between LASSO and ridge
+#' @export
+DNN = function(data = NULL, formula = NULL, hidden = c(10L, 10L, 10L), activation = "relu",  lambda = 0.0, alpha = 0.5) {
+  if(is.data.frame(data)) {
+
+    if(!is.null(formula)){
+      mf = match.call()
+      m = match("formula", names(mf))
+      formula = stats::as.formula(mf[m]$formula)
+      X = stats::model.matrix(formula, data)
+    } else {
+      formula = stats::as.formula("~.")
+      X = stats::model.matrix(formula, data)
+    }
+
+  } else {
+
+    if(!is.null(formula)) {
+      mf = match.call()
+      m = match("formula", names(mf))
+      formula = stats::as.formula(mf[m]$formula)
+      X = data.frame(data)
+      X = stats::model.matrix(formula, X)
+    } else {
+      formula = stats::as.formula("~.")
+      X = stats::model.matrix(formula,data.frame(data))
+    }
+  }
+  out = list()
+  out$formula = formula
+  out$X = X
+  out$l1_coef = (1-alpha)*lambda
+  out$l2_coef = alpha*lambda
+  out$hidden = as.integer(hidden)
+  if(length(activation) != activation) activation = rep(activation, length(hidden))
+  out$activation = activation
+  class(out) = "envDNN"
+  return(out)
+}
+
+#' biotic_struct
+#' 
+#' define biotic interaction structur
+#' @param df degree of freedom for covariance parametrization, if \code{NULL} df is set to \code{ncol(Y)/2}
+#' @param link link function, probit, logit or linear
+#' @param lambda lambda penality
+#' @param alpha weighting between LASSO and ridge
+#' @param on_diag regularization on diagonals 
+#' @export
+biotic_struct = function(df = NULL, lambda = 0.0, alpha = 0.5, on_diag = TRUE) {
+  out = list()
+  out$l1_cov = (1-alpha)*lambda
+  out$l2_cov = alpha*lambda
+  if(!is.null(df)) out$df = as.integer(df)
+  out$on_diag = on_diag
+  class(out) = "biotic"
+  return(out)
+}
+
+
+
+#' spatial
+#' 
+#' define spatial structure, not yet supported
+#' @export
+spatialXY = function() {
+  out = list()
+  class(out) = "spatialXY"
+  return(out)
+}
+
+
+
+
diff --git a/sjSDM/R/sjSDM_linear.R b/sjSDM/R/sjSDM_linear.R
@@ -2,15 +2,10 @@
 #'
 #' @description fast and accurate joint species model
 #' 
-#' @param X matrix of environmental predictors
 #' @param Y matrix of species occurences/responses
-#' @param formula formula object for predictors
-#' @param df degree of freedom for covariance parametrization, if \code{NULL} df is set to \code{ncol(Y)/2}
-#' @param l1_coefs strength of lasso regularization on environmental coefficients: \code{l1_coefs * sum(abs(coefs))}
-#' @param l2_coefs strength of ridge regularization on environmental coefficients: \code{l2_coefs * sum(coefs^2)}`
-#' @param l1_cov strength of lasso regulIarization on covariances in species-species association matrix
-#' @param l2_cov strength of ridge regularization on covariances in species-species association matrix
-#' @param on_diag regularization on diagonals in association matrix or not, default TRUE
+#' @param env matrix of environmental predictors, object of type \code{\link{linear}} or \code{\link{DNN}}
+#' @param biotic defines biotic (species-species associations) structure, object of type \code{\link{biotic_struct}}
+#' @param spatial defines spatial structure, object of type \code{\link{spatialXY}}
 #' @param iter number of fitting iterations
 #' @param step_size batch size for stochastic gradient descent, if \code{NULL} then step_size is set to: \code{step_size = 0.1*nrow(X)}
 #' @param learning_rate learning rate for Adamax optimizer
@@ -36,106 +31,102 @@
 #' @seealso \code{\link{print.sjSDM}}, \code{\link{predict.sjSDM}}, \code{\link{coef.sjSDM}}, \code{\link{summary.sjSDM}}, \code{\link{getCov}}, \code{\link{simulate.sjSDM}}, \code{\link{getSe}}
 #' @author Maximilian Pichler
 #' @export
-sjSDM = function(X = NULL, Y = NULL, formula = NULL, df = NULL, l1_coefs = 0.0, l2_coefs = 0.0, 
-                 l1_cov = 0.0, l2_cov = 0.0,on_diag = TRUE, iter = 50L, step_size = NULL,learning_rate = 0.01, se = FALSE, link = c("probit", "logit", "linear"),sampling = 100L,
-                 parallel = 0L, device = "cpu", dtype = "float32") {
+sjSDM = function(Y = NULL, 
+                 env = NULL,
+                 biotic = biotic_struct(),
+                 spatial = NULL,
+                 iter = 50L, 
+                 step_size = NULL,
+                 learning_rate = 0.01, 
+                 se = FALSE, 
+                 link = c("probit", "logit", "linear"),
+                 sampling = 100L,
+                 parallel = 0L, 
+                 device = "cpu", 
+                 dtype = "float32") {
   stopifnot(
-    is.matrix(X) || is.data.frame(X),
-    is.matrix(Y),
-    df > 0,
-    l1_coefs >= 0,
-    l2_coefs >= 0,
-    l1_cov >= 0,
-    l2_cov >= 0,
+    !is.null(Y),
     iter >= 0,
     learning_rate >= 0
   )
 
-  if(reticulate::py_is_null_xptr(fa)) .onLoad()
+  check_modul()
 
   out = list()
 
   if(is.numeric(device)) device = as.integer(device)
 
   if(device == "gpu") device = 0L
 
-  if(is.data.frame(X)) {
-
-    if(!is.null(formula)){
-      mf = match.call()
-      m = match("formula", names(mf))
-      formula = stats::as.formula(mf[m]$formula)
-      X = stats::model.matrix(formula, X)
-    } else {
-      formula = stats::as.formula("~.")
-      X = stats::model.matrix(formula, X)
-    }
-
-  } else {
-
-    if(!is.null(formula)) {
-      mf = match.call()
-      m = match("formula", names(mf))
-      formula = stats::as.formula(mf[m]$formula)
-      X = data.frame(X)
-      X = stats::model.matrix(formula, X)
-    } else {
-      formula = stats::as.formula("~.")
-      X = stats::model.matrix(formula,data.frame(X))
-    }
-  }
+  if(is.matrix(env)) env = linear(data = env)
+
+  link = match.arg(link)
+
 
-  out$formula = formula
-  out$names = colnames(X)
+  out$formula = env$formula
+  out$names = colnames(env$X)
   out$species = colnames(Y)
   out$cl = match.call()
   link = match.arg(link)
-
-
 
   ### settings ##
-  if(is.null(df)) df = as.integer(floor(ncol(Y) / 2))
+  if(is.null(biotic$df)) biotic$df = as.integer(floor(ncol(Y) / 2))
   if(is.null(step_size)) step_size = as.integer(floor(nrow(X) * 0.1))
   else step_size = as.integer(step_size)
 
-  #.onLoad()
-
-  # if(any(sapply(out$names, function(n) stringr::str_detect(stringr::str_to_lower(n), "intercept")))) intercept = FALSE
   output = ncol(Y)
-  input = ncol(X)
+  input = ncol(env$X)
 
   out$get_model = function(){
     model = fa$Model_base(input, device = device, dtype = dtype)
+
+    if(inherits(env, "envDNN")) {
+      for(i in 1:length(env$hidden))
+        model$add_layer(fa$layers$Layer_dense(hidden = env$hidden[i],
+                                              bias = FALSE,
+                                              l1 = env$l1_coef,
+                                              l2 = env$l2_coef,
+                                              activation = env$activation[i],
+                                              device = device,
+                                              dtype = dtype))
+    } 
     model$add_layer(fa$layers$Layer_dense(hidden = output,
                                           bias = FALSE,
-                                          l1 = l1_coefs,
-                                          l2 = l2_coefs,
+                                          l1 = env$l1_coef,
+                                          l2 = env$l2_coef,
                                           activation = NULL,
                                           device = device,
                                           dtype = dtype))
-    model$build(df = df, l1 = l1_cov, l2 = l2_cov, reg_on_Diag = on_diag,optimizer = fa$optimizer_adamax(lr = learning_rate, weight_decay = 0.01), link = link)
+
+
+    model$build(df = biotic$df, 
+                l1 = biotic$l1_cov, 
+                l2 = biotic$l2_cov, 
+                reg_on_Diag = biotic$on_diag,
+                optimizer = fa$optimizer_adamax(lr = learning_rate, weight_decay = 0.01), 
+                link = link)
     return(model)
   }
   model = out$get_model()
 
-  time = system.time({model$fit(X, Y, batch_size = step_size, epochs = as.integer(iter), parallel = parallel, sampling = as.integer(sampling))})[3]
+  time = system.time({model$fit(env$X, Y, batch_size = step_size, epochs = as.integer(iter), parallel = parallel, sampling = as.integer(sampling))})[3]
 
-  out$logLik = model$logLik(X, Y,batch_size = step_size,parallel = parallel)
-  if(se) try({ out$se = t(abind::abind(model$se(X, Y, batch_size = step_size, parallel = parallel),along=0L)) })
+  out$logLik = model$logLik(env$X, Y,batch_size = step_size,parallel = parallel)
+  if(se && !inherits(env, "envDNN")) try({ out$se = t(abind::abind(model$se(X, Y, batch_size = step_size, parallel = parallel),along=0L)) })
 
   out$model = model
-  out$settings = list( df = df, l1_coefs = l1_coefs, l2_coefs = l2_coefs, 
-                       l1_cov = l1_cov, l2_cov = l2_cov, iter = iter, 
-                       step_size = step_size,learning_rate = learning_rate, 
-                       parallel = parallel,device = device, dtype = dtype)
+  out$settings = list(biotic = biotic, env = env, spatial = spatial,iter = iter, 
+                      step_size = step_size,learning_rate = learning_rate, 
+                      parallel = parallel,device = device, dtype = dtype)
   out$time = time
-  out$data = list(X = X, Y = Y)
+  out$data = list(X = env$X, Y = Y)
   out$sessionInfo = utils::sessionInfo()
   out$weights = model$weights_numpy
   out$sigma = model$get_sigma_numpy()
   out$history = model$history
   torch$cuda$empty_cache()
-  class(out) = "sjSDM"
+  if(inherits(env, "envLinear")) class(out) = "sjSDM"
+  else class(out) = "sjSDM_DNN"
   return(out)
 }
 

diff --git a/sjSDM/R/utils.R b/sjSDM/R/utils.R
@@ -78,7 +78,11 @@ is_linux = function() {
   identical(tolower(Sys.info()[["sysname"]]), "linux")
 }
 
-
+#' check modul
+#' check if modul is loaded
+check_modul = function(){
+  if(reticulate::py_is_null_xptr(fa)) .onLoad()
+}
 
 #' @importFrom magrittr %>%
 #' @export

diff --git a/sjSDM/inst/python/sjSDM_py/model.py b/sjSDM/inst/python/sjSDM_py/model.py
@@ -148,6 +148,12 @@ def build(self, df=None, optimizer=None, l1=0.0, l2=0.0,
         if optimizer != None:
             self.optimizer = optimizer(params = params)
 
+
+
+   # def fill_lower_tril(self, sigma):
+   #     xc = torch.cat([sigma[self.r_dim:], sigma.flip(dims=[0])])
+   #     y = xc.view(self.r_dim, self.r_dim)
+   #     return torch.tril(y)
     def fit(self, X=None, Y=None, batch_size=25, epochs=100, sampling=100, parallel=0):
         """fit model
 

diff --git a/sjSDM/man/DNN.Rd b/sjSDM/man/DNN.Rd