From faca92348cf1f2556af87316ae8e33c6b09ea23a Mon Sep 17 00:00:00 2001 From: eblondel Date: Thu, 2 May 2024 18:44:47 +0200 Subject: [PATCH] fix #384 --- DESCRIPTION | 4 +- R/executeWorkflowJob.R | 2 + R/geoflow_entity.R | 101 ++++++++++++++++++++++++++++++++++------- 3 files changed, 88 insertions(+), 19 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5ff0b3d2..4eff74ec 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: geoflow -Version: 0.20240419 -Date: 2024-04-19 +Version: 0.20240502 +Date: 2024-05-02 Title: Tools to Orchestrate Geospatial (Meta)Data Management Workflows and Manage FAIR Services Description: An engine to facilitate the orchestration and execution of metadata-driven data management workflows, in compliance with FAIR (Findable, Accessible, Interoperable and Reusable) data management principles. By means of a pivot metadata model, relying on the DublinCore standard (), diff --git a/R/executeWorkflowJob.R b/R/executeWorkflowJob.R index 5ebbcc0d..8799385f 100644 --- a/R/executeWorkflowJob.R +++ b/R/executeWorkflowJob.R @@ -135,6 +135,8 @@ executeWorkflowJob <- function(config, jobdir = NULL, queue = NULL, monitor = NU config$logger.info("SkipDataDownload is false: copying and fetching data...") #we copy data to job data dir (for data files) entity$copyDataToJobDir(config, jobdir) + #enrich with data types + entity$enrichWithDatatypes(config, jobdir) #vector data: we enrich entity with features #control is added in case of entity already enriched with features/coverages (when loaded from custom R entity handlers) if(!skipEnrichWithData) if(is.null(entity$data$features) && is.null(entity$data$coverages)){ diff --git a/R/geoflow_entity.R b/R/geoflow_entity.R index 20877ede..d78b2cb7 100644 --- a/R/geoflow_entity.R +++ b/R/geoflow_entity.R @@ -574,21 +574,15 @@ geoflow_entity <- R6Class("geoflow_entity", }, - #'@description This function will enrich the entity data objects with data features (vector data) or coverages (grid data). This method will overwrite - #' spatial metadata such as the bounding box (unless global option \code{skipDynamicBbox} is enabled). Note that the user spatial extent is not overwriten - #' since it may contain finer geometries than a bounding box. + #'@description Function that will scan zip data files and resolve data objects sourceType and uploadType #'@param config geoflow config object #'@param jobdir relative path of the job directory - enrichWithData = function(config, jobdir = NULL){ + enrichWithDatatypes = function(config, jobdir = NULL){ if(is.null(jobdir)) jobdir <- config$job wd <- getwd() setwd("./data") - skipDynamicBbox <- if(!is.null(config$profile$options$skipDynamicBbox)) config$profile$options$skipDynamicBbox else FALSE - enrichDataStrategy <- if(!is.null(config$profile$options$enrichDataStrategy)) config$profile$options$enrichDataStrategy else "first" - #TODO enrichDataSourceStrategy <- if(!is.null(config$profile$options$enrichDataSourceStrategy)) config$profile$options$enrichDataSourceStrategy else "first" - data_objects <- list() if(is.null(self$data$dir)){ data_objects <- list(self$data) @@ -596,15 +590,12 @@ geoflow_entity <- R6Class("geoflow_entity", data_objects <- self$data$getData() } - srid <- if(!is.null(self$srid)) self$srid else "" - data_srids <- c() - if(length(data_objects)>0){ data_objects <- lapply(1:length(data_objects), function(k){ - + data_object = data_objects[[k]] - + datasource <- data_object$source[[1]] #TODO we still look at first source datasource_name = NULL datasource_ext = NULL @@ -623,7 +614,7 @@ geoflow_entity <- R6Class("geoflow_entity", #setwd(wd) #return(NULL) } - + #in case of a datasource type requiring a file we check its presence #if absent we abort the function enrich With features types_without_file <- c("dbtable","dbview","dbquery") @@ -637,7 +628,7 @@ geoflow_entity <- R6Class("geoflow_entity", #basefilename basefilename <- datasource_name - + #inherit sourceType for source if(datasource_file_needed){ data_object$sourceType = switch(datasource_ext, @@ -646,7 +637,7 @@ geoflow_entity <- R6Class("geoflow_entity", basefilepath = file.path(getwd(), paste0(basefilename,".zip")) if(file.exists(basefilepath)){ #for srcType != "other" - #(re-zipped files on 'basefinename' with 'other' sourceType do not exist, + #(re-zipped files on 'basefilename' with 'other' sourceType do not exist, #but are just copied, not unzipped/rezipped with different name) zip_files = zip::zip_list(basefilepath) if(any(endsWith(zip_files$filename, ".gpkg"))){ @@ -670,7 +661,8 @@ geoflow_entity <- R6Class("geoflow_entity", ) #additional rule for uploadType if(datasource_ext == "zip") if(!is.null(data_object$uploadType)) if(data_object$uploadType == "other"){ - data_object$uploadType = data_object$sourceType + config$logger.info(sprintf("Zip data archived scanned, setting uploadType based on sourceType '%s'", data_object$sourceType)) + data_object$setUploadType(data_object$sourceType) if(data_object$uploadType == "geotiff") data_object$setSpatialRepresentationType("grid") } #overwrite top sourceType @@ -684,6 +676,81 @@ geoflow_entity <- R6Class("geoflow_entity", self$data$data[[k]]$setSpatialRepresentationType(data_object$spatialRepresentationType) } } + return(data_object) + }) + + if(is.null(self$data$dir)){ + self$data <- data_objects[[1]] + }else{ + self$data$data <- data_objects + } + } + setwd(self$getEntityJobDirPath(config, jobdir)) + }, + + #'@description This function will enrich the entity data objects with data features (vector data) or coverages (grid data). This method will overwrite + #' spatial metadata such as the bounding box (unless global option \code{skipDynamicBbox} is enabled). Note that the user spatial extent is not overwriten + #' since it may contain finer geometries than a bounding box. + #'@param config geoflow config object + #'@param jobdir relative path of the job directory + enrichWithData = function(config, jobdir = NULL){ + + if(is.null(jobdir)) jobdir <- config$job + wd <- getwd() + setwd("./data") + + skipDynamicBbox <- if(!is.null(config$profile$options$skipDynamicBbox)) config$profile$options$skipDynamicBbox else FALSE + enrichDataStrategy <- if(!is.null(config$profile$options$enrichDataStrategy)) config$profile$options$enrichDataStrategy else "first" + #TODO enrichDataSourceStrategy <- if(!is.null(config$profile$options$enrichDataSourceStrategy)) config$profile$options$enrichDataSourceStrategy else "first" + + data_objects <- list() + if(is.null(self$data$dir)){ + data_objects <- list(self$data) + }else{ + data_objects <- self$data$getData() + } + + srid <- if(!is.null(self$srid)) self$srid else "" + data_srids <- c() + + if(length(data_objects)>0){ + + data_objects <- lapply(1:length(data_objects), function(k){ + + data_object = data_objects[[k]] + + datasource <- data_object$source[[1]] #TODO we still look at first source + datasource_name = NULL + datasource_ext = NULL + datasource_file = NULL + if(!is.null(datasource)){ + datasource_parts <- unlist(strsplit(datasource, "\\.(?=[^\\.]+$)", perl=TRUE)) + datasource_name <- datasource_parts[1] + datasource_ext <- datasource_parts[2] + datasource_file <- attr(datasource, "uri") + attributes(datasource) <- NULL + if(is.null(datasource_file)) datasource_file <- datasource + } + + if(data_object$sourceType == "other"){ + config$logger.warn("Metadata dynamic handling based on 'data' not implemented for source type 'other'") + #setwd(wd) + #return(NULL) + } + + #in case of a datasource type requiring a file we check its presence + #if absent we abort the function enrich With features + types_without_file <- c("dbtable","dbview","dbquery") + datasource_file_needed <- !(data_object$sourceType %in% types_without_file) + if(datasource_file_needed && is.null(datasource_file)){ + warnMsg <- sprintf("No source file/URL for datasource '%s'. Data source copying aborted!", datasource_name) + config$logger.warn(warnMsg) + #setwd(wd) + #return(NULL) + } + + #basefilename + basefilename <- datasource_name #encoding mappings st_encoding <- switch(options("encoding")[[1]],