From 10c792f1c554624ba706693d2f039f6c54d9f0f1 Mon Sep 17 00:00:00 2001 From: eblondel Date: Tue, 11 Jun 2024 13:49:40 +0200 Subject: [PATCH] support #390 --- R/geoflow_action.R | 12 +- R/geoflow_software.R | 17 + inst/actions/metadataeditr_create_project.R | 347 ++++++++++++++++++ ...config_metadata_gsheets_metadataeditr.json | 41 +++ ...st_config_metadata_gsheets_metadataeditr.R | 42 +++ 5 files changed, 458 insertions(+), 1 deletion(-) create mode 100644 inst/actions/metadataeditr_create_project.R create mode 100644 inst/extdata/workflows/config_metadata_gsheets_metadataeditr.json create mode 100644 tests/testthat/test_config_metadata_gsheets_metadataeditr.R diff --git a/R/geoflow_action.R b/R/geoflow_action.R index ccd0b09a..0159b083 100644 --- a/R/geoflow_action.R +++ b/R/geoflow_action.R @@ -519,7 +519,17 @@ register_actions <- function(){ output_format = list(def = "output format generate by Rmarkdown template (e.g. 'html','pdf')", class = "character",choices = list("html","pdf","word","odt","rtf","md","github"), add_choices = FALSE, multiple = FALSE, default = "html") ), fun = source(system.file("actions", "rmarkdown_create_metadata.R", package = "geoflow"))$value - ) + ), + geoflow_action$new( + id="metadataeditr-create-project", + types = list("Metadata publication"), + def = "Create and publish a geospatial project in the World bank metadata editor", + target = "entity", + target_dir = "metadata", + packages = list("metadataeditr"), + available_options = list(), + fun = source(system.file("actions", "metadataeditr_create_project.R", package = "geoflow"))$value + ) ) .geoflow$actions <- objs } diff --git a/R/geoflow_software.R b/R/geoflow_software.R index 27bac50c..c8fe1a5d 100644 --- a/R/geoflow_software.R +++ b/R/geoflow_software.R @@ -690,6 +690,23 @@ register_software <- function(){ pwd = list(label = "Password", def = "Password for user authentication", class = "character"), logger = list(label = "Logger", def = "Level for 'geonode4R' logger messages (NULL,INFO or DEBUG)", class = "character", choices = c("INFO", "DEBUG")) ) + ), + #------------------------------------------------------------------------------------------------------- + #WORLDBANK METADATA EDITOR CLIENT + #------------------------------------------------------------------------------------------------------- + geoflow_software$new( + software_type = "metadataeditr", + definition = "World Bank metadata editor client powered by 'metadataeditr' package", + packages = list("metadataeditr"), + handler = try(metadataeditr:::set_api, silent = TRUE), + arguments = list( + api_url = list(label = "API URL", def = "Metadata editor API endpoint URL", class = "character"), + api_key = list(label = "API key", def = "An API user authorization key (to be generated in the Metadata editor)", class = "character"), + verbose = list(label = "verbose", def = "Whether messages should be displayed or not", class = "logical", default = FALSE) + ), + attributes = list( + collection_names = list(label = "Collection_names", def = "A coma-separated list of collection names where projects will be associated with", class = "character") + ) ) ) .geoflow$software <- software diff --git a/inst/actions/metadataeditr_create_project.R b/inst/actions/metadataeditr_create_project.R new file mode 100644 index 00000000..75ac3e09 --- /dev/null +++ b/inst/actions/metadataeditr_create_project.R @@ -0,0 +1,347 @@ +function(action, entity, config){ + + if(!requireNamespace("metadataeditr", quietly = TRUE)){ + stop("The 'metadataeditr-create-project' action requires the 'metadataeditr' package") + } + + #fetch software + MD_EDITOR = config$software$output$metadataeditr + if(is.null(MD_EDITOR)){ + stop("A 'metadataeditr' software must be configured to use this action") + } + MD_EDITOR_CONFIG = config$software$output$metadataeditr_config + collection_names = list() + if(!is.null(MD_EDITOR_CONFIG$properties$collection_names)){ + collection_names = as.list(strsplit(MD_EDITOR_CONFIG$properties$collection_names, ",")[[1]]) + } + + #basic function to map a geoflow_contact to a metadata editor contact + produce_md_contact = function(x){ + + md_contact = list() + + if(is.null(x$firstName)) x$firstName = NA + if(is.null(x$lastName)) x$lastName = NA + if(!is.na(x$firstName) && !is.na(x$lastName)) md_contact$individualName = paste(x$firstName, x$lastName) + if(!is.na(x$organizationName)) md_contact$organisationName = x$organizationName + if(!is.na(x$positionName)) md_contact$positionName = x$positionName + if(!is.na(x$role)) md_contact$role = x$role + + md_contact$contactInfo = list() + md_contact$contactInfo$address = list() + if(!is.na(x$email)) md_contact$contactInfo$address$electronicMailAddress = x$email + if(!is.na(x$postalAddress)) md_contact$contactInfo$address$deliveryPoint = x$postalAddress + if(!is.na(x$city)) md_contact$contactInfo$address$city = x$city + if(!is.na(x$postalCode)) md_contact$contactInfo$address$postalCode = x$postalCode + if(!is.na(x$country)) md_contact$contactInfo$address$country = x$country + md_contact$contactInfo$phone = list() + if(!is.na(x$voice)) md_contact$contactInfo$phone$voice = x$voice + if(!is.na(x$facsimile)) md_contact$contactInfo$phone$facsimile = x$facsimile + md_contact$contactInfo$onlineResource = list() + if(!is.na(x$websiteUrl)) md_contact$contactInfo$onlineResource$linkage = x$websiteUrl + if(!is.na(x$websiteName)) md_contact$contactInfo$onlineResource$name = x$websiteName + + return(md_contact) + } + + metadata_maintainers = entity$contacts[sapply(entity$contacts, function(x){tolower(x$role) %in% c("metadata")})] + producers = entity$contacts[sapply(entity$contacts, function(x){tolower(x$role) %in% c("owner","originator")})] + poc = entity$contacts[sapply(entity$contacts, function(x){!tolower(x$role) %in% c("metadata", "processor")})] + distributors = entity$contacts[sapply(entity$contacts, function(x){!tolower(x$role) %in% c("distributor")})] + + thumbnails = entity$relations[sapply(entity$relations, function(x){x$key == "thumbnail"})] + #thumbnail management + if(length(thumbnails)>0){ + dir.create("thumbnails") + + thumbnails = lapply(1:length(thumbnails), function(i){ + thumbnail = thumbnails[[i]] + if(startsWith(thumbnail$link, "http")){ + req_head = httr::HEAD(thumbnail$link) + if(httr::status_code(req_head) == 200){ + fileext = unlist(strsplit(httr::headers(req_head)[["content-type"]], "image/"))[2] + filename = paste0("thumbnail_", i, ".", fileext) + download.file(thumbnail$link, destfile = file.path(getwd(), "thumbnails", filename), mode = "wb") + thumbnail$link = filename + } + }else{ + file.copy(from = thumbnail$link, to = file.path(getwd(), "thumbnails", basename(thumbnail$link))) + thumbnail$link = basename(thumbnail$link) + } + return(thumbnail) + }) + } + + project <- list() + + production_date = Sys.Date() + + #metadata_information + project$metadata_information = list( + title = entity$titles[["title"]], + producers = lapply(producers, function(x){ + contact = produce_md_contact(x) + name = contact$organisationName + if(!is.null(contact$individualName)) name = contact$individualName + list(name = name) + }), + production_date = production_date, + version = entity$descriptions$edition + ) + + #description (~ ISO 19115) + #description/metadata + project$description = list( + idno = entity$identifiers[["id"]], + language = entity$language, + characterSet = list(codeListValue = "utf8"), + hierarchyLevel = entity$types[["generic"]], + contact = lapply(metadata_maintainers, produce_md_contact), + dateStamp = production_date, + metadataStandardName = "ISO 19115:2003/19139" + ) + + #description/spatialRepresentationInfo + project$description$spatialRepresentationInfo = list() + #spatial representation + if(!is.null(entity$data)) { + spatialRepresentationType <- entity$data$spatialRepresentationType + if(!is.null(spatialRepresentationType)){ + if(spatialRepresentationType=="vector"){ + features = entity$data$features + if(!is.null(features)){ + #support vector spatial representation + if(is(features, "sf")){ + geomtypes <- as.list(table(sf::st_geometry_type(features))) + geomtypes <- geomtypes[geomtypes > 0] + if(length(geomtypes)>0){ + #spatialRepresentationType <- "vector" + for(geomtype in names(geomtypes)){ + vsr = list() + geomLevel <- "geometryOnly" + if(geomtype == "TIN") geomLevel = "planarGraph" + if(geomLevel == "geometryOnly"){ + isoGeomType <- switch(geomtype, + "GEOMETRY" = "composite", "GEOMETRYCOLLECTION" = "composite", + "POINT" = "point", "MULTIPOINT" = "point", + "LINESTRING" = "curve", "CIRCULARSTRING" = "curve", "MULTILINESTRING" = "curve", "CURVE" = "curve", "COMPOUNDCURVE" = "curve", + "POLYGON" = "surface", "MULTIPOLYGON" = "surface", "TRIANGLE" = "surface", + "CURVEPOLYGON" = "surface", "SURFACE" = "surface", "MULTISURFACE" = "surface", + "POLYHEDRALSURFACE" = "solid" + ) + + vsr = list( + topologyLevel = geomLevel, + geometricObjects = list( + list( + geometricObjectType = isoGeomType, + geometricObjectCount = nrow(features[sf::st_geometry_type(features)==geomtype,]) + ) + ) + ) + } + project$description$spatialRepresentationInfo[[1]] = list( + vectorSpatialRepresentation = vsr + ) + } + }else{ + spatialRepresentationType <- "textTable" + } + } + } + } + + if(spatialRepresentationType=="grid"){ + gsr = list() + gsr$numberOfDimensions = length(entity$data$dimensions) + for(dimension in names(entity$data$dimensions)){ + dimObject <- list() + dimObject$dimensionName = dimension + dimObject$dimensionSize = entity$data$dimensions[[dimension]]$size + resolution<-entity$data$dimensions[[dimension]]$resolution + if(!is.null(resolution$value)){ + dimObject$resolution = resolution$value + } + gsr$axisDimensionproperties[[length(gsr$axisDimensionproperties)+1]] = dimObject + } + gsr$cellGeometry = "area" + project$description$spatialRepresentationInfo = list( + gridSpatialRepresentation = gsr + ) + } + } + } + + #description/referenceSystemInfo + project$description$referenceSystemInfo = list() + if(!is.null(entity$srid)){ + project$description$referenceSystemInfo[[1]] = list( + code = as.character(entity$srid), + codeSpace = "EPSG" + ) + if(entity$srid == 4326){ + #we add also the WGS one + project$description$referenceSystemInfo[[2]] = list( + code = "WGS 84", + codeSpace = "World Geodetic System (WGS)" + ) + } + } + + #description/identificationInfo + project$description$identificationInfo = list( + citation = list( + title = entity$titles[["title"]], + alternateTitle = if(!is.null(entity$titles[["alternative"]])) entity$titles[["alternative"]] else "", + date = lapply(entity$dates, function(x){ + list(date = x$value, type = x$key) + }), + edition = entity$descriptions$edition, + editionDate = if(any(sapply(entity$dates, function(x){x$key == "edition"}))){ + entity$dates[sapply(entity$dates, function(x){x$key == "edition"})][[1]]$value + }else "", + identifier = list(authority = "WB-DECDG", code = entity$identifiers[["id"]]), + #otherCitationDetails = "" + citedResponsibleParty = lapply(producers, produce_md_contact) + ), + abstract = entity$descriptions$abstract, + purpose = if(!is.null(entity$descriptions$purpose)) entity$descriptions$purpose else "", + credit = if(!is.null(entity$descriptions$credit)) entity$descriptions$credit else "", + status = if(!is.null(entity$descriptions$status)) entity$descriptions$status else "", + pointOfContact = lapply(poc, produce_md_contact), + resourceMaintenance = list( + list(maintenanceOrUpdateFrequency = "asNeeded") + ), + graphicOverview= if(length(thumbnails)>0) lapply(1:length(thumbnails), function(i){ + thumbnail = thumbnails[[i]] + th = list(fileName = thumbnail$link) + if(!is.null(thumbnail$description)) th$fileDescription = thumbnail$description + return(th) + }) else list(), + resourceFormat = lapply(entity$formats[sapply(entity$formats, function(x){x$key == "resource"})], function(resourceFormat){ + rf = list(name = resourceFormat$name) + if(!is.null(resourceFormat$description)) rf$specification = resourceFormat$description + return(rf) + }), + descriptiveKeywords = do.call(c, lapply(entity$subjects[sapply(entity$subjects, function(x){return(x$key != "topic")})], function(subject){ + lapply(subject$keywords, function(kwd){ + out_kwd = list(type = subject$key, keyword = kwd$name) + if(!is.null(subject$name)) out_kwd$thesaurusName = subject$name + return(out_kwd) + }) + })), + resourceConstraints = list( + list( + legalConstraints = list( + useLimitation = lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "uselimitation"})], function(cons){ + cons$values[[1]] + }), + accessConstraints = lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "accessconstraint"})], function(cons){ + cons$values[[1]] + }), + useConstraints = lapply(entity$rights[sapply(entity$rights, function(x){tolower(x$key) == "useconstraint"})], function(cons){ + cons$values[[1]] + }) + ) + ) + ), + #resourceSpecificUsage + #aggregationInfo + extent = list( + geographicElement = list( + list( + geographicBoundingBox = list( + southBoundLatitude = entity$spatial_bbox$ymin, + westBoundLongitude = entity$spatial_bbox$xmin, + northBoundLatitude = entity$spatial_bbox$ymax, + eastBoundLongitude = entity$spatial_bbox$xmax + ) + ) + ), + temporalElement = list( + if(!is.null(entity$temporal_extent$instant)){ + list(timePosition = entity$temporal_extent$instant) + }else if(!is.null(entity$temporal_extent$start) & !is.null(entity$temporal_extent$end)){ + list(beginPosition = entity$temporal_extent$start, endPosition = entity$temporal_extent$end) + } + ) + ), + spatialRepresentationType = entity$data$spatialRepresentationType, + language = list(entity$language), + characterSet = list( + list(codeListValue = "utf8") + ), + supplementalInformation = if(!is.null(entity$descriptions$info)) entity$descriptions$info else "" + ) + + #description/distributionInfo + project$description$distributionInfo = list( + distributionFormat = lapply(entity$formats[sapply(entity$formats, function(x){x$key == "distribution"})], function(distFormat){ + df = list(name = distFormat$name) + if(!is.null(distFormat$description)) df$specification = distFormat$description + return(df) + }), + distributor = lapply(distributors, produce_md_contact) + ) + + #description/dataQualityInfo + project$description$dataQualityInfo = list() + if(!is.null(entity$provenance)){ + project$description$dataQualityInfo = list( + list( + lineage = list( + statement = entity$provenance$statement, + processStep = lapply(entity$provenance$processes, function(process){ + list( + description = process$description, + rationale = process$rationale, + processor = lapply(process$processors, produce_md_contact) + ) + }) + ) + ) + ) + } + + #description/metadataMaintenance + project$description$metadataMaintenance = list(maintenanceAndUpdateFrequency = "asNeeded") + + #description/contentInfo + #description/feature_catalogue (common to all metadata standards?) + + #creation + output = metadataeditr::create_project( + type = "geospatial", + idno = entity$identifiers[["id"]], + metadata = project, + collection_names = collection_names, + thumbnail = if(length(thumbnails)>0) file.path(getwd(), "thumbnails", thumbnails[[1]]$link) else NULL, #TODO + overwrite = TRUE + ) + + if(output$response$status == "success"){ + config$logger.info(sprintf("Project '%s' successfully submitted to metadata editor", entity$identifiers$id)) + } + + #add resources + #first remove existing resources + reslist = metadataeditr::resources_list(entity$identifiers[["id"]]) + if(reslist$status_code==200){ + existing_resources = reslist$response$resources + if(length(existing_resources)>0) for(i in 1:nrow(existing_resources)){ + metadataeditr::resources_delete(idno = entity$identifiers[["id"]], resource_id = existing_resources[i,]$id) + } + } + + #thumbnails + if(length(thumbnails)>0){ + for(thumbnail in thumbnails){ + metadataeditr::resources_add( + idno = entity$identifiers[["id"]], + dctype = "pic", + title = thumbnail$name, + file_path = file.path(getwd(), "thumbnails", thumbnail$link) + ) + } + } +} \ No newline at end of file diff --git a/inst/extdata/workflows/config_metadata_gsheets_metadataeditr.json b/inst/extdata/workflows/config_metadata_gsheets_metadataeditr.json new file mode 100644 index 00000000..376c3fe2 --- /dev/null +++ b/inst/extdata/workflows/config_metadata_gsheets_metadataeditr.json @@ -0,0 +1,41 @@ +{ + "profile": { + "id": "my-workflow", + "name": "My workflow", + "project": "Test geoflow project", + "organization": "My organization", + "mode": "entity" + }, + "metadata": { + "entities": [ + { + "handler": "gsheet", + "source": "https://docs.google.com/spreadsheets/d/1KSc_IrM86GrNpUASCIVfq6L9cmML77iwpcSbliynYZk/edit?gid=1962881097#gid=1962881097" + } + ], + "contacts" : [ + { + "handler": "gsheet", + "source": "https://docs.google.com/spreadsheets/d/1BqlXwA2fKiRuozNAQhBb_PbQVSPTCfl8_Q9rfM8E2ws/edit?usp=sharing" + } + ] + }, + "software": [ + { + "id": "wb-metadataeditr", + "type": "output", + "software_type": "metadataeditr", + "parameters": { + "api_url": "{{ WB_METADATAEDITOR_API_URL }}", + "api_key": "{{ WB_METADATAEDITOR_API_KEY }}", + "verbose": true + } + } + ], + "actions": [ + { + "id": "metadataeditr-create-project", + "run": true + } + ] +} diff --git a/tests/testthat/test_config_metadata_gsheets_metadataeditr.R b/tests/testthat/test_config_metadata_gsheets_metadataeditr.R new file mode 100644 index 00000000..47c6bccb --- /dev/null +++ b/tests/testthat/test_config_metadata_gsheets_metadataeditr.R @@ -0,0 +1,42 @@ +# test_config_metadata_gsheets.R +# Author: Emmanuel Blondel +# +# Description: Integration tests for config_metadata_gsheets.json workflow +#======================= +require(geoflow, quietly = TRUE) +require(testthat) + +cfg_file = system.file("extdata/workflows/config_metadata_gsheets_metadataeditr.json", package = "geoflow") + +#init +test_that("init",{ + CFG <- geoflow::initWorkflow(cfg_file) + expect_is(CFG$metadata$content, "list") + expect_equal(length(CFG$metadata$content), 2L) + expect_equal(names(CFG$metadata$content), c("contacts", "entities")) + expect_equal(length(CFG$metadata$content$contacts), 4L) + expect_equal(length(CFG$getContacts()), 4L) + expect_equal(length(CFG$metadata$content$entities), 2L) + expect_equal(length(CFG$getEntities()), 2L) + expect_equal(length(CFG$actions), 0L) + expect_equal(length(CFG$software), 2L) + expect_equal(names(CFG$software), c("input", "output")) + expect_equal(length(CFG$software$input), 0L) + expect_equal(length(CFG$software$output), 0L) +}) + +#debug +test_that("debug",{ + DEBUG <- geoflow::debugWorkflow(cfg_file) + expect_equal(names(DEBUG), c("config", "entity")) + expect_is(DEBUG$config, "list") + expect_is(DEBUG$entity, "geoflow_entity") + expect_equal(DEBUG$entity$identifiers[["id"]], "my-geoflow-record") + +}) + +#execute +test_that("execute",{ + EXEC <- geoflow::executeWorkflow(cfg_file, dir = ".") + expect_true(dir.exists(EXEC)) +})