From 8758de614066ddc06fce2cdd7aa02d3896064a1e Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Thu, 2 Jun 2022 16:54:01 +0200 Subject: [PATCH 01/13] Start write_dwc() function Currently generates metadata only --- DESCRIPTION | 5 +- R/write_dwc.R | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 R/write_dwc.R diff --git a/DESCRIPTION b/DESCRIPTION index 51b2bb08..2548ded7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,6 +38,7 @@ Depends: R (>= 3.5.0) Imports: assertthat, + camtraptor, dplyr, frictionless, glue, @@ -50,6 +51,8 @@ Imports: rlang, stringr, tidyr +Remotes: + inbo/movepub Suggests: covr, knitr, @@ -61,5 +64,5 @@ Encoding: UTF-8 LazyData: true LazyDataCompression: bzip2 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 Config/testthat/edition: 3 diff --git a/R/write_dwc.R b/R/write_dwc.R new file mode 100644 index 00000000..5296e29e --- /dev/null +++ b/R/write_dwc.R @@ -0,0 +1,153 @@ +#' Transform camera trap data to Darwin Core +#' +#' Transforms a published [Camera Trap Data Package +#' (Camtrap DP)](https://github.com/tdwg/camtrap-dp) to Darwin Core CSV and EML +#' files that can be uploaded to a [GBIF IPT](https://www.gbif.org/ipt) for +#' publication. +#' A `meta.xml` file is not created. +#' +#' @param package A Camtrap DP, as read by [read_camtrap_dp()]. +#' @param directory Path to local directory to write files to. +#' @param doi DOI of the original dataset, used to get metadata. +#' @param contact Person to be set as resource contact and metadata provider. +#' To be provided as a `person()`. +#' @param rights_holder Acronym of the organization owning or managing the +#' rights over the data. +#' @return CSV (data) and EML (metadata) files written to disk. +#' @export +#' @section Metadata: +#' +#' Metadata are derived from the original dataset by looking up its `doi` in +#' DataCite ([example](https://doi.org/10.5281/zenodo.5590881)) and transforming +#' these to EML. +#' Uses `movepub::datacite_to_eml()` under the hood. +#' The following properties are set: +#' +#' - TO TEST **title**: Original title + `[subsampled representation]`. +#' - **description**: Automatically created first paragraph describing this is +#' a derived dataset, followed by the original dataset description. +#' - **creators**: Creators of the original dataset. +#' - **license**: License of the original dataset. +#' - **contact**: `contact` or first creator of the original dataset. +#' - **metadata provider**: `contact` or first creator of the original dataset. +#' - **keywords**: Keywords of the original dataset. +#' - **alternative identifier**: DOI of original dataset. This way, no new DOI +#' will be created when publishing to GBIF. +#' - TO TEST **external link** (and alternative identifier): URL of the Movebank study. +#' +#' To be set manually in the GBIF IPT: **type**, **subtype**, +#' **update frequency**, and **publishing organization**. +#' +#' Not set: geographic, taxonomic, temporal coverage, associated parties, +#' project data, sampling methods, and citations. Not applicable: collection +#' data. +#' +#' @section Data: +#' +#' `package` is expected to contain the resources `deployments`, `media` and +#' `observations`. +#' Their CSV data are loaded in to a SQLite database, +#' [transformed to Darwin Core using SQL](https://github.com/inbo/camtraptor/tree/main/inst/sql) +#' and written to disk as CSV file(s). 
+#' +#' Key features of the Darwin Core transformation: +#' - TODO +#' @examples +#' # TODO +write_dwc <- function(package, directory = ".", doi = package$id, + contact = NULL, rights_holder = NULL) { + # TODO: Hotfix to deal with 1 level deep metadata + package <- package$datapackage + + # Retrieve metadata from DataCite and build EML + assertthat::assert_that( + !is.null(doi), + msg = "No DOI found in `package$id`, provide one in `doi` parameter." + ) + message("Creating EML metadata.") + eml <- movepub::datacite_to_eml(doi) + + # Update title + title <- paste(eml$dataset$title, "[subsampled representation]") # Used in DwC + eml$dataset$title <- title + + # Update license + license_url <- eml$dataset$intellectualRights$rightsUri # Used in DwC + license_code <- eml$dataset$intellectualRights$rightsIdentifier + eml$dataset$intellectualRights <- NULL # Remove original license elements that make EML invalid + eml$dataset$intellectualRights$para <- license_code + + # Add extra paragraph to description + first_author <- eml$dataset$creator[[1]]$individualName$surName + pub_year <- substr(eml$dataset$pubDate, 1, 4) + doi_url <- eml$dataset$alternateIdentifier[[1]] # Used in DwC + first_para <- glue::glue( + # Add span to circumvent https://github.com/ropensci/EML/issues/342 + "This camera trap dataset is derived from ", + "{first_author} et al. ({pub_year}, {doi_url}). ", + "Data have been standardized to Darwin Core using the ", + "camtraptor R package ", + "and exclude observations of humans and absence records. ", + "The original dataset description follows.", + .null = "" + ) + eml$dataset$abstract$para <- purrr::prepend( + eml$dataset$abstract$para, + paste0("") + ) + + # Update contact and set metadata provider + if (!is.null(contact)) { + eml$dataset$contact <- EML::set_responsibleParty( + givenName = contact$given, + surName = contact$family, + electronicMailAddress = contact$email, + userId = if (!is.null(contact$comment[["ORCID"]])) { + list(directory = "http://orcid.org/", contact$comment[["ORCID"]]) + } else { + NULL + } + ) + } + eml$dataset$metadataProvider <- eml$dataset$contact + + # Read data from package + # message("Reading data from `package`.") + # assertthat::assert_that( + # c("reference-data") %in% frictionless::resources(package), + # msg = "`package` must contain resource `reference-data`." + # ) + # assertthat::assert_that( + # c("gps") %in% frictionless::resources(package), + # msg = "`package` must contain resource `gps`." 
+ # ) + # ref <- frictionless::read_resource(package, "reference-data") + # gps <- frictionless::read_resource(package, "gps") + + # Create database + # message("Creating database and transforming to Darwin Core.") + # con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + # DBI::dbWriteTable(con, "reference_data", ref) + # DBI::dbWriteTable(con, "gps", gps) + + # Query database + # dwc_occurrence_sql <- glue::glue_sql( + # readr::read_file( + # system.file("sql/movebank_dwc_occurrence.sql", package = "movepub") + # ), + # .con = con + # ) + # dwc_occurrence <- DBI::dbGetQuery(con, dwc_occurrence_sql) + # DBI::dbDisconnect(con) + + # Write files + if (!dir.exists(directory)) { + dir.create(directory, recursive = TRUE) + } + EML::write_eml(eml, file.path(directory, "eml.xml")) + # readr::write_csv( + # dwc_occurrence, + # file.path(directory, "dwc_occurrence.csv"), + # na = "" + # ) +} From 50a1243e4f2177ddd8795fc592db8e49b3bfde18 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 08:52:16 +0200 Subject: [PATCH 02/13] Minor edits to get_species() --- R/get_species.R | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/R/get_species.R b/R/get_species.R index af1e9e8a..7f33d000 100644 --- a/R/get_species.R +++ b/R/get_species.R @@ -14,19 +14,17 @@ #' #' @examples #' get_species(mica) -#' get_species <- function(datapkg) { - - # check input data package + # Check input data package check_datapkg(datapkg) - - # get vernacular names and scientific names from datapackage (taxonomic - # slot) - if (!"taxonomic" %in% names(datapkg$datapackage)) return(NULL) - else { + + # Get taxonomic information from package metadata + if (!"taxonomic" %in% names(datapkg$datapackage)) { + return(NULL) + } else { taxonomy <- datapkg$datapackage$taxonomic if ("vernacularNames" %in% names(taxonomy[[1]])) { - # get all languages used in vernacularNames + # Get all languages used in vernacularNames langs <- map(taxonomy, function(x) { vernacular_languages <- NULL if ("vernacularNames" %in% names(x)) { @@ -34,20 +32,18 @@ get_species <- function(datapkg) { } }) langs <- unique(unlist(langs)) - - # fill empty vernacular names with NA - taxonomy <- map(taxonomy, - function(x) { - missing_langs <- langs[!langs %in% names(x$vernacularNames)] - for (i in missing_langs) { - x$vernacularNames[[i]] <- NA_character_ - } - x - }) + + # Fill empty vernacular names with NA + taxonomy <- map(taxonomy, function(x) { + missing_langs <- langs[!langs %in% names(x$vernacularNames)] + for (i in missing_langs) { + x$vernacularNames[[i]] <- NA_character_ + } + x + }) } - map_dfr( - taxonomy, - function(x) x %>% as.data.frame()) %>% - tibble() + map_dfr(taxonomy, function(x) { + tibble(as.data.frame(x)) + }) } } From b9e4446624ccb112157c38ff9b80d49cb17afc60 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 10:21:59 +0200 Subject: [PATCH 03/13] Set geographic, taxonomic, temporal coverage --- R/write_dwc.R | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index 5296e29e..3b7d29a7 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -23,24 +23,26 @@ #' Uses `movepub::datacite_to_eml()` under the hood. #' The following properties are set: #' -#' - TO TEST **title**: Original title + `[subsampled representation]`. +#' - **title**: Original title + `[subsampled representation]`. 
#' - **description**: Automatically created first paragraph describing this is #' a derived dataset, followed by the original dataset description. -#' - **creators**: Creators of the original dataset. #' - **license**: License of the original dataset. +#' - **creators**: Creators of the original dataset. #' - **contact**: `contact` or first creator of the original dataset. #' - **metadata provider**: `contact` or first creator of the original dataset. #' - **keywords**: Keywords of the original dataset. -#' - **alternative identifier**: DOI of original dataset. This way, no new DOI -#' will be created when publishing to GBIF. -#' - TO TEST **external link** (and alternative identifier): URL of the Movebank study. +#' - **geographic coverage**: Bounding box as defined `package$spatial`. +#' - **taxonomic coverage**: Species as defined in `package$taxonomic`. +#' - **temporal coverage**: Date range as defined in `package$temporal`. +#' - **alternative identifier**: DOI of the original dataset. This way, no new +#' DOI will be created when publishing to GBIF. +#' #' #' To be set manually in the GBIF IPT: **type**, **subtype**, #' **update frequency**, and **publishing organization**. #' -#' Not set: geographic, taxonomic, temporal coverage, associated parties, -#' project data, sampling methods, and citations. Not applicable: collection -#' data. +#' Not set: associated parties, project data, sampling methods, and citations. +#' Not applicable: collection data. #' #' @section Data: #' @@ -57,6 +59,7 @@ write_dwc <- function(package, directory = ".", doi = package$id, contact = NULL, rights_holder = NULL) { # TODO: Hotfix to deal with 1 level deep metadata + orig_package <- package package <- package$datapackage # Retrieve metadata from DataCite and build EML @@ -111,6 +114,33 @@ write_dwc <- function(package, directory = ".", doi = package$id, } eml$dataset$metadataProvider <- eml$dataset$contact + # Set taxonomic coverage + taxonomy <- get_species(orig_package) + if ("taxonRank" %in% names(taxonomy)) { + taxonomy <- dplyr::filter(taxonomy, taxonRank == "species") + } + sci_names <- + rename(taxonomy, Species = scientificName) %>% + select(Species) + + # Set temporal coverage + begin <- package$temporal$start + end <- package$temporal$end + + # Set geographic coverage + bbox <- dp$datapackage$spatial$bbox + + # Set coverage + eml$dataset$coverage <- set_coverage( + begin = begin, + end = end, + west = bbox[1], + south = bbox[2], + east = bbox[3], + north = bbox[4], + sci_names = sci_names + ) + # Read data from package # message("Reading data from `package`.") # assertthat::assert_that( From 78cad3319666a970506f0f1b283a2fec94b071c2 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 13:04:13 +0200 Subject: [PATCH 04/13] Add project data, associated parties, project url --- R/write_dwc.R | 66 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index 3b7d29a7..a3916cb3 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -31,17 +31,22 @@ #' - **contact**: `contact` or first creator of the original dataset. #' - **metadata provider**: `contact` or first creator of the original dataset. #' - **keywords**: Keywords of the original dataset. +#' - **associated parties**: Organizations as defined in +#' `package$organizations`. #' - **geographic coverage**: Bounding box as defined `package$spatial`. #' - **taxonomic coverage**: Species as defined in `package$taxonomic`. 
#' - **temporal coverage**: Date range as defined in `package$temporal`. +#' - **project data**: Title, identifier, description, and sampling design +#' information as defined in `package$project`. #' - **alternative identifier**: DOI of the original dataset. This way, no new #' DOI will be created when publishing to GBIF. -#' +#' - **external link**: URL of the project as defined in `package$project$path`. #' #' To be set manually in the GBIF IPT: **type**, **subtype**, #' **update frequency**, and **publishing organization**. #' #' Not set: associated parties, project data, sampling methods, and citations. +#' Not set: sampling methods and citations. #' Not applicable: collection data. #' #' @section Data: @@ -57,7 +62,7 @@ #' @examples #' # TODO write_dwc <- function(package, directory = ".", doi = package$id, - contact = NULL, rights_holder = NULL) { + contact = NULL, rights_holder = package$rightsHolder) { # TODO: Hotfix to deal with 1 level deep metadata orig_package <- package package <- package$datapackage @@ -87,10 +92,14 @@ write_dwc <- function(package, directory = ".", doi = package$id, first_para <- glue::glue( # Add span to circumvent https://github.com/ropensci/EML/issues/342 "This camera trap dataset is derived from ", - "{first_author} et al. ({pub_year}, {doi_url}). ", + "{first_author} et al. ({pub_year}, {doi_url}), ", + "a Camera Trap Data Package ", + "(Camtrap DP). ", "Data have been standardized to Darwin Core using the ", "camtraptor R package ", - "and exclude observations of humans and absence records. ", + "and only include observations (and associated media) of animals. ", + "Excluded are records that document blank or unclassified media, ", + "vehicles and observations of humans. ", "The original dataset description follows.", .null = "" ) @@ -114,7 +123,16 @@ write_dwc <- function(package, directory = ".", doi = package$id, } eml$dataset$metadataProvider <- eml$dataset$contact - # Set taxonomic coverage + # Add organizations as associated parties + eml$dataset$associatedParty <- + purrr::map(package$organizations, ~ EML::set_responsibleParty( + givenName = "", # Circumvent https://github.com/ropensci/EML/issues/345 + organizationName = .$title, + onlineUrl = .$path + )) + + # Set coverage + bbox <- dp$datapackage$spatial$bbox taxonomy <- get_species(orig_package) if ("taxonRank" %in% names(taxonomy)) { taxonomy <- dplyr::filter(taxonomy, taxonRank == "species") @@ -123,17 +141,9 @@ write_dwc <- function(package, directory = ".", doi = package$id, rename(taxonomy, Species = scientificName) %>% select(Species) - # Set temporal coverage - begin <- package$temporal$start - end <- package$temporal$end - - # Set geographic coverage - bbox <- dp$datapackage$spatial$bbox - - # Set coverage eml$dataset$coverage <- set_coverage( - begin = begin, - end = end, + begin = package$temporal$start, + end = package$temporal$end, west = bbox[1], south = bbox[2], east = bbox[3], @@ -141,6 +151,32 @@ write_dwc <- function(package, directory = ".", doi = package$id, sci_names = sci_names ) + # Set project metadata + project <- package$project + capture_method <- paste(package$project$captureMethod, collapse = " and ") + animal_type <- paste(package$project$animalTypes, collapse = " and ") + design_para <- glue::glue( + "This project uses a {project$samplingDesign} sampling design, ", + "with {animal_type} animals and ", + "camera traps taking media using {capture_method}. ", + "Media are classified at {project$classificationLevel} level." 
+ ) + eml$dataset$project <- list( + id = project$id, # Can be NULL, assigned as + title = project$title, + abstract = list(para = project$description), # Can be NULL + designDescription = list(description = list(para = design_para)) + ) + + # Set external link to project URL (can be NULL) + if (!is.null(project$path)) { + eml$dataset$distribution = list( + scope = "document", online = list( + url = list("function" = "information", project$path) + ) + ) + } + # Read data from package # message("Reading data from `package`.") # assertthat::assert_that( From bcff79c2fb2e591a046d1e7aaa1e0f44d3b880ac Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 13:04:24 +0200 Subject: [PATCH 05/13] Update title --- R/write_dwc.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index a3916cb3..99497da6 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -23,7 +23,7 @@ #' Uses `movepub::datacite_to_eml()` under the hood. #' The following properties are set: #' -#' - **title**: Original title + `[subsampled representation]`. +#' - **title**: Original title + `[animal observations]`. #' - **description**: Automatically created first paragraph describing this is #' a derived dataset, followed by the original dataset description. #' - **license**: License of the original dataset. @@ -45,7 +45,6 @@ #' To be set manually in the GBIF IPT: **type**, **subtype**, #' **update frequency**, and **publishing organization**. #' -#' Not set: associated parties, project data, sampling methods, and citations. #' Not set: sampling methods and citations. #' Not applicable: collection data. #' @@ -76,7 +75,7 @@ write_dwc <- function(package, directory = ".", doi = package$id, eml <- movepub::datacite_to_eml(doi) # Update title - title <- paste(eml$dataset$title, "[subsampled representation]") # Used in DwC + title <- paste(eml$dataset$title, "[animal observations]") # Used in DwC eml$dataset$title <- title # Update license From 6f65da2c5ebbbfdee93c4e8fe77245ee94b163cc Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 13:38:14 +0200 Subject: [PATCH 06/13] Remove camtraptor as its own dependency --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2548ded7..2d7c9a15 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,6 @@ Depends: R (>= 3.5.0) Imports: assertthat, - camtraptor, dplyr, frictionless, glue, From ba433cb2b61fab55533b9f0ab85d0d9e4f0d8f68 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 13:56:35 +0200 Subject: [PATCH 07/13] Add dependencies --- DESCRIPTION | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 2d7c9a15..fc8d8f71 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,9 @@ Depends: R (>= 3.5.0) Imports: assertthat, + DBI, dplyr, + EML, frictionless, glue, htmltools, @@ -48,6 +50,7 @@ Imports: purrr, readr, rlang, + RSQLite, stringr, tidyr Remotes: From ed7c86a4a415220ea5795a2aae3f56524ed0fb3e Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 14:12:43 +0200 Subject: [PATCH 08/13] Upload SQL as it was in movepub --- inst/sql/dwc_multimedia.sql | 78 +++++++++++++ inst/sql/dwc_occurrence.sql | 213 ++++++++++++++++++++++++++++++++++++ 2 files changed, 291 insertions(+) create mode 100644 inst/sql/dwc_multimedia.sql create mode 100644 inst/sql/dwc_occurrence.sql diff --git a/inst/sql/dwc_multimedia.sql b/inst/sql/dwc_multimedia.sql new file mode 100644 index 00000000..8bb443a3 --- /dev/null +++ 
b/inst/sql/dwc_multimedia.sql @@ -0,0 +1,78 @@ +/* +Created by Peter Desmet (INBO) +Mapping from Camtrap DP: https://tdwg.github.io/camtrap-dp +Mapping to Audubon Media Description: https://rs.gbif.org/extension/ac/audubon_2020_10_06.xml +Y = included in DwC, N = not included in DwC + +CAMTRAP DP MEDIA + +mediaID Y: as link to observation +deploymentID N: included at observation level +sequenceID Y: as link to observation +captureMethod ? +timestamp Y +filePath Y +fileName Y: to sort data +fileMediatype Y +exifData N +favourite N +comments N +_id N + +*/ + +-- Observations can be based on sequences (sequenceID) or individual files (mediaID) +-- Make two joins and union to capture both cases without overlap +WITH observations_media AS ( +-- Sequence based observations + SELECT obs.observationID, obs.timestamp AS observationTimestamp, med.* + FROM observations AS obs + LEFT JOIN media AS med ON obs.sequenceID = med.sequenceID + WHERE obs.observationType = 'animal' AND obs.mediaID IS NULL + UNION +-- File based observations + SELECT obs.observationID, obs.timestamp AS observationTimestamp, med.* + FROM observations AS obs + LEFT JOIN media AS med ON obs.mediaID = med.mediaID + WHERE obs.observationType = 'animal' AND obs.mediaID IS NOT NULL +) + +SELECT +-- occurrenceID + obs_med.observationID AS occurrenceID, +-- creator +-- providerLiteral +-- provider +-- rights + {metadata$mediaLicense} AS rights, +-- owner +-- identifier + obs_med.mediaID AS identifier, +-- type + CASE + WHEN obs_med.fileMediatype LIKE '%video%' THEN 'MovingImage' + ELSE 'StillImage' + END AS type, +-- providerManagedID + obs_med._id AS providerManagedID, +-- captureDevice +-- dep.cameraModel AS captureDevice, +-- resourceCreationTechnique + obs_med.captureMethod AS resourceCreationTechnique, +-- accessURI + obs_med.filePath AS accessURI, +-- format + obs_med.fileMediatype AS format, +-- CreateDate + STRFTIME('%Y-%m-%dT%H:%M:%SZ', datetime(obs_med.timestamp, 'unixepoch')) AS createDate + +FROM + observations_media AS obs_med + LEFT JOIN deployments AS dep + ON obs_med.deploymentID = dep.deploymentID + +ORDER BY +-- Order is not retained in observations_media, so important to sort + obs_med.observationTimestamp, + obs_med.timestamp, + obs_med.fileName diff --git a/inst/sql/dwc_occurrence.sql b/inst/sql/dwc_occurrence.sql new file mode 100644 index 00000000..ab27ca31 --- /dev/null +++ b/inst/sql/dwc_occurrence.sql @@ -0,0 +1,213 @@ +/* +Created by Peter Desmet (INBO) +Mapping from Camtrap DP: https://tdwg.github.io/camtrap-dp +Mapping to Darwin Core Occurrence: https://rs.gbif.org/core/dwc_occurrence_2022-02-02.xml +Y = included in DwC, N = not included in DwC + +CAMTRAP DP DEPLOYMENTS + +deploymentID Y +locationID Y +locationName Y +longitude Y +latitude Y +coordinateUncertainty Y +start Y +end Y +setupBy N +cameraID N +cameraModel N +cameraInterval N +cameraHeight N +cameraTilt N +cameraHeading N +timestampIssues N +baitUse Y +session N +array N +featureType Y +habitat Y +tags Y +comments Y +_id N + +CAMTRAP DP OBSERVATIONS + +observationID Y +deploymentID Y +sequenceID Y +mediaID N: see dwc_multimedia +timestamp Y +observationType Y: as filter +cameraSetup N +taxonID Y +scientificName Y +count Y +countNew N +lifeStage Y +sex Y +behaviour Y +individualID Y +classificationMethod Y +classifiedBy Y +classificationTimestamp Y +classificationConfidence Y +comments Y +_id N + +*/ + +SELECT +-- RECORD-LEVEL +-- type + -- Static value + 'Event' AS type, +-- license + {metadata$dataLicense} AS license, +-- rightsHolder + 
{metadata$rightsHolder} AS rightsHolder, +-- bibliographicCitation + -- How *record* should be cited, don't use dataset-wide metadata$bibliographicCitation +-- datasetID + {metadata$id} AS datasetID, +-- institutionCode + {metadata$organization} AS institutionCode, +-- collectionCode + {metadata$source} AS collectionCode, +-- datasetName + {metadata$projectTitle} AS datasetName, +-- basisOfRecord + -- Static value + 'MachineObservation' AS basisOfRecord, +-- informationWithheld +-- dataGeneralizations +-- dynamicProperties + +-- OCCURRENCE +-- occurrenceID + obs.observationID AS occurrenceID, +-- individualCount + obs.count AS individualCount, +-- sex + obs.sex AS sex, +-- lifeStage + obs.lifeStage AS lifeStage, +-- behavior + obs.behaviour AS behavior, +-- occurrenceStatus + -- Static value + 'present' AS occurrenceStatus, +-- occurrenceRemarks + obs.comments AS occurrenceRemarks, + +-- ORGANISM +-- organismID + obs.individualID AS organismID, + +-- MATERIALSAMPLE + -- Not applicable + +-- EVENT +-- eventID + obs.sequenceID AS eventID, +-- parentEventID + obs.deploymentID AS parentEventID, +-- eventDate + -- ISO-8601 in UTC + strftime('%Y-%m-%dT%H:%M:%SZ', datetime(obs.timestamp, 'unixepoch')) AS eventDate, +-- eventTime + -- Included in eventDate +-- habitat + dep.habitat AS habitat, +-- samplingProtocol + 'camera trap' || + CASE + WHEN dep.baitUse IS 'none' THEN ' without bait' + WHEN dep.baitUse IS NOT NULL THEN ' with bait' + ELSE '' + END AS samplingProtocol, +-- samplingEffort + -- Duration of deployment + strftime('%Y-%m-%dT%H:%M:%SZ', datetime(dep.start, 'unixepoch')) || + '/' || + strftime('%Y-%m-%dT%H:%M:%SZ', datetime(dep.end, 'unixepoch')) AS samplingEffort, +-- eventRemarks + CASE + WHEN dep.comments IS NOT NULL THEN dep.comments + ELSE '' + END || + CASE + WHEN dep.comments IS NOT NULL AND dep.tags IS NOT NULL THEN ' | ' + ELSE '' + END || + CASE + WHEN dep.tags IS NOT NULL THEN 'tags: ' || dep.tags + ELSE '' + END AS eventRemarks, + +-- LOCATION +-- locationID + dep.locationID AS locationID, +-- countryCode + -- Single value might not apply to whole dataset, assumes coordinates are provided. 
+-- locality + dep.locationName AS locality, +-- locationRemarks + dep.featureType AS locationRemarks, +-- decimalLatitude + dep.latitude AS decimalLatitude, +-- decimalLongitude + dep.longitude AS decimalLongitude, +-- geodeticDatum + -- Static value + 'WGS84' AS geodeticDatum, +-- coordinateUncertaintyInMeters + dep.coordinateUncertainty AS coordinateUncertaintyInMeters, + +-- GEOLOGICAL CONTEXT + -- Not applicable + +-- IDENTIFICATION +-- identifiedBy + obs.classifiedBy AS identifiedBy, +-- identifiedByID +-- dateIdentified + -- ISO-8601 in UTC + strftime('%Y-%m-%dT%H:%M:%SZ', datetime(obs.classificationTimestamp, 'unixepoch')) AS dateIdentified, +-- identificationRemarks + CASE + WHEN obs.classificationMethod IS NOT NULL THEN 'classificationMethod: ' || obs.classificationMethod + ELSE '' + END || + CASE + WHEN obs.classificationMethod IS NOT NULL AND obs.classificationConfidence IS NOT NULL THEN ' | ' + ELSE '' + END || + CASE + WHEN obs.classificationConfidence IS NOT NULL THEN 'classificationConfidence: ' || obs.classificationConfidence + ELSE '' + END AS identificationRemarks, + +-- TAXON +-- taxonID + obs.taxonID AS taxonID, +-- scientificName + obs.scientificName AS scientificName, +-- kingdom + -- Static value: records are filtered on animals in WHERE clause + 'Animalia' AS kingdom +-- taxonRank +-- vernacularName + +FROM + observations AS obs + LEFT JOIN deployments AS dep + ON obs.deploymentID = dep.deploymentID + +WHERE + -- Select biological observations only (excluding observations marked as human, blank, vehicle) + -- Same filter should be used in dwc_multimedia.sql + obs.observationType = 'animal' + +ORDER BY + obs.timestamp From 3b275dd9ba97119bacf9453ea3e68bc020458c47 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 14:13:02 +0200 Subject: [PATCH 09/13] Fix naming issues --- R/write_dwc.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index 99497da6..210267b8 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -131,16 +131,16 @@ write_dwc <- function(package, directory = ".", doi = package$id, )) # Set coverage - bbox <- dp$datapackage$spatial$bbox + bbox <- package$spatial$bbox taxonomy <- get_species(orig_package) if ("taxonRank" %in% names(taxonomy)) { taxonomy <- dplyr::filter(taxonomy, taxonRank == "species") } sci_names <- - rename(taxonomy, Species = scientificName) %>% - select(Species) + dplyr::rename(taxonomy, Species = scientificName) %>% + dplyr::select(Species) - eml$dataset$coverage <- set_coverage( + eml$dataset$coverage <- EML::set_coverage( begin = package$temporal$start, end = package$temporal$end, west = bbox[1], From fc5e60d0a65a434ea60bfea302f479c2eb0ec4cb Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 14:13:30 +0200 Subject: [PATCH 10/13] Transform data with function --- R/write_dwc.R | 55 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index 210267b8..2970c8d0 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -177,42 +177,41 @@ write_dwc <- function(package, directory = ".", doi = package$id, } # Read data from package - # message("Reading data from `package`.") - # assertthat::assert_that( - # c("reference-data") %in% frictionless::resources(package), - # msg = "`package` must contain resource `reference-data`." - # ) - # assertthat::assert_that( - # c("gps") %in% frictionless::resources(package), - # msg = "`package` must contain resource `gps`." 
- # ) - # ref <- frictionless::read_resource(package, "reference-data") - # gps <- frictionless::read_resource(package, "gps") + # Already read with read_camtrap_dp() # Create database - # message("Creating database and transforming to Darwin Core.") - # con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - # DBI::dbWriteTable(con, "reference_data", ref) - # DBI::dbWriteTable(con, "gps", gps) + message("Creating database and transforming to Darwin Core.") + con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + DBI::dbWriteTable(con, "deployments", orig_package$deployments) + DBI::dbWriteTable(con, "media", orig_package$media) + DBI::dbWriteTable(con, "observations", orig_package$observations) # Query database - # dwc_occurrence_sql <- glue::glue_sql( - # readr::read_file( - # system.file("sql/movebank_dwc_occurrence.sql", package = "movepub") - # ), - # .con = con - # ) - # dwc_occurrence <- DBI::dbGetQuery(con, dwc_occurrence_sql) - # DBI::dbDisconnect(con) + dwc_occurrence_sql <- glue::glue_sql( + readr::read_file( + system.file("sql/dwc_occurrence.sql", package = "camtraptor") + ), + .con = con + ) + dwc_multimedia_sql <- glue::glue_sql( + readr::read_file( + system.file("sql/dwc_multimedia.sql", package = "camtraptor") + ), + .con = con + ) + dwc_occurrence <- DBI::dbGetQuery(con, dwc_occurrence_sql) + dwc_multimedia <- DBI::dbGetQuery(con, dwc_multimedia_sql) + DBI::dbDisconnect(con) # Write files if (!dir.exists(directory)) { dir.create(directory, recursive = TRUE) } EML::write_eml(eml, file.path(directory, "eml.xml")) - # readr::write_csv( - # dwc_occurrence, - # file.path(directory, "dwc_occurrence.csv"), - # na = "" - # ) + readr::write_csv( + dwc_occurrence, file.path(directory, "dwc_occurrence.csv"), na = "" + ) + readr::write_csv( + dwc_multimedia, file.path(directory, "dwc_multimedia.csv"), na = "" + ) } From e0f2b0e3523378b26bfc104bce77ac6e66ac2a88 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 14:13:49 +0200 Subject: [PATCH 11/13] Circumvent #116 --- R/write_dwc.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index 2970c8d0..beeced0a 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -182,9 +182,9 @@ write_dwc <- function(package, directory = ".", doi = package$id, # Create database message("Creating database and transforming to Darwin Core.") con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - DBI::dbWriteTable(con, "deployments", orig_package$deployments) - DBI::dbWriteTable(con, "media", orig_package$media) - DBI::dbWriteTable(con, "observations", orig_package$observations) + DBI::dbWriteTable(con, "deployments", dplyr::tibble(orig_package$deployments)) + DBI::dbWriteTable(con, "media", dplyr::tibble(orig_package$media)) + DBI::dbWriteTable(con, "observations", dplyr::tibble(orig_package$observations)) # Query database dwc_occurrence_sql <- glue::glue_sql( From 9ed4b8a7b400d39c120aa455bc5dc9fa1f290275 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 16:59:08 +0200 Subject: [PATCH 12/13] Rework mapping (still 0.1.7 format) --- R/write_dwc.R | 6 + inst/sql/dwc_multimedia.sql | 69 ++++----- inst/sql/dwc_occurrence.sql | 271 ++++++++++++------------------------ 3 files changed, 123 insertions(+), 223 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index beeced0a..ef843d38 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -74,6 +74,9 @@ write_dwc <- function(package, directory = ".", doi = package$id, message("Creating EML metadata.") eml <- movepub::datacite_to_eml(doi) + # 
Set platform + platform <- package$platform$title # Use in DwC + # Update title title <- paste(eml$dataset$title, "[animal observations]") # Used in DwC eml$dataset$title <- title @@ -84,6 +87,9 @@ write_dwc <- function(package, directory = ".", doi = package$id, eml$dataset$intellectualRights <- NULL # Remove original license elements that make EML invalid eml$dataset$intellectualRights$para <- license_code + # Set media license + media_license_url <- purrr::keep(package$licenses, ~ .$scope == "media")[[1]]$path + # Add extra paragraph to description first_author <- eml$dataset$creator[[1]]$individualName$surName pub_year <- substr(eml$dataset$pubDate, 1, 4) diff --git a/inst/sql/dwc_multimedia.sql b/inst/sql/dwc_multimedia.sql index 8bb443a3..b0e42fac 100644 --- a/inst/sql/dwc_multimedia.sql +++ b/inst/sql/dwc_multimedia.sql @@ -1,24 +1,19 @@ /* -Created by Peter Desmet (INBO) -Mapping from Camtrap DP: https://tdwg.github.io/camtrap-dp -Mapping to Audubon Media Description: https://rs.gbif.org/extension/ac/audubon_2020_10_06.xml -Y = included in DwC, N = not included in DwC - -CAMTRAP DP MEDIA - -mediaID Y: as link to observation -deploymentID N: included at observation level -sequenceID Y: as link to observation -captureMethod ? -timestamp Y -filePath Y -fileName Y: to sort data -fileMediatype Y -exifData N -favourite N -comments N -_id N +Schema: https://rs.gbif.org/extension/ac/audubon_2020_10_06.xml +Camtrap DP terms and whether they are included in DwC (Y) or not (N): +media.mediaID Y: as link to observation +media.deploymentID N: included at observation level +media.sequenceID Y: as link to observation +media.captureMethod Y +media.timestamp Y +media.filePath Y +media.fileName Y: to sort data +media.fileMediatype Y +media.exifData N +media.favourite N +media.comments Y +media._id N */ -- Observations can be based on sequences (sequenceID) or individual files (mediaID) @@ -38,33 +33,21 @@ WITH observations_media AS ( ) SELECT --- occurrenceID - obs_med.observationID AS occurrenceID, --- creator --- providerLiteral --- provider --- rights - {metadata$mediaLicense} AS rights, --- owner --- identifier - obs_med.mediaID AS identifier, --- type + obs_med.observationID AS occurrenceID, +-- provider: can be org managing the platform, but that info is not available + {media_license_url} AS rights, + obs_med.mediaID AS identifier, CASE WHEN obs_med.fileMediatype LIKE '%video%' THEN 'MovingImage' ELSE 'StillImage' - END AS type, --- providerManagedID - obs_med._id AS providerManagedID, --- captureDevice --- dep.cameraModel AS captureDevice, --- resourceCreationTechnique - obs_med.captureMethod AS resourceCreationTechnique, --- accessURI - obs_med.filePath AS accessURI, --- format - obs_med.fileMediatype AS format, --- CreateDate - STRFTIME('%Y-%m-%dT%H:%M:%SZ', datetime(obs_med.timestamp, 'unixepoch')) AS createDate + END AS type, + obs_med._id AS providerManagedID, + obs_med.comments AS comments, + dep.cameraModel AS captureDevice, + obs_med.captureMethod AS resourceCreationTechnique, + obs_med.filePath AS accessURI, + obs_med.fileMediatype AS format, + STRFTIME('%Y-%m-%dT%H:%M:%SZ', datetime(obs_med.timestamp, 'unixepoch')) AS CreateDate FROM observations_media AS obs_med diff --git a/inst/sql/dwc_occurrence.sql b/inst/sql/dwc_occurrence.sql index ab27ca31..aabfcba3 100644 --- a/inst/sql/dwc_occurrence.sql +++ b/inst/sql/dwc_occurrence.sql @@ -1,203 +1,114 @@ /* -Created by Peter Desmet (INBO) -Mapping from Camtrap DP: https://tdwg.github.io/camtrap-dp -Mapping to Darwin Core Occurrence: 
https://rs.gbif.org/core/dwc_occurrence_2022-02-02.xml -Y = included in DwC, N = not included in DwC - -CAMTRAP DP DEPLOYMENTS - -deploymentID Y -locationID Y -locationName Y -longitude Y -latitude Y -coordinateUncertainty Y -start Y -end Y -setupBy N -cameraID N -cameraModel N -cameraInterval N -cameraHeight N -cameraTilt N -cameraHeading N -timestampIssues N -baitUse Y -session N -array N -featureType Y -habitat Y -tags Y -comments Y -_id N - -CAMTRAP DP OBSERVATIONS - -observationID Y -deploymentID Y -sequenceID Y -mediaID N: see dwc_multimedia -timestamp Y -observationType Y: as filter -cameraSetup N -taxonID Y -scientificName Y -count Y -countNew N -lifeStage Y -sex Y -behaviour Y -individualID Y -classificationMethod Y -classifiedBy Y -classificationTimestamp Y -classificationConfidence Y -comments Y -_id N - +Schema: https://rs.gbif.org/core/dwc_occurrence_2022-02-02.xml +Camtrap DP terms and whether they are included in DwC (Y) or not (N): + +deployments.deploymentID Y +deployments.locationID Y +deployments.locationName Y +deployments.longitude Y +deployments.latitude Y +deployments.coordinateUncertainty Y +deployments.start Y +deployments.end Y +deployments.setupBy N +deployments.cameraID N +deployments.cameraModel Y: in dwc_multimedia +deployments.cameraInterval N +deployments.cameraHeight N +deployments.cameraTilt N +deployments.cameraHeading N +deployments.timestampIssues N +deployments.baitUse Y +deployments.session N +deployments.array N +deployments.featureType Y +deployments.habitat Y +deployments.tags Y +deployments.comments Y +deployments._id N +observations.observationID Y +observations.deploymentID Y +observations.sequenceID Y +observations.mediaID N: in dwc_multimedia +observations.timestamp Y +observations.observationType Y: as filter +observations.cameraSetup N +observations.taxonID Y +observations.scientificName Y +observations.count Y +observations.countNew N +observations.lifeStage Y +observations.sex Y +observations.behaviour Y +observations.individualID Y +observations.classificationMethod Y +observations.classifiedBy Y +observations.classificationTimestamp Y +observations.classificationConfidence Y +observations.comments Y +observations._id N */ SELECT -- RECORD-LEVEL --- type - -- Static value - 'Event' AS type, --- license - {metadata$dataLicense} AS license, --- rightsHolder - {metadata$rightsHolder} AS rightsHolder, --- bibliographicCitation - -- How *record* should be cited, don't use dataset-wide metadata$bibliographicCitation --- datasetID - {metadata$id} AS datasetID, --- institutionCode - {metadata$organization} AS institutionCode, --- collectionCode - {metadata$source} AS collectionCode, --- datasetName - {metadata$projectTitle} AS datasetName, --- basisOfRecord - -- Static value - 'MachineObservation' AS basisOfRecord, --- informationWithheld --- dataGeneralizations --- dynamicProperties - + 'Event' AS type, + {license_url} AS license, + {rights_holder} AS rightsHolder, +-- bibliographicCitation: how *record* should be cited, so not package bibliographicCitation + {doi_url} AS datasetID, +-- institutionCode: org managing the platform/collection, but that info is not available + {platform} AS collectionCode, + {title} AS datasetName, + 'MachineObservation' AS basisOfRecord, + 'see metadata' AS informationWithheld, -- OCCURRENCE --- occurrenceID - obs.observationID AS occurrenceID, --- individualCount - obs.count AS individualCount, --- sex - obs.sex AS sex, --- lifeStage - obs.lifeStage AS lifeStage, --- behavior - obs.behaviour AS behavior, --- 
occurrenceStatus - -- Static value - 'present' AS occurrenceStatus, --- occurrenceRemarks - obs.comments AS occurrenceRemarks, - + obs.observationID AS occurrenceID, + obs.count AS individualCount, + obs.sex AS sex, + obs.lifeStage AS lifeStage, + obs.behaviour AS behavior, + 'present' AS occurrenceStatus, + obs.comments AS occurrenceRemarks, -- ORGANISM --- organismID - obs.individualID AS organismID, - --- MATERIALSAMPLE - -- Not applicable - + obs.individualID AS organismID, -- EVENT --- eventID - obs.sequenceID AS eventID, --- parentEventID - obs.deploymentID AS parentEventID, --- eventDate - -- ISO-8601 in UTC + obs.sequenceID AS eventID, + obs.deploymentID AS parentEventID, strftime('%Y-%m-%dT%H:%M:%SZ', datetime(obs.timestamp, 'unixepoch')) AS eventDate, --- eventTime - -- Included in eventDate --- habitat - dep.habitat AS habitat, --- samplingProtocol + dep.habitat AS habitat, 'camera trap' || CASE WHEN dep.baitUse IS 'none' THEN ' without bait' WHEN dep.baitUse IS NOT NULL THEN ' with bait' ELSE '' - END AS samplingProtocol, --- samplingEffort - -- Duration of deployment + END AS samplingProtocol, strftime('%Y-%m-%dT%H:%M:%SZ', datetime(dep.start, 'unixepoch')) || '/' || - strftime('%Y-%m-%dT%H:%M:%SZ', datetime(dep.end, 'unixepoch')) AS samplingEffort, --- eventRemarks - CASE - WHEN dep.comments IS NOT NULL THEN dep.comments - ELSE '' - END || - CASE - WHEN dep.comments IS NOT NULL AND dep.tags IS NOT NULL THEN ' | ' - ELSE '' - END || - CASE - WHEN dep.tags IS NOT NULL THEN 'tags: ' || dep.tags - ELSE '' - END AS eventRemarks, - + strftime('%Y-%m-%dT%H:%M:%SZ', datetime(dep.end, 'unixepoch')) AS samplingEffort, -- Duration of deployment + COALESCE( + dep.comments || ' | tags: ' || dep.tags, + 'tags: ' || dep.tags, + dep.comments + ) AS eventRemarks, -- LOCATION --- locationID - dep.locationID AS locationID, --- countryCode - -- Single value might not apply to whole dataset, assumes coordinates are provided. 
--- locality - dep.locationName AS locality, --- locationRemarks - dep.featureType AS locationRemarks, --- decimalLatitude - dep.latitude AS decimalLatitude, --- decimalLongitude - dep.longitude AS decimalLongitude, --- geodeticDatum - -- Static value - 'WGS84' AS geodeticDatum, --- coordinateUncertaintyInMeters - dep.coordinateUncertainty AS coordinateUncertaintyInMeters, - --- GEOLOGICAL CONTEXT - -- Not applicable - + dep.locationID AS locationID, + dep.locationName AS locality, + dep.featureType AS locationRemarks, + dep.latitude AS decimalLatitude, + dep.longitude AS decimalLongitude, + 'WGS84' AS geodeticDatum, + dep.coordinateUncertainty AS coordinateUncertaintyInMeters, -- IDENTIFICATION --- identifiedBy - obs.classifiedBy AS identifiedBy, --- identifiedByID --- dateIdentified - -- ISO-8601 in UTC + obs.classifiedBy AS identifiedBy, strftime('%Y-%m-%dT%H:%M:%SZ', datetime(obs.classificationTimestamp, 'unixepoch')) AS dateIdentified, --- identificationRemarks - CASE - WHEN obs.classificationMethod IS NOT NULL THEN 'classificationMethod: ' || obs.classificationMethod - ELSE '' - END || - CASE - WHEN obs.classificationMethod IS NOT NULL AND obs.classificationConfidence IS NOT NULL THEN ' | ' - ELSE '' - END || - CASE - WHEN obs.classificationConfidence IS NOT NULL THEN 'classificationConfidence: ' || obs.classificationConfidence - ELSE '' - END AS identificationRemarks, - + COALESCE( + 'classified by ' || obs.classificationMethod || ' with ' || obs.classificationConfidence || ' confidence', + 'classified by ' || obs.classificationMethod + ) AS identificationRemarks, -- TAXON --- taxonID - obs.taxonID AS taxonID, --- scientificName - obs.scientificName AS scientificName, --- kingdom - -- Static value: records are filtered on animals in WHERE clause - 'Animalia' AS kingdom --- taxonRank --- vernacularName + obs.taxonID AS taxonID, + obs.scientificName AS scientificName, + 'Animalia' AS kingdom FROM observations AS obs From ed85cd5068d527f2f4c47869c60686f9396ebb89 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 17:01:06 +0200 Subject: [PATCH 13/13] devtools::document() --- NAMESPACE | 1 + R/read_camtrap_dp.R | 4 +- man/filter_predicate.Rd | 24 +++++++---- man/get_species.Rd | 1 - man/read_camtrap_dp.Rd | 4 +- man/write_dwc.Rd | 90 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 111 insertions(+), 13 deletions(-) create mode 100644 man/write_dwc.Rd diff --git a/NAMESPACE b/NAMESPACE index 5ba538c2..59158582 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ export(pred_notna) export(pred_or) export(read_camtrap_dp) export(transform_effort_to_common_units) +export(write_dwc) importFrom(assertthat,assert_that) importFrom(dplyr,"%>%") importFrom(dplyr,.data) diff --git a/R/read_camtrap_dp.R b/R/read_camtrap_dp.R index e9b7100c..ca1159be 100644 --- a/R/read_camtrap_dp.R +++ b/R/read_camtrap_dp.R @@ -1,7 +1,7 @@ -#' Read camtrap-dp formatted data +#' Read Camtrap DP formatted data #' #' This function reads camera trap data formatted following the [Camera Trap -#' Data Package (Camtrap DP)](https://github.com/tdwg/camtrap-dp) format. The +#' Data Package (Camtrap DP)](https://tdwg.github.io/camtrap-dpdp) format. The #' function is built upon the functions \link[frictionless]{read_package} and #' \link[frictionless]{read_resource}. This means a.o. 
that all datetime
 #' information included in the camera trap data package is automatically
diff --git a/man/filter_predicate.Rd b/man/filter_predicate.Rd
index bee6d4ff..8cd12bdf 100644
--- a/man/filter_predicate.Rd
+++ b/man/filter_predicate.Rd
@@ -117,7 +117,9 @@
 Internally, the input to \verb{pred*} functions turn into a character
 string, which forms the body of a filter expression. For example:
 
-\code{pred("tags", "boven de stroom")} gives:\preformatted{$arg
+\code{pred("tags", "boven de stroom")} gives:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{$arg
 [1] "tags"
 
 $value
 [1] "boven de stroom"
 
 $type
 [1] "equals"
 
 $expr
 (tags == "boven de stroom")
-}
+}\if{html}{\out{</div>}}
 
-\code{pred_gt("latitude", 51.27)} gives, (only \code{expr} slot shown):\preformatted{(latitude > 51.27)
-}
+\code{pred_gt("latitude", 51.27)} gives, (only \code{expr} slot shown):
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{(latitude > 51.27)
+}\if{html}{\out{</div>}}
 
-\code{pred_or()} gives:\preformatted{((tags == "boven de stroom") | (latitude > 51.28))
-}
+\code{pred_or()} gives:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{((tags == "boven de stroom") | (latitude > 51.28))
+}\if{html}{\out{</div>}}
 
-\code{pred_or()} gives:\preformatted{((tags == "boven de stroom") & (latitude > 51.28))
-}
+\code{pred_or()} gives:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{((tags == "boven de stroom") & (latitude > 51.28))
+}\if{html}{\out{</div>
}} } \section{Keys}{ diff --git a/man/get_species.Rd b/man/get_species.Rd index 1660d0e6..8e99a165 100644 --- a/man/get_species.Rd +++ b/man/get_species.Rd @@ -19,5 +19,4 @@ Function to get all identified species } \examples{ get_species(mica) - } diff --git a/man/read_camtrap_dp.Rd b/man/read_camtrap_dp.Rd index 53224695..96bdecaf 100644 --- a/man/read_camtrap_dp.Rd +++ b/man/read_camtrap_dp.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/read_camtrap_dp.R \name{read_camtrap_dp} \alias{read_camtrap_dp} -\title{Read camtrap-dp formatted data} +\title{Read Camtrap DP formatted data} \usage{ read_camtrap_dp(file = NULL, media = TRUE, path = lifecycle::deprecated()) } @@ -26,7 +26,7 @@ A list containing three (tibble) data.frames: and a list with metadata: \code{datapackage}. } \description{ -This function reads camera trap data formatted following the \href{https://github.com/tdwg/camtrap-dp}{Camera Trap Data Package (Camtrap DP)} format. The +This function reads camera trap data formatted following the \href{https://tdwg.github.io/camtrap-dpdp}{Camera Trap Data Package (Camtrap DP)} format. The function is built upon the functions \link[frictionless]{read_package} and \link[frictionless]{read_resource}. This means a.o. that all datetime information included in the camera trap data package is automatically diff --git a/man/write_dwc.Rd b/man/write_dwc.Rd new file mode 100644 index 00000000..a5819c6e --- /dev/null +++ b/man/write_dwc.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_dwc.R +\name{write_dwc} +\alias{write_dwc} +\title{Transform camera trap data to Darwin Core} +\usage{ +write_dwc( + package, + directory = ".", + doi = package$id, + contact = NULL, + rights_holder = package$rightsHolder +) +} +\arguments{ +\item{package}{A Camtrap DP, as read by \code{\link[=read_camtrap_dp]{read_camtrap_dp()}}.} + +\item{directory}{Path to local directory to write files to.} + +\item{doi}{DOI of the original dataset, used to get metadata.} + +\item{contact}{Person to be set as resource contact and metadata provider. +To be provided as a \code{person()}.} + +\item{rights_holder}{Acronym of the organization owning or managing the +rights over the data.} +} +\value{ +CSV (data) and EML (metadata) files written to disk. +} +\description{ +Transforms a published \href{https://github.com/tdwg/camtrap-dp}{Camera Trap Data Package (Camtrap DP)} to Darwin Core CSV and EML +files that can be uploaded to a \href{https://www.gbif.org/ipt}{GBIF IPT} for +publication. +A \code{meta.xml} file is not created. +} +\section{Metadata}{ + + +Metadata are derived from the original dataset by looking up its \code{doi} in +DataCite (\href{https://doi.org/10.5281/zenodo.5590881}{example}) and transforming +these to EML. +Uses \code{movepub::datacite_to_eml()} under the hood. +The following properties are set: +\itemize{ +\item \strong{title}: Original title + \verb{[animal observations]}. +\item \strong{description}: Automatically created first paragraph describing this is +a derived dataset, followed by the original dataset description. +\item \strong{license}: License of the original dataset. +\item \strong{creators}: Creators of the original dataset. +\item \strong{contact}: \code{contact} or first creator of the original dataset. +\item \strong{metadata provider}: \code{contact} or first creator of the original dataset. +\item \strong{keywords}: Keywords of the original dataset. 
+\item \strong{associated parties}: Organizations as defined in
+\code{package$organizations}.
+\item \strong{geographic coverage}: Bounding box as defined in \code{package$spatial}.
+\item \strong{taxonomic coverage}: Species as defined in \code{package$taxonomic}.
+\item \strong{temporal coverage}: Date range as defined in \code{package$temporal}.
+\item \strong{project data}: Title, identifier, description, and sampling design
+information as defined in \code{package$project}.
+\item \strong{alternative identifier}: DOI of the original dataset. This way, no new
+DOI will be created when publishing to GBIF.
+\item \strong{external link}: URL of the project as defined in \code{package$project$path}.
+}
+
+To be set manually in the GBIF IPT: \strong{type}, \strong{subtype},
+\strong{update frequency}, and \strong{publishing organization}.
+
+Not set: sampling methods and citations.
+Not applicable: collection data.
+}
+
+\section{Data}{
+
+
+\code{package} is expected to contain the resources \code{deployments}, \code{media} and
+\code{observations}.
+Their CSV data are loaded into a SQLite database,
+\href{https://github.com/inbo/camtraptor/tree/main/inst/sql}{transformed to Darwin Core using SQL}
+and written to disk as CSV file(s).
+
+Key features of the Darwin Core transformation:
+\itemize{
+\item TODO
+}
+}
+
+\examples{
+# TODO
+}
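A minimal usage sketch of the new function, based only on the signature and roxygen documentation introduced in this patch series. The input file, output directory, contact person and rights holder below are hypothetical placeholders, not part of the patches.

library(camtraptor)

# Read a published Camtrap DP (hypothetical local datapackage.json)
dp <- read_camtrap_dp("datapackage.json")

# Transform to Darwin Core CSV and EML files, as documented for write_dwc()
write_dwc(
  dp,
  directory = "dwc",                               # hypothetical output folder
  doi = "https://doi.org/10.5281/zenodo.5590881",  # DOI example cited in the docs above
  contact = person(
    given = "Jane", family = "Doe", email = "jane.doe@example.org",
    comment = c(ORCID = "0000-0000-0000-0000")     # hypothetical contact person
  ),
  rights_holder = "INBO"                           # hypothetical rights holder
)

# Per PATCH 10/13, this should write eml.xml, dwc_occurrence.csv and
# dwc_multimedia.csv to the "dwc" directory.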