From 8758de614066ddc06fce2cdd7aa02d3896064a1e Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Thu, 2 Jun 2022 16:54:01 +0200 Subject: [PATCH 01/13] Start write_dwc() function Currently generates metadata only --- DESCRIPTION | 5 +- R/write_dwc.R | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 R/write_dwc.R diff --git a/DESCRIPTION b/DESCRIPTION index 51b2bb08..2548ded7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,6 +38,7 @@ Depends: R (>= 3.5.0) Imports: assertthat, + camtraptor, dplyr, frictionless, glue, @@ -50,6 +51,8 @@ Imports: rlang, stringr, tidyr +Remotes: + inbo/movepub Suggests: covr, knitr, @@ -61,5 +64,5 @@ Encoding: UTF-8 LazyData: true LazyDataCompression: bzip2 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 +RoxygenNote: 7.2.0 Config/testthat/edition: 3 diff --git a/R/write_dwc.R b/R/write_dwc.R new file mode 100644 index 00000000..5296e29e --- /dev/null +++ b/R/write_dwc.R @@ -0,0 +1,153 @@ +#' Transform camera trap data to Darwin Core +#' +#' Transforms a published [Camera Trap Data Package +#' (Camtrap DP)](https://github.com/tdwg/camtrap-dp) to Darwin Core CSV and EML +#' files that can be uploaded to a [GBIF IPT](https://www.gbif.org/ipt) for +#' publication. +#' A `meta.xml` file is not created. +#' +#' @param package A Camtrap DP, as read by [read_camtrap_dp()]. +#' @param directory Path to local directory to write files to. +#' @param doi DOI of the original dataset, used to get metadata. +#' @param contact Person to be set as resource contact and metadata provider. +#' To be provided as a `person()`. +#' @param rights_holder Acronym of the organization owning or managing the +#' rights over the data. +#' @return CSV (data) and EML (metadata) files written to disk. +#' @export +#' @section Metadata: +#' +#' Metadata are derived from the original dataset by looking up its `doi` in +#' DataCite ([example](https://doi.org/10.5281/zenodo.5590881)) and transforming +#' these to EML. +#' Uses `movepub::datacite_to_eml()` under the hood. +#' The following properties are set: +#' +#' - TO TEST **title**: Original title + `[subsampled representation]`. +#' - **description**: Automatically created first paragraph describing this is +#' a derived dataset, followed by the original dataset description. +#' - **creators**: Creators of the original dataset. +#' - **license**: License of the original dataset. +#' - **contact**: `contact` or first creator of the original dataset. +#' - **metadata provider**: `contact` or first creator of the original dataset. +#' - **keywords**: Keywords of the original dataset. +#' - **alternative identifier**: DOI of original dataset. This way, no new DOI +#' will be created when publishing to GBIF. +#' - TO TEST **external link** (and alternative identifier): URL of the Movebank study. +#' +#' To be set manually in the GBIF IPT: **type**, **subtype**, +#' **update frequency**, and **publishing organization**. +#' +#' Not set: geographic, taxonomic, temporal coverage, associated parties, +#' project data, sampling methods, and citations. Not applicable: collection +#' data. +#' +#' @section Data: +#' +#' `package` is expected to contain the resources `deployments`, `media` and +#' `observations`. +#' Their CSV data are loaded in to a SQLite database, +#' [transformed to Darwin Core using SQL](https://github.com/inbo/camtraptor/tree/main/inst/sql) +#' and written to disk as CSV file(s). 
+#' +#' Key features of the Darwin Core transformation: +#' - TODO +#' @examples +#' # TODO +write_dwc <- function(package, directory = ".", doi = package$id, + contact = NULL, rights_holder = NULL) { + # TODO: Hotfix to deal with 1 level deep metadata + package <- package$datapackage + + # Retrieve metadata from DataCite and build EML + assertthat::assert_that( + !is.null(doi), + msg = "No DOI found in `package$id`, provide one in `doi` parameter." + ) + message("Creating EML metadata.") + eml <- movepub::datacite_to_eml(doi) + + # Update title + title <- paste(eml$dataset$title, "[subsampled representation]") # Used in DwC + eml$dataset$title <- title + + # Update license + license_url <- eml$dataset$intellectualRights$rightsUri # Used in DwC + license_code <- eml$dataset$intellectualRights$rightsIdentifier + eml$dataset$intellectualRights <- NULL # Remove original license elements that make EML invalid + eml$dataset$intellectualRights$para <- license_code + + # Add extra paragraph to description + first_author <- eml$dataset$creator[[1]]$individualName$surName + pub_year <- substr(eml$dataset$pubDate, 1, 4) + doi_url <- eml$dataset$alternateIdentifier[[1]] # Used in DwC + first_para <- glue::glue( + # Add span to circumvent https://github.com/ropensci/EML/issues/342 + "This camera trap dataset is derived from ", + "{first_author} et al. ({pub_year}, {doi_url}). ", + "Data have been standardized to Darwin Core using the ", + "camtraptor R package ", + "and exclude observations of humans and absence records. ", + "The original dataset description follows.", + .null = "" + ) + eml$dataset$abstract$para <- purrr::prepend( + eml$dataset$abstract$para, + paste0("") + ) + + # Update contact and set metadata provider + if (!is.null(contact)) { + eml$dataset$contact <- EML::set_responsibleParty( + givenName = contact$given, + surName = contact$family, + electronicMailAddress = contact$email, + userId = if (!is.null(contact$comment[["ORCID"]])) { + list(directory = "http://orcid.org/", contact$comment[["ORCID"]]) + } else { + NULL + } + ) + } + eml$dataset$metadataProvider <- eml$dataset$contact + + # Read data from package + # message("Reading data from `package`.") + # assertthat::assert_that( + # c("reference-data") %in% frictionless::resources(package), + # msg = "`package` must contain resource `reference-data`." + # ) + # assertthat::assert_that( + # c("gps") %in% frictionless::resources(package), + # msg = "`package` must contain resource `gps`." 
+ # ) + # ref <- frictionless::read_resource(package, "reference-data") + # gps <- frictionless::read_resource(package, "gps") + + # Create database + # message("Creating database and transforming to Darwin Core.") + # con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + # DBI::dbWriteTable(con, "reference_data", ref) + # DBI::dbWriteTable(con, "gps", gps) + + # Query database + # dwc_occurrence_sql <- glue::glue_sql( + # readr::read_file( + # system.file("sql/movebank_dwc_occurrence.sql", package = "movepub") + # ), + # .con = con + # ) + # dwc_occurrence <- DBI::dbGetQuery(con, dwc_occurrence_sql) + # DBI::dbDisconnect(con) + + # Write files + if (!dir.exists(directory)) { + dir.create(directory, recursive = TRUE) + } + EML::write_eml(eml, file.path(directory, "eml.xml")) + # readr::write_csv( + # dwc_occurrence, + # file.path(directory, "dwc_occurrence.csv"), + # na = "" + # ) +} From 50a1243e4f2177ddd8795fc592db8e49b3bfde18 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 08:52:16 +0200 Subject: [PATCH 02/13] Minor edits to get_species() --- R/get_species.R | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/R/get_species.R b/R/get_species.R index af1e9e8a..7f33d000 100644 --- a/R/get_species.R +++ b/R/get_species.R @@ -14,19 +14,17 @@ #' #' @examples #' get_species(mica) -#' get_species <- function(datapkg) { - - # check input data package + # Check input data package check_datapkg(datapkg) - - # get vernacular names and scientific names from datapackage (taxonomic - # slot) - if (!"taxonomic" %in% names(datapkg$datapackage)) return(NULL) - else { + + # Get taxonomic information from package metadata + if (!"taxonomic" %in% names(datapkg$datapackage)) { + return(NULL) + } else { taxonomy <- datapkg$datapackage$taxonomic if ("vernacularNames" %in% names(taxonomy[[1]])) { - # get all languages used in vernacularNames + # Get all languages used in vernacularNames langs <- map(taxonomy, function(x) { vernacular_languages <- NULL if ("vernacularNames" %in% names(x)) { @@ -34,20 +32,18 @@ get_species <- function(datapkg) { } }) langs <- unique(unlist(langs)) - - # fill empty vernacular names with NA - taxonomy <- map(taxonomy, - function(x) { - missing_langs <- langs[!langs %in% names(x$vernacularNames)] - for (i in missing_langs) { - x$vernacularNames[[i]] <- NA_character_ - } - x - }) + + # Fill empty vernacular names with NA + taxonomy <- map(taxonomy, function(x) { + missing_langs <- langs[!langs %in% names(x$vernacularNames)] + for (i in missing_langs) { + x$vernacularNames[[i]] <- NA_character_ + } + x + }) } - map_dfr( - taxonomy, - function(x) x %>% as.data.frame()) %>% - tibble() + map_dfr(taxonomy, function(x) { + tibble(as.data.frame(x)) + }) } } From b9e4446624ccb112157c38ff9b80d49cb17afc60 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 10:21:59 +0200 Subject: [PATCH 03/13] Set geographic, taxonomic, temporal coverage --- R/write_dwc.R | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index 5296e29e..3b7d29a7 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -23,24 +23,26 @@ #' Uses `movepub::datacite_to_eml()` under the hood. #' The following properties are set: #' -#' - TO TEST **title**: Original title + `[subsampled representation]`. +#' - **title**: Original title + `[subsampled representation]`. 
#' - **description**: Automatically created first paragraph describing this is #' a derived dataset, followed by the original dataset description. -#' - **creators**: Creators of the original dataset. #' - **license**: License of the original dataset. +#' - **creators**: Creators of the original dataset. #' - **contact**: `contact` or first creator of the original dataset. #' - **metadata provider**: `contact` or first creator of the original dataset. #' - **keywords**: Keywords of the original dataset. -#' - **alternative identifier**: DOI of original dataset. This way, no new DOI -#' will be created when publishing to GBIF. -#' - TO TEST **external link** (and alternative identifier): URL of the Movebank study. +#' - **geographic coverage**: Bounding box as defined `package$spatial`. +#' - **taxonomic coverage**: Species as defined in `package$taxonomic`. +#' - **temporal coverage**: Date range as defined in `package$temporal`. +#' - **alternative identifier**: DOI of the original dataset. This way, no new +#' DOI will be created when publishing to GBIF. +#' #' #' To be set manually in the GBIF IPT: **type**, **subtype**, #' **update frequency**, and **publishing organization**. #' -#' Not set: geographic, taxonomic, temporal coverage, associated parties, -#' project data, sampling methods, and citations. Not applicable: collection -#' data. +#' Not set: associated parties, project data, sampling methods, and citations. +#' Not applicable: collection data. #' #' @section Data: #' @@ -57,6 +59,7 @@ write_dwc <- function(package, directory = ".", doi = package$id, contact = NULL, rights_holder = NULL) { # TODO: Hotfix to deal with 1 level deep metadata + orig_package <- package package <- package$datapackage # Retrieve metadata from DataCite and build EML @@ -111,6 +114,33 @@ write_dwc <- function(package, directory = ".", doi = package$id, } eml$dataset$metadataProvider <- eml$dataset$contact + # Set taxonomic coverage + taxonomy <- get_species(orig_package) + if ("taxonRank" %in% names(taxonomy)) { + taxonomy <- dplyr::filter(taxonomy, taxonRank == "species") + } + sci_names <- + rename(taxonomy, Species = scientificName) %>% + select(Species) + + # Set temporal coverage + begin <- package$temporal$start + end <- package$temporal$end + + # Set geographic coverage + bbox <- dp$datapackage$spatial$bbox + + # Set coverage + eml$dataset$coverage <- set_coverage( + begin = begin, + end = end, + west = bbox[1], + south = bbox[2], + east = bbox[3], + north = bbox[4], + sci_names = sci_names + ) + # Read data from package # message("Reading data from `package`.") # assertthat::assert_that( From 78cad3319666a970506f0f1b283a2fec94b071c2 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 13:04:13 +0200 Subject: [PATCH 04/13] Add project data, associated parties, project url --- R/write_dwc.R | 66 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index 3b7d29a7..a3916cb3 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -31,17 +31,22 @@ #' - **contact**: `contact` or first creator of the original dataset. #' - **metadata provider**: `contact` or first creator of the original dataset. #' - **keywords**: Keywords of the original dataset. +#' - **associated parties**: Organizations as defined in +#' `package$organizations`. #' - **geographic coverage**: Bounding box as defined `package$spatial`. #' - **taxonomic coverage**: Species as defined in `package$taxonomic`. 
#' - **temporal coverage**: Date range as defined in `package$temporal`. +#' - **project data**: Title, identifier, description, and sampling design +#' information as defined in `package$project`. #' - **alternative identifier**: DOI of the original dataset. This way, no new #' DOI will be created when publishing to GBIF. -#' +#' - **external link**: URL of the project as defined in `package$project$path`. #' #' To be set manually in the GBIF IPT: **type**, **subtype**, #' **update frequency**, and **publishing organization**. #' #' Not set: associated parties, project data, sampling methods, and citations. +#' Not set: sampling methods and citations. #' Not applicable: collection data. #' #' @section Data: @@ -57,7 +62,7 @@ #' @examples #' # TODO write_dwc <- function(package, directory = ".", doi = package$id, - contact = NULL, rights_holder = NULL) { + contact = NULL, rights_holder = package$rightsHolder) { # TODO: Hotfix to deal with 1 level deep metadata orig_package <- package package <- package$datapackage @@ -87,10 +92,14 @@ write_dwc <- function(package, directory = ".", doi = package$id, first_para <- glue::glue( # Add span to circumvent https://github.com/ropensci/EML/issues/342 "This camera trap dataset is derived from ", - "{first_author} et al. ({pub_year}, {doi_url}). ", + "{first_author} et al. ({pub_year}, {doi_url}), ", + "a Camera Trap Data Package ", + "(Camtrap DP). ", "Data have been standardized to Darwin Core using the ", "camtraptor R package ", - "and exclude observations of humans and absence records. ", + "and only include observations (and associated media) of animals. ", + "Excluded are records that document blank or unclassified media, ", + "vehicles and observations of humans. ", "The original dataset description follows.", .null = "" ) @@ -114,7 +123,16 @@ write_dwc <- function(package, directory = ".", doi = package$id, } eml$dataset$metadataProvider <- eml$dataset$contact - # Set taxonomic coverage + # Add organizations as associated parties + eml$dataset$associatedParty <- + purrr::map(package$organizations, ~ EML::set_responsibleParty( + givenName = "", # Circumvent https://github.com/ropensci/EML/issues/345 + organizationName = .$title, + onlineUrl = .$path + )) + + # Set coverage + bbox <- dp$datapackage$spatial$bbox taxonomy <- get_species(orig_package) if ("taxonRank" %in% names(taxonomy)) { taxonomy <- dplyr::filter(taxonomy, taxonRank == "species") @@ -123,17 +141,9 @@ write_dwc <- function(package, directory = ".", doi = package$id, rename(taxonomy, Species = scientificName) %>% select(Species) - # Set temporal coverage - begin <- package$temporal$start - end <- package$temporal$end - - # Set geographic coverage - bbox <- dp$datapackage$spatial$bbox - - # Set coverage eml$dataset$coverage <- set_coverage( - begin = begin, - end = end, + begin = package$temporal$start, + end = package$temporal$end, west = bbox[1], south = bbox[2], east = bbox[3], @@ -141,6 +151,32 @@ write_dwc <- function(package, directory = ".", doi = package$id, sci_names = sci_names ) + # Set project metadata + project <- package$project + capture_method <- paste(package$project$captureMethod, collapse = " and ") + animal_type <- paste(package$project$animalTypes, collapse = " and ") + design_para <- glue::glue( + "This project uses a {project$samplingDesign} sampling design, ", + "with {animal_type} animals and ", + "camera traps taking media using {capture_method}. ", + "Media are classified at {project$classificationLevel} level." 
+ ) + eml$dataset$project <- list( + id = project$id, # Can be NULL, assigned as + title = project$title, + abstract = list(para = project$description), # Can be NULL + designDescription = list(description = list(para = design_para)) + ) + + # Set external link to project URL (can be NULL) + if (!is.null(project$path)) { + eml$dataset$distribution = list( + scope = "document", online = list( + url = list("function" = "information", project$path) + ) + ) + } + # Read data from package # message("Reading data from `package`.") # assertthat::assert_that( From bcff79c2fb2e591a046d1e7aaa1e0f44d3b880ac Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 13:04:24 +0200 Subject: [PATCH 05/13] Update title --- R/write_dwc.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index a3916cb3..99497da6 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -23,7 +23,7 @@ #' Uses `movepub::datacite_to_eml()` under the hood. #' The following properties are set: #' -#' - **title**: Original title + `[subsampled representation]`. +#' - **title**: Original title + `[animal observations]`. #' - **description**: Automatically created first paragraph describing this is #' a derived dataset, followed by the original dataset description. #' - **license**: License of the original dataset. @@ -45,7 +45,6 @@ #' To be set manually in the GBIF IPT: **type**, **subtype**, #' **update frequency**, and **publishing organization**. #' -#' Not set: associated parties, project data, sampling methods, and citations. #' Not set: sampling methods and citations. #' Not applicable: collection data. #' @@ -76,7 +75,7 @@ write_dwc <- function(package, directory = ".", doi = package$id, eml <- movepub::datacite_to_eml(doi) # Update title - title <- paste(eml$dataset$title, "[subsampled representation]") # Used in DwC + title <- paste(eml$dataset$title, "[animal observations]") # Used in DwC eml$dataset$title <- title # Update license From 6f65da2c5ebbbfdee93c4e8fe77245ee94b163cc Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 13:38:14 +0200 Subject: [PATCH 06/13] Remove camtraptor as its own dependency --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2548ded7..2d7c9a15 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,6 @@ Depends: R (>= 3.5.0) Imports: assertthat, - camtraptor, dplyr, frictionless, glue, From ba433cb2b61fab55533b9f0ab85d0d9e4f0d8f68 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 13:56:35 +0200 Subject: [PATCH 07/13] Add dependencies --- DESCRIPTION | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 2d7c9a15..fc8d8f71 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,7 +38,9 @@ Depends: R (>= 3.5.0) Imports: assertthat, + DBI, dplyr, + EML, frictionless, glue, htmltools, @@ -48,6 +50,7 @@ Imports: purrr, readr, rlang, + RSQLite, stringr, tidyr Remotes: From ed7c86a4a415220ea5795a2aae3f56524ed0fb3e Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 14:12:43 +0200 Subject: [PATCH 08/13] Upload SQL as it was in movepub --- inst/sql/dwc_multimedia.sql | 78 +++++++++++++ inst/sql/dwc_occurrence.sql | 213 ++++++++++++++++++++++++++++++++++++ 2 files changed, 291 insertions(+) create mode 100644 inst/sql/dwc_multimedia.sql create mode 100644 inst/sql/dwc_occurrence.sql diff --git a/inst/sql/dwc_multimedia.sql b/inst/sql/dwc_multimedia.sql new file mode 100644 index 00000000..8bb443a3 --- /dev/null +++ 
b/inst/sql/dwc_multimedia.sql @@ -0,0 +1,78 @@ +/* +Created by Peter Desmet (INBO) +Mapping from Camtrap DP: https://tdwg.github.io/camtrap-dp +Mapping to Audubon Media Description: https://rs.gbif.org/extension/ac/audubon_2020_10_06.xml +Y = included in DwC, N = not included in DwC + +CAMTRAP DP MEDIA + +mediaID Y: as link to observation +deploymentID N: included at observation level +sequenceID Y: as link to observation +captureMethod ? +timestamp Y +filePath Y +fileName Y: to sort data +fileMediatype Y +exifData N +favourite N +comments N +_id N + +*/ + +-- Observations can be based on sequences (sequenceID) or individual files (mediaID) +-- Make two joins and union to capture both cases without overlap +WITH observations_media AS ( +-- Sequence based observations + SELECT obs.observationID, obs.timestamp AS observationTimestamp, med.* + FROM observations AS obs + LEFT JOIN media AS med ON obs.sequenceID = med.sequenceID + WHERE obs.observationType = 'animal' AND obs.mediaID IS NULL + UNION +-- File based observations + SELECT obs.observationID, obs.timestamp AS observationTimestamp, med.* + FROM observations AS obs + LEFT JOIN media AS med ON obs.mediaID = med.mediaID + WHERE obs.observationType = 'animal' AND obs.mediaID IS NOT NULL +) + +SELECT +-- occurrenceID + obs_med.observationID AS occurrenceID, +-- creator +-- providerLiteral +-- provider +-- rights + {metadata$mediaLicense} AS rights, +-- owner +-- identifier + obs_med.mediaID AS identifier, +-- type + CASE + WHEN obs_med.fileMediatype LIKE '%video%' THEN 'MovingImage' + ELSE 'StillImage' + END AS type, +-- providerManagedID + obs_med._id AS providerManagedID, +-- captureDevice +-- dep.cameraModel AS captureDevice, +-- resourceCreationTechnique + obs_med.captureMethod AS resourceCreationTechnique, +-- accessURI + obs_med.filePath AS accessURI, +-- format + obs_med.fileMediatype AS format, +-- CreateDate + STRFTIME('%Y-%m-%dT%H:%M:%SZ', datetime(obs_med.timestamp, 'unixepoch')) AS createDate + +FROM + observations_media AS obs_med + LEFT JOIN deployments AS dep + ON obs_med.deploymentID = dep.deploymentID + +ORDER BY +-- Order is not retained in observations_media, so important to sort + obs_med.observationTimestamp, + obs_med.timestamp, + obs_med.fileName diff --git a/inst/sql/dwc_occurrence.sql b/inst/sql/dwc_occurrence.sql new file mode 100644 index 00000000..ab27ca31 --- /dev/null +++ b/inst/sql/dwc_occurrence.sql @@ -0,0 +1,213 @@ +/* +Created by Peter Desmet (INBO) +Mapping from Camtrap DP: https://tdwg.github.io/camtrap-dp +Mapping to Darwin Core Occurrence: https://rs.gbif.org/core/dwc_occurrence_2022-02-02.xml +Y = included in DwC, N = not included in DwC + +CAMTRAP DP DEPLOYMENTS + +deploymentID Y +locationID Y +locationName Y +longitude Y +latitude Y +coordinateUncertainty Y +start Y +end Y +setupBy N +cameraID N +cameraModel N +cameraInterval N +cameraHeight N +cameraTilt N +cameraHeading N +timestampIssues N +baitUse Y +session N +array N +featureType Y +habitat Y +tags Y +comments Y +_id N + +CAMTRAP DP OBSERVATIONS + +observationID Y +deploymentID Y +sequenceID Y +mediaID N: see dwc_multimedia +timestamp Y +observationType Y: as filter +cameraSetup N +taxonID Y +scientificName Y +count Y +countNew N +lifeStage Y +sex Y +behaviour Y +individualID Y +classificationMethod Y +classifiedBy Y +classificationTimestamp Y +classificationConfidence Y +comments Y +_id N + +*/ + +SELECT +-- RECORD-LEVEL +-- type + -- Static value + 'Event' AS type, +-- license + {metadata$dataLicense} AS license, +-- rightsHolder + 
{metadata$rightsHolder} AS rightsHolder, +-- bibliographicCitation + -- How *record* should be cited, don't use dataset-wide metadata$bibliographicCitation +-- datasetID + {metadata$id} AS datasetID, +-- institutionCode + {metadata$organization} AS institutionCode, +-- collectionCode + {metadata$source} AS collectionCode, +-- datasetName + {metadata$projectTitle} AS datasetName, +-- basisOfRecord + -- Static value + 'MachineObservation' AS basisOfRecord, +-- informationWithheld +-- dataGeneralizations +-- dynamicProperties + +-- OCCURRENCE +-- occurrenceID + obs.observationID AS occurrenceID, +-- individualCount + obs.count AS individualCount, +-- sex + obs.sex AS sex, +-- lifeStage + obs.lifeStage AS lifeStage, +-- behavior + obs.behaviour AS behavior, +-- occurrenceStatus + -- Static value + 'present' AS occurrenceStatus, +-- occurrenceRemarks + obs.comments AS occurrenceRemarks, + +-- ORGANISM +-- organismID + obs.individualID AS organismID, + +-- MATERIALSAMPLE + -- Not applicable + +-- EVENT +-- eventID + obs.sequenceID AS eventID, +-- parentEventID + obs.deploymentID AS parentEventID, +-- eventDate + -- ISO-8601 in UTC + strftime('%Y-%m-%dT%H:%M:%SZ', datetime(obs.timestamp, 'unixepoch')) AS eventDate, +-- eventTime + -- Included in eventDate +-- habitat + dep.habitat AS habitat, +-- samplingProtocol + 'camera trap' || + CASE + WHEN dep.baitUse IS 'none' THEN ' without bait' + WHEN dep.baitUse IS NOT NULL THEN ' with bait' + ELSE '' + END AS samplingProtocol, +-- samplingEffort + -- Duration of deployment + strftime('%Y-%m-%dT%H:%M:%SZ', datetime(dep.start, 'unixepoch')) || + '/' || + strftime('%Y-%m-%dT%H:%M:%SZ', datetime(dep.end, 'unixepoch')) AS samplingEffort, +-- eventRemarks + CASE + WHEN dep.comments IS NOT NULL THEN dep.comments + ELSE '' + END || + CASE + WHEN dep.comments IS NOT NULL AND dep.tags IS NOT NULL THEN ' | ' + ELSE '' + END || + CASE + WHEN dep.tags IS NOT NULL THEN 'tags: ' || dep.tags + ELSE '' + END AS eventRemarks, + +-- LOCATION +-- locationID + dep.locationID AS locationID, +-- countryCode + -- Single value might not apply to whole dataset, assumes coordinates are provided. 
+-- locality + dep.locationName AS locality, +-- locationRemarks + dep.featureType AS locationRemarks, +-- decimalLatitude + dep.latitude AS decimalLatitude, +-- decimalLongitude + dep.longitude AS decimalLongitude, +-- geodeticDatum + -- Static value + 'WGS84' AS geodeticDatum, +-- coordinateUncertaintyInMeters + dep.coordinateUncertainty AS coordinateUncertaintyInMeters, + +-- GEOLOGICAL CONTEXT + -- Not applicable + +-- IDENTIFICATION +-- identifiedBy + obs.classifiedBy AS identifiedBy, +-- identifiedByID +-- dateIdentified + -- ISO-8601 in UTC + strftime('%Y-%m-%dT%H:%M:%SZ', datetime(obs.classificationTimestamp, 'unixepoch')) AS dateIdentified, +-- identificationRemarks + CASE + WHEN obs.classificationMethod IS NOT NULL THEN 'classificationMethod: ' || obs.classificationMethod + ELSE '' + END || + CASE + WHEN obs.classificationMethod IS NOT NULL AND obs.classificationConfidence IS NOT NULL THEN ' | ' + ELSE '' + END || + CASE + WHEN obs.classificationConfidence IS NOT NULL THEN 'classificationConfidence: ' || obs.classificationConfidence + ELSE '' + END AS identificationRemarks, + +-- TAXON +-- taxonID + obs.taxonID AS taxonID, +-- scientificName + obs.scientificName AS scientificName, +-- kingdom + -- Static value: records are filtered on animals in WHERE clause + 'Animalia' AS kingdom +-- taxonRank +-- vernacularName + +FROM + observations AS obs + LEFT JOIN deployments AS dep + ON obs.deploymentID = dep.deploymentID + +WHERE + -- Select biological observations only (excluding observations marked as human, blank, vehicle) + -- Same filter should be used in dwc_multimedia.sql + obs.observationType = 'animal' + +ORDER BY + obs.timestamp From 3b275dd9ba97119bacf9453ea3e68bc020458c47 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 14:13:02 +0200 Subject: [PATCH 09/13] Fix naming issues --- R/write_dwc.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index 99497da6..210267b8 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -131,16 +131,16 @@ write_dwc <- function(package, directory = ".", doi = package$id, )) # Set coverage - bbox <- dp$datapackage$spatial$bbox + bbox <- package$spatial$bbox taxonomy <- get_species(orig_package) if ("taxonRank" %in% names(taxonomy)) { taxonomy <- dplyr::filter(taxonomy, taxonRank == "species") } sci_names <- - rename(taxonomy, Species = scientificName) %>% - select(Species) + dplyr::rename(taxonomy, Species = scientificName) %>% + dplyr::select(Species) - eml$dataset$coverage <- set_coverage( + eml$dataset$coverage <- EML::set_coverage( begin = package$temporal$start, end = package$temporal$end, west = bbox[1], From fc5e60d0a65a434ea60bfea302f479c2eb0ec4cb Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 14:13:30 +0200 Subject: [PATCH 10/13] Transform data with function --- R/write_dwc.R | 55 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index 210267b8..2970c8d0 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -177,42 +177,41 @@ write_dwc <- function(package, directory = ".", doi = package$id, } # Read data from package - # message("Reading data from `package`.") - # assertthat::assert_that( - # c("reference-data") %in% frictionless::resources(package), - # msg = "`package` must contain resource `reference-data`." - # ) - # assertthat::assert_that( - # c("gps") %in% frictionless::resources(package), - # msg = "`package` must contain resource `gps`." 
- # ) - # ref <- frictionless::read_resource(package, "reference-data") - # gps <- frictionless::read_resource(package, "gps") + # Already read with read_camtrap_dp() # Create database - # message("Creating database and transforming to Darwin Core.") - # con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - # DBI::dbWriteTable(con, "reference_data", ref) - # DBI::dbWriteTable(con, "gps", gps) + message("Creating database and transforming to Darwin Core.") + con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") + DBI::dbWriteTable(con, "deployments", orig_package$deployments) + DBI::dbWriteTable(con, "media", orig_package$media) + DBI::dbWriteTable(con, "observations", orig_package$observations) # Query database - # dwc_occurrence_sql <- glue::glue_sql( - # readr::read_file( - # system.file("sql/movebank_dwc_occurrence.sql", package = "movepub") - # ), - # .con = con - # ) - # dwc_occurrence <- DBI::dbGetQuery(con, dwc_occurrence_sql) - # DBI::dbDisconnect(con) + dwc_occurrence_sql <- glue::glue_sql( + readr::read_file( + system.file("sql/dwc_occurrence.sql", package = "camtraptor") + ), + .con = con + ) + dwc_multimedia_sql <- glue::glue_sql( + readr::read_file( + system.file("sql/dwc_multimedia.sql", package = "camtraptor") + ), + .con = con + ) + dwc_occurrence <- DBI::dbGetQuery(con, dwc_occurrence_sql) + dwc_multimedia <- DBI::dbGetQuery(con, dwc_multimedia_sql) + DBI::dbDisconnect(con) # Write files if (!dir.exists(directory)) { dir.create(directory, recursive = TRUE) } EML::write_eml(eml, file.path(directory, "eml.xml")) - # readr::write_csv( - # dwc_occurrence, - # file.path(directory, "dwc_occurrence.csv"), - # na = "" - # ) + readr::write_csv( + dwc_occurrence, file.path(directory, "dwc_occurrence.csv"), na = "" + ) + readr::write_csv( + dwc_multimedia, file.path(directory, "dwc_multimedia.csv"), na = "" + ) } From e0f2b0e3523378b26bfc104bce77ac6e66ac2a88 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 14:13:49 +0200 Subject: [PATCH 11/13] Circumvent #116 --- R/write_dwc.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index 2970c8d0..beeced0a 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -182,9 +182,9 @@ write_dwc <- function(package, directory = ".", doi = package$id, # Create database message("Creating database and transforming to Darwin Core.") con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:") - DBI::dbWriteTable(con, "deployments", orig_package$deployments) - DBI::dbWriteTable(con, "media", orig_package$media) - DBI::dbWriteTable(con, "observations", orig_package$observations) + DBI::dbWriteTable(con, "deployments", dplyr::tibble(orig_package$deployments)) + DBI::dbWriteTable(con, "media", dplyr::tibble(orig_package$media)) + DBI::dbWriteTable(con, "observations", dplyr::tibble(orig_package$observations)) # Query database dwc_occurrence_sql <- glue::glue_sql( From 9ed4b8a7b400d39c120aa455bc5dc9fa1f290275 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 16:59:08 +0200 Subject: [PATCH 12/13] Rework mapping (still 0.1.7 format) --- R/write_dwc.R | 6 + inst/sql/dwc_multimedia.sql | 69 ++++----- inst/sql/dwc_occurrence.sql | 271 ++++++++++++------------------------ 3 files changed, 123 insertions(+), 223 deletions(-) diff --git a/R/write_dwc.R b/R/write_dwc.R index beeced0a..ef843d38 100644 --- a/R/write_dwc.R +++ b/R/write_dwc.R @@ -74,6 +74,9 @@ write_dwc <- function(package, directory = ".", doi = package$id, message("Creating EML metadata.") eml <- movepub::datacite_to_eml(doi) + # 
Set platform + platform <- package$platform$title # Use in DwC + # Update title title <- paste(eml$dataset$title, "[animal observations]") # Used in DwC eml$dataset$title <- title @@ -84,6 +87,9 @@ write_dwc <- function(package, directory = ".", doi = package$id, eml$dataset$intellectualRights <- NULL # Remove original license elements that make EML invalid eml$dataset$intellectualRights$para <- license_code + # Set media license + media_license_url <- purrr::keep(package$licenses, ~ .$scope == "media")[[1]]$path + # Add extra paragraph to description first_author <- eml$dataset$creator[[1]]$individualName$surName pub_year <- substr(eml$dataset$pubDate, 1, 4) diff --git a/inst/sql/dwc_multimedia.sql b/inst/sql/dwc_multimedia.sql index 8bb443a3..b0e42fac 100644 --- a/inst/sql/dwc_multimedia.sql +++ b/inst/sql/dwc_multimedia.sql @@ -1,24 +1,19 @@ /* -Created by Peter Desmet (INBO) -Mapping from Camtrap DP: https://tdwg.github.io/camtrap-dp -Mapping to Audubon Media Description: https://rs.gbif.org/extension/ac/audubon_2020_10_06.xml -Y = included in DwC, N = not included in DwC - -CAMTRAP DP MEDIA - -mediaID Y: as link to observation -deploymentID N: included at observation level -sequenceID Y: as link to observation -captureMethod ? -timestamp Y -filePath Y -fileName Y: to sort data -fileMediatype Y -exifData N -favourite N -comments N -_id N +Schema: https://rs.gbif.org/extension/ac/audubon_2020_10_06.xml +Camtrap DP terms and whether they are included in DwC (Y) or not (N): +media.mediaID Y: as link to observation +media.deploymentID N: included at observation level +media.sequenceID Y: as link to observation +media.captureMethod Y +media.timestamp Y +media.filePath Y +media.fileName Y: to sort data +media.fileMediatype Y +media.exifData N +media.favourite N +media.comments Y +media._id N */ -- Observations can be based on sequences (sequenceID) or individual files (mediaID) @@ -38,33 +33,21 @@ WITH observations_media AS ( ) SELECT --- occurrenceID - obs_med.observationID AS occurrenceID, --- creator --- providerLiteral --- provider --- rights - {metadata$mediaLicense} AS rights, --- owner --- identifier - obs_med.mediaID AS identifier, --- type + obs_med.observationID AS occurrenceID, +-- provider: can be org managing the platform, but that info is not available + {media_license_url} AS rights, + obs_med.mediaID AS identifier, CASE WHEN obs_med.fileMediatype LIKE '%video%' THEN 'MovingImage' ELSE 'StillImage' - END AS type, --- providerManagedID - obs_med._id AS providerManagedID, --- captureDevice --- dep.cameraModel AS captureDevice, --- resourceCreationTechnique - obs_med.captureMethod AS resourceCreationTechnique, --- accessURI - obs_med.filePath AS accessURI, --- format - obs_med.fileMediatype AS format, --- CreateDate - STRFTIME('%Y-%m-%dT%H:%M:%SZ', datetime(obs_med.timestamp, 'unixepoch')) AS createDate + END AS type, + obs_med._id AS providerManagedID, + obs_med.comments AS comments, + dep.cameraModel AS captureDevice, + obs_med.captureMethod AS resourceCreationTechnique, + obs_med.filePath AS accessURI, + obs_med.fileMediatype AS format, + STRFTIME('%Y-%m-%dT%H:%M:%SZ', datetime(obs_med.timestamp, 'unixepoch')) AS CreateDate FROM observations_media AS obs_med diff --git a/inst/sql/dwc_occurrence.sql b/inst/sql/dwc_occurrence.sql index ab27ca31..aabfcba3 100644 --- a/inst/sql/dwc_occurrence.sql +++ b/inst/sql/dwc_occurrence.sql @@ -1,203 +1,114 @@ /* -Created by Peter Desmet (INBO) -Mapping from Camtrap DP: https://tdwg.github.io/camtrap-dp -Mapping to Darwin Core Occurrence: 
https://rs.gbif.org/core/dwc_occurrence_2022-02-02.xml -Y = included in DwC, N = not included in DwC - -CAMTRAP DP DEPLOYMENTS - -deploymentID Y -locationID Y -locationName Y -longitude Y -latitude Y -coordinateUncertainty Y -start Y -end Y -setupBy N -cameraID N -cameraModel N -cameraInterval N -cameraHeight N -cameraTilt N -cameraHeading N -timestampIssues N -baitUse Y -session N -array N -featureType Y -habitat Y -tags Y -comments Y -_id N - -CAMTRAP DP OBSERVATIONS - -observationID Y -deploymentID Y -sequenceID Y -mediaID N: see dwc_multimedia -timestamp Y -observationType Y: as filter -cameraSetup N -taxonID Y -scientificName Y -count Y -countNew N -lifeStage Y -sex Y -behaviour Y -individualID Y -classificationMethod Y -classifiedBy Y -classificationTimestamp Y -classificationConfidence Y -comments Y -_id N - +Schema: https://rs.gbif.org/core/dwc_occurrence_2022-02-02.xml +Camtrap DP terms and whether they are included in DwC (Y) or not (N): + +deployments.deploymentID Y +deployments.locationID Y +deployments.locationName Y +deployments.longitude Y +deployments.latitude Y +deployments.coordinateUncertainty Y +deployments.start Y +deployments.end Y +deployments.setupBy N +deployments.cameraID N +deployments.cameraModel Y: in dwc_multimedia +deployments.cameraInterval N +deployments.cameraHeight N +deployments.cameraTilt N +deployments.cameraHeading N +deployments.timestampIssues N +deployments.baitUse Y +deployments.session N +deployments.array N +deployments.featureType Y +deployments.habitat Y +deployments.tags Y +deployments.comments Y +deployments._id N +observations.observationID Y +observations.deploymentID Y +observations.sequenceID Y +observations.mediaID N: in dwc_multimedia +observations.timestamp Y +observations.observationType Y: as filter +observations.cameraSetup N +observations.taxonID Y +observations.scientificName Y +observations.count Y +observations.countNew N +observations.lifeStage Y +observations.sex Y +observations.behaviour Y +observations.individualID Y +observations.classificationMethod Y +observations.classifiedBy Y +observations.classificationTimestamp Y +observations.classificationConfidence Y +observations.comments Y +observations._id N */ SELECT -- RECORD-LEVEL --- type - -- Static value - 'Event' AS type, --- license - {metadata$dataLicense} AS license, --- rightsHolder - {metadata$rightsHolder} AS rightsHolder, --- bibliographicCitation - -- How *record* should be cited, don't use dataset-wide metadata$bibliographicCitation --- datasetID - {metadata$id} AS datasetID, --- institutionCode - {metadata$organization} AS institutionCode, --- collectionCode - {metadata$source} AS collectionCode, --- datasetName - {metadata$projectTitle} AS datasetName, --- basisOfRecord - -- Static value - 'MachineObservation' AS basisOfRecord, --- informationWithheld --- dataGeneralizations --- dynamicProperties - + 'Event' AS type, + {license_url} AS license, + {rights_holder} AS rightsHolder, +-- bibliographicCitation: how *record* should be cited, so not package bibliographicCitation + {doi_url} AS datasetID, +-- institutionCode: org managing the platform/collection, but that info is not available + {platform} AS collectionCode, + {title} AS datasetName, + 'MachineObservation' AS basisOfRecord, + 'see metadata' AS informationWithheld, -- OCCURRENCE --- occurrenceID - obs.observationID AS occurrenceID, --- individualCount - obs.count AS individualCount, --- sex - obs.sex AS sex, --- lifeStage - obs.lifeStage AS lifeStage, --- behavior - obs.behaviour AS behavior, --- 
occurrenceStatus - -- Static value - 'present' AS occurrenceStatus, --- occurrenceRemarks - obs.comments AS occurrenceRemarks, - + obs.observationID AS occurrenceID, + obs.count AS individualCount, + obs.sex AS sex, + obs.lifeStage AS lifeStage, + obs.behaviour AS behavior, + 'present' AS occurrenceStatus, + obs.comments AS occurrenceRemarks, -- ORGANISM --- organismID - obs.individualID AS organismID, - --- MATERIALSAMPLE - -- Not applicable - + obs.individualID AS organismID, -- EVENT --- eventID - obs.sequenceID AS eventID, --- parentEventID - obs.deploymentID AS parentEventID, --- eventDate - -- ISO-8601 in UTC + obs.sequenceID AS eventID, + obs.deploymentID AS parentEventID, strftime('%Y-%m-%dT%H:%M:%SZ', datetime(obs.timestamp, 'unixepoch')) AS eventDate, --- eventTime - -- Included in eventDate --- habitat - dep.habitat AS habitat, --- samplingProtocol + dep.habitat AS habitat, 'camera trap' || CASE WHEN dep.baitUse IS 'none' THEN ' without bait' WHEN dep.baitUse IS NOT NULL THEN ' with bait' ELSE '' - END AS samplingProtocol, --- samplingEffort - -- Duration of deployment + END AS samplingProtocol, strftime('%Y-%m-%dT%H:%M:%SZ', datetime(dep.start, 'unixepoch')) || '/' || - strftime('%Y-%m-%dT%H:%M:%SZ', datetime(dep.end, 'unixepoch')) AS samplingEffort, --- eventRemarks - CASE - WHEN dep.comments IS NOT NULL THEN dep.comments - ELSE '' - END || - CASE - WHEN dep.comments IS NOT NULL AND dep.tags IS NOT NULL THEN ' | ' - ELSE '' - END || - CASE - WHEN dep.tags IS NOT NULL THEN 'tags: ' || dep.tags - ELSE '' - END AS eventRemarks, - + strftime('%Y-%m-%dT%H:%M:%SZ', datetime(dep.end, 'unixepoch')) AS samplingEffort, -- Duration of deployment + COALESCE( + dep.comments || ' | tags: ' || dep.tags, + 'tags: ' || dep.tags, + dep.comments + ) AS eventRemarks, -- LOCATION --- locationID - dep.locationID AS locationID, --- countryCode - -- Single value might not apply to whole dataset, assumes coordinates are provided. 
--- locality - dep.locationName AS locality, --- locationRemarks - dep.featureType AS locationRemarks, --- decimalLatitude - dep.latitude AS decimalLatitude, --- decimalLongitude - dep.longitude AS decimalLongitude, --- geodeticDatum - -- Static value - 'WGS84' AS geodeticDatum, --- coordinateUncertaintyInMeters - dep.coordinateUncertainty AS coordinateUncertaintyInMeters, - --- GEOLOGICAL CONTEXT - -- Not applicable - + dep.locationID AS locationID, + dep.locationName AS locality, + dep.featureType AS locationRemarks, + dep.latitude AS decimalLatitude, + dep.longitude AS decimalLongitude, + 'WGS84' AS geodeticDatum, + dep.coordinateUncertainty AS coordinateUncertaintyInMeters, -- IDENTIFICATION --- identifiedBy - obs.classifiedBy AS identifiedBy, --- identifiedByID --- dateIdentified - -- ISO-8601 in UTC + obs.classifiedBy AS identifiedBy, strftime('%Y-%m-%dT%H:%M:%SZ', datetime(obs.classificationTimestamp, 'unixepoch')) AS dateIdentified, --- identificationRemarks - CASE - WHEN obs.classificationMethod IS NOT NULL THEN 'classificationMethod: ' || obs.classificationMethod - ELSE '' - END || - CASE - WHEN obs.classificationMethod IS NOT NULL AND obs.classificationConfidence IS NOT NULL THEN ' | ' - ELSE '' - END || - CASE - WHEN obs.classificationConfidence IS NOT NULL THEN 'classificationConfidence: ' || obs.classificationConfidence - ELSE '' - END AS identificationRemarks, - + COALESCE( + 'classified by ' || obs.classificationMethod || ' with ' || obs.classificationConfidence || ' confidence', + 'classified by ' || obs.classificationMethod + ) AS identificationRemarks, -- TAXON --- taxonID - obs.taxonID AS taxonID, --- scientificName - obs.scientificName AS scientificName, --- kingdom - -- Static value: records are filtered on animals in WHERE clause - 'Animalia' AS kingdom --- taxonRank --- vernacularName + obs.taxonID AS taxonID, + obs.scientificName AS scientificName, + 'Animalia' AS kingdom FROM observations AS obs From ed85cd5068d527f2f4c47869c60686f9396ebb89 Mon Sep 17 00:00:00 2001 From: peterdesmet Date: Fri, 3 Jun 2022 17:01:06 +0200 Subject: [PATCH 13/13] devtools::document() --- NAMESPACE | 1 + R/read_camtrap_dp.R | 4 +- man/filter_predicate.Rd | 24 +++++++---- man/get_species.Rd | 1 - man/read_camtrap_dp.Rd | 4 +- man/write_dwc.Rd | 90 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 111 insertions(+), 13 deletions(-) create mode 100644 man/write_dwc.Rd diff --git a/NAMESPACE b/NAMESPACE index 5ba538c2..59158582 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ export(pred_notna) export(pred_or) export(read_camtrap_dp) export(transform_effort_to_common_units) +export(write_dwc) importFrom(assertthat,assert_that) importFrom(dplyr,"%>%") importFrom(dplyr,.data) diff --git a/R/read_camtrap_dp.R b/R/read_camtrap_dp.R index e9b7100c..ca1159be 100644 --- a/R/read_camtrap_dp.R +++ b/R/read_camtrap_dp.R @@ -1,7 +1,7 @@ -#' Read camtrap-dp formatted data +#' Read Camtrap DP formatted data #' #' This function reads camera trap data formatted following the [Camera Trap -#' Data Package (Camtrap DP)](https://github.com/tdwg/camtrap-dp) format. The +#' Data Package (Camtrap DP)](https://tdwg.github.io/camtrap-dpdp) format. The #' function is built upon the functions \link[frictionless]{read_package} and #' \link[frictionless]{read_resource}. This means a.o. 
that all datetime
 #' information included in the camera trap data package is automatically
diff --git a/man/filter_predicate.Rd b/man/filter_predicate.Rd
index bee6d4ff..8cd12bdf 100644
--- a/man/filter_predicate.Rd
+++ b/man/filter_predicate.Rd
@@ -117,7 +117,9 @@
 Internally, the input to \verb{pred*} functions turn into a character
 string, which forms the body of a filter expression. For example:
 
-\code{pred("tags", "boven de stroom")} gives:\preformatted{$arg
+\code{pred("tags", "boven de stroom")} gives:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{$arg
 [1] "tags"
 
 $value
 [1] "boven de stroom"
 
 $type
 [1] "equals"
 
 $expr
 (tags == "boven de stroom")
-}
+}\if{html}{\out{</div>}}
 
-\code{pred_gt("latitude", 51.27)} gives, (only \code{expr} slot shown):\preformatted{(latitude > 51.27)
-}
+\code{pred_gt("latitude", 51.27)} gives, (only \code{expr} slot shown):
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{(latitude > 51.27)
+}\if{html}{\out{</div>}}
 
-\code{pred_or()} gives:\preformatted{((tags == "boven de stroom") | (latitude > 51.28))
-}
+\code{pred_or()} gives:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{((tags == "boven de stroom") | (latitude > 51.28))
+}\if{html}{\out{</div>}}
 
-\code{pred_or()} gives:\preformatted{((tags == "boven de stroom") & (latitude > 51.28))
-}
+\code{pred_or()} gives:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{((tags == "boven de stroom") & (latitude > 51.28))
+}\if{html}{\out{</div>
}} } \section{Keys}{ diff --git a/man/get_species.Rd b/man/get_species.Rd index 1660d0e6..8e99a165 100644 --- a/man/get_species.Rd +++ b/man/get_species.Rd @@ -19,5 +19,4 @@ Function to get all identified species } \examples{ get_species(mica) - } diff --git a/man/read_camtrap_dp.Rd b/man/read_camtrap_dp.Rd index 53224695..96bdecaf 100644 --- a/man/read_camtrap_dp.Rd +++ b/man/read_camtrap_dp.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/read_camtrap_dp.R \name{read_camtrap_dp} \alias{read_camtrap_dp} -\title{Read camtrap-dp formatted data} +\title{Read Camtrap DP formatted data} \usage{ read_camtrap_dp(file = NULL, media = TRUE, path = lifecycle::deprecated()) } @@ -26,7 +26,7 @@ A list containing three (tibble) data.frames: and a list with metadata: \code{datapackage}. } \description{ -This function reads camera trap data formatted following the \href{https://github.com/tdwg/camtrap-dp}{Camera Trap Data Package (Camtrap DP)} format. The +This function reads camera trap data formatted following the \href{https://tdwg.github.io/camtrap-dpdp}{Camera Trap Data Package (Camtrap DP)} format. The function is built upon the functions \link[frictionless]{read_package} and \link[frictionless]{read_resource}. This means a.o. that all datetime information included in the camera trap data package is automatically diff --git a/man/write_dwc.Rd b/man/write_dwc.Rd new file mode 100644 index 00000000..a5819c6e --- /dev/null +++ b/man/write_dwc.Rd @@ -0,0 +1,90 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_dwc.R +\name{write_dwc} +\alias{write_dwc} +\title{Transform camera trap data to Darwin Core} +\usage{ +write_dwc( + package, + directory = ".", + doi = package$id, + contact = NULL, + rights_holder = package$rightsHolder +) +} +\arguments{ +\item{package}{A Camtrap DP, as read by \code{\link[=read_camtrap_dp]{read_camtrap_dp()}}.} + +\item{directory}{Path to local directory to write files to.} + +\item{doi}{DOI of the original dataset, used to get metadata.} + +\item{contact}{Person to be set as resource contact and metadata provider. +To be provided as a \code{person()}.} + +\item{rights_holder}{Acronym of the organization owning or managing the +rights over the data.} +} +\value{ +CSV (data) and EML (metadata) files written to disk. +} +\description{ +Transforms a published \href{https://github.com/tdwg/camtrap-dp}{Camera Trap Data Package (Camtrap DP)} to Darwin Core CSV and EML +files that can be uploaded to a \href{https://www.gbif.org/ipt}{GBIF IPT} for +publication. +A \code{meta.xml} file is not created. +} +\section{Metadata}{ + + +Metadata are derived from the original dataset by looking up its \code{doi} in +DataCite (\href{https://doi.org/10.5281/zenodo.5590881}{example}) and transforming +these to EML. +Uses \code{movepub::datacite_to_eml()} under the hood. +The following properties are set: +\itemize{ +\item \strong{title}: Original title + \verb{[animal observations]}. +\item \strong{description}: Automatically created first paragraph describing this is +a derived dataset, followed by the original dataset description. +\item \strong{license}: License of the original dataset. +\item \strong{creators}: Creators of the original dataset. +\item \strong{contact}: \code{contact} or first creator of the original dataset. +\item \strong{metadata provider}: \code{contact} or first creator of the original dataset. +\item \strong{keywords}: Keywords of the original dataset. 
+\item \strong{associated parties}: Organizations as defined in
+\code{package$organizations}.
+\item \strong{geographic coverage}: Bounding box as defined in \code{package$spatial}.
+\item \strong{taxonomic coverage}: Species as defined in \code{package$taxonomic}.
+\item \strong{temporal coverage}: Date range as defined in \code{package$temporal}.
+\item \strong{project data}: Title, identifier, description, and sampling design
+information as defined in \code{package$project}.
+\item \strong{alternative identifier}: DOI of the original dataset. This way, no new
+DOI will be created when publishing to GBIF.
+\item \strong{external link}: URL of the project as defined in \code{package$project$path}.
+}
+
+To be set manually in the GBIF IPT: \strong{type}, \strong{subtype},
+\strong{update frequency}, and \strong{publishing organization}.
+
+Not set: sampling methods and citations.
+Not applicable: collection data.
+}
+
+\section{Data}{
+
+
+\code{package} is expected to contain the resources \code{deployments}, \code{media} and
+\code{observations}.
+Their CSV data are loaded into a SQLite database,
+\href{https://github.com/inbo/camtraptor/tree/main/inst/sql}{transformed to Darwin Core using SQL}
+and written to disk as CSV file(s).
+
+Key features of the Darwin Core transformation:
+\itemize{
+\item TODO
+}
+}
+
+\examples{
+# TODO
+}
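A minimal usage sketch of the new function, based only on the signature and roxygen documentation introduced in this patch series. The input file, output directory, contact person and rights holder below are hypothetical placeholders, not part of the patches.

library(camtraptor)

# Read a published Camtrap DP (hypothetical local datapackage.json)
dp <- read_camtrap_dp("datapackage.json")

# Transform to Darwin Core CSV and EML files, as documented for write_dwc()
write_dwc(
  dp,
  directory = "dwc",                               # hypothetical output folder
  doi = "https://doi.org/10.5281/zenodo.5590881",  # DOI example cited in the docs above
  contact = person(
    given = "Jane", family = "Doe", email = "jane.doe@example.org",
    comment = c(ORCID = "0000-0000-0000-0000")     # hypothetical contact person
  ),
  rights_holder = "INBO"                           # hypothetical rights holder
)

# Per PATCH 10/13, this should write eml.xml, dwc_occurrence.csv and
# dwc_multimedia.csv to the "dwc" directory.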