Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dataset interface improvements #693

Merged
merged 12 commits into from
Jan 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,25 @@ export(as.download)
export(blanktheme)
export(check_wkt)
export(count_facet)
export(dataset)
export(dataset_comment)
export(dataset_constituents)
export(dataset_contact)
export(dataset_doi)
export(dataset_duplicate)
export(dataset_endpoint)
export(dataset_export)
export(dataset_get)
export(dataset_gridded)
export(dataset_identifier)
export(dataset_machinetag)
export(dataset_metrics)
export(dataset_networks)
export(dataset_noendpoint)
export(dataset_process)
export(dataset_search)
export(dataset_suggest)
export(dataset_tag)
export(datasets)
export(derived_dataset)
export(derived_dataset_prep)
Expand Down
115 changes: 115 additions & 0 deletions R/dataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#' Search for more obscure dataset metadata.
#'
#' @param country The 2-letter country code (as per ISO-3166-1) of the country
#' publishing the dataset.
#' @param type The primary type of the dataset.
#' Available values : OCCURRENCE, CHECKLIST, METADATA, SAMPLING_EVENT,
#' MATERIAL_ENTITY.
#' @param identifierType An identifier type for the identifier parameter.
#' Available values : URL, LSID, HANDLER, DOI, UUID, FTP, URI, UNKNOWN,
#' GBIF_PORTAL, GBIF_NODE, GBIF_PARTICIPANT, GRSCICOLL_ID, GRSCICOLL_URI,
#' IH_IRN, ROR, GRID, CITES, SYMBIOTA_UUID, WIKIDATA, NCBI_BIOCOLLECTION.
#' @param identifier An identifier of the type given by the identifierType
#' parameter.
#' @param machineTagNamespace Filters for entities with a machine tag in the
#' specified namespace.
#' @param machineTagName Filters for entities with a machine tag with the
#' specified name (use in combination with the machineTagNamespace parameter).
#' @param machineTagValue Filters for entities with a machine tag with the
#' specified value (use in combination with the machineTagNamespace and machineTagName parameters).
#' @param modified The modified date of the dataset. Accepts ranges, and a `*`
#'   can be used as a wildcard, e.g. `modified=2023-04-01,*`.
#' @param query Simple full text search parameter. The value for this parameter
#' can be a simple word or a phrase. Wildcards are not supported.
#' @param deleted Logical specifying whether to return only deleted datasets.
#' @param limit Controls the number of results in the page.
#' @param start Determines the start for the search results.
#' @param curlopts options passed on to [crul::HttpClient].
#'
#' @return A `list`.
#'
#' @details
#' This function allows you to search for some more obscure dataset metadata
#' that might not be possible with `dataset_search()`. For example, searching
#' through registry machinetags.
#'
#' @export
#'
#' @examples \dontrun{
#' dataset(limit=3)
#' dataset(country="US",limit=3)
#' dataset(type="CHECKLIST",limit=3)
#' dataset(identifierType = "URL",limit=3)
#' dataset(identifier = 168,limit=3)
#' dataset(machineTagNamespace = "metasync.gbif.org",limit=3)
#' dataset(machineTagName = "datasetTitle",limit=3)
#' dataset(machineTagValue = "Borkhart",limit=3)
#' dataset(modified = "2023-04-01", limit=3)
#' dataset(query = "dog", limit=3)
#' dataset(deleted=TRUE,limit=3)
#' }
dataset <- function(country = NULL,
                    type = NULL,
                    identifierType = NULL,
                    identifier = NULL,
                    machineTagNamespace = NULL,
                    machineTagName = NULL,
                    machineTagValue = NULL,
                    modified = NULL,
                    query = NULL,
                    deleted = FALSE,
                    limit = NULL,
                    start = NULL,
                    curlopts = list()) {

  # Validate character-valued filters early. `identifier` is deliberately not
  # asserted: numeric identifiers are accepted (see examples, identifier = 168).
  assert(country, "character")
  assert(type, "character")
  assert(identifierType, "character")
  assert(machineTagNamespace, "character")
  assert(machineTagName, "character")
  assert(machineTagValue, "character")
  assert(modified, "character")
  assert(query, "character")

  # Single-valued query-string arguments.
  args <- as.list(
    rgbif_compact(c(q = query,
                    limit = limit,
                    offset = start
                    )))

  # Multi-valued arguments; convmany() expands each into repeated parameters.
  args <- as.list(
    rgbif_compact(c(
      args,
      convmany(country),
      convmany(type),
      convmany(identifierType),
      convmany(identifier),
      convmany(machineTagNamespace),
      convmany(machineTagName),
      convmany(machineTagValue),
      convmany(modified)
    )))

  # Deleted datasets live under a dedicated registry endpoint.
  if (deleted) {
    url <- paste0(gbif_base(), '/dataset/deleted/')
  } else {
    url <- paste0(gbif_base(), '/dataset/')
  }
  tt <- gbif_GET(url, args, FALSE, curlopts)

  # Paging metadata returned alongside the results.
  meta <- tt[c('offset', 'limit', 'endOfRecords', 'count')]

  if (length(tt$results) == 0) {
    out <- NULL
  } else {
    # Wrap any field of length > 1 in a list so each result maps onto a
    # single tibble row. A plain if/else is used instead of ifelse(): the
    # condition is scalar, and ifelse() errors on zero-length fields.
    nest_if_needed <- function(x) {
      if (length(x) > 1) list(x) else x
    }
    out <- lapply(tt$results, function(x) {
      tibble::as_tibble(lapply(x, nest_if_needed))
    })
    out <- bind_rows(out)
  }

  list(meta = data.frame(meta), data = out)
}




27 changes: 27 additions & 0 deletions R/dataset_doi.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#' Get a GBIF dataset from a doi
#'
#' @param doi the doi of the dataset you wish to lookup.
#' @param limit Controls the number of results in the page.
#' @param start Determines the offset for the search results.
#' @param curlopts options passed on to [crul::HttpClient].
#'
#' @details This function allows for dataset lookup using a doi. Be aware that
#' some doi have more than one dataset associated with them.
#'
#' @return A `list`.
#' @export
#'
#' @examples \dontrun{
#' dataset_doi('10.15468/igasai')
#' }
dataset_doi <- function(doi = NULL, limit = 20, start = NULL, curlopts = list()) {
  # A NULL doi would make is_doi below logical(0) and produce an obscure
  # "argument is of length zero" error; fail with a clear message instead.
  if (is.null(doi)) stop("You must supply a doi.", call. = FALSE)
  assert(doi, "character")
  # Loose DOI syntax check; warn rather than fail so unusual but valid
  # registrations still go through to the API.
  is_doi <- grepl("^(10\\.\\d{4,9}/[-._;()/:A-Z0-9]+)$", doi, perl = TRUE,
                  ignore.case = TRUE)
  if (!is_doi) warning("The doi you supplied might not be valid.")
  url <- paste0(gbif_base(), '/dataset/doi/', doi)
  args <- rgbif_compact(list(limit = as.integer(limit),
                             offset = start))
  res <- gbif_GET(url, args, TRUE, curlopts)
  # structure() with no attributes was a no-op; return a plain list.
  list(meta = get_meta(res), data = parse_results(res, NULL))
}
68 changes: 68 additions & 0 deletions R/dataset_export.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#' @name dataset_search
#' @export
dataset_export <- function(query = NULL,
                           type = NULL,
                           publishingCountry = NULL,
                           subtype = NULL,
                           license = NULL,
                           keyword = NULL,
                           publishingOrg = NULL,
                           hostingOrg = NULL,
                           endorsingNodeKey = NULL,
                           decade = NULL,
                           projectId = NULL,
                           hostingCountry = NULL,
                           networkKey = NULL,
                           doi = NULL
                           ) {

  # Validate character-valued filters (decade is numeric, so not asserted).
  assert(query, "character")
  assert(type, "character")
  assert(subtype, "character")
  assert(license, "character")
  assert(keyword, "character")
  assert(publishingOrg, "character")
  assert(hostingOrg, "character")
  assert(endorsingNodeKey, "character")
  assert(publishingCountry, "character")
  assert(projectId, "character")
  assert(hostingCountry, "character")
  assert(networkKey, "character")
  assert(doi, "character")

  # args with single value
  args <- rgbif_compact(list(
    format = "TSV",
    q = query
  ))

  # Multi-valued arguments; convmany() expands each into repeated parameters.
  args <- rgbif_compact(c(
    args,
    convmany(type),
    convmany(subtype),
    convmany(license),
    convmany(keyword),
    convmany(publishingOrg),
    convmany(hostingOrg),
    convmany(endorsingNodeKey),
    convmany(decade),
    convmany(publishingCountry),
    convmany(projectId),
    convmany(hostingCountry),
    convmany(networkKey),
    convmany(doi)
  ))

  url_query <- paste0(names(args), "=", args, collapse = "&")
  url <- paste0(gbif_base(), "/dataset/search/export?", url_query)
  # convmany() brackets multi-values; strip the brackets for the export URL.
  url <- gsub("\\[|\\]", "", url)
  url <- utils::URLencode(url)
  temp_file <- tempfile()
  # Ensure the downloaded TSV is removed even if reading/parsing fails.
  on.exit(unlink(temp_file), add = TRUE)
  utils::download.file(url, destfile = temp_file, quiet = TRUE)
  out <- tibble::as_tibble(data.table::fread(temp_file, showProgress = FALSE))
  colnames(out) <- to_camel(colnames(out))
  # Columns arrive with mixed guessed types; normalize everything to
  # character, then restore the two known numeric count columns.
  out[] <- lapply(out, as.character)
  out$occurrenceRecordsCount <- as.numeric(out$occurrenceRecordsCount)
  out$nameUsagesCount <- as.numeric(out$nameUsagesCount)
  out
}
52 changes: 52 additions & 0 deletions R/dataset_list_funs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#' List datasets that are deleted or have no endpoint.
#'
#'
#' @param limit Controls the number of results in the page.
#' @param start Determines the start for the search results.
#' @param curlopts options passed on to [crul::HttpClient].
#'
#' @return A `list`.
#'
#' @details
#' Get a list of deleted datasets or datasets with no endpoint. You get the
#' full list back; no parameters aside from `limit` and `start` are accepted.
#'
#'
#' @examples \dontrun{
#' dataset_noendpoint(limit=3)
#' }

#' @name dataset_list_funs
#' @export
dataset_duplicate <- function(limit = 20, start = NULL, curlopts = list()) {
  # Duplicate datasets are listed under the /dataset/duplicate/ endpoint.
  dataset_list_get_(endpoint = "duplicate/", limit = limit, start = start,
                    curlopts = curlopts, meta = TRUE)
}

#' @name dataset_list_funs
#' @export
dataset_noendpoint <- function(limit = 20, start = NULL, curlopts = list()) {
  # Datasets lacking endpoints are listed under /dataset/withNoEndpoint/.
  dataset_list_get_(endpoint = "withNoEndpoint/", limit = limit, start = start,
                    curlopts = curlopts, meta = TRUE)
}

# Internal worker shared by the dataset_* list functions. Fetches
# /dataset/<endpoint>; when `meta` is TRUE, wraps the results together with
# the paging metadata, otherwise returns the raw response as a tibble.
dataset_list_get_ <- function(endpoint, limit = NULL, start = NULL, curlopts,
                              meta) {
  url <- paste0(gbif_base(), "/dataset/", endpoint)
  if (is.null(limit)) {
    tt <- gbif_GET(url, args = NULL, TRUE, curlopts)
  } else {
    qry <- rgbif_compact(c(limit = limit, offset = start))
    tt <- gbif_GET(url, qry, TRUE, curlopts)
  }
  if (!meta) {
    return(tibble::as_tibble(tt))
  }
  page_info <- tt[c("offset", "limit", "endOfRecords", "count")]
  if (length(tt$results) == 0) {
    out <- NULL
  } else {
    out <- tibble::as_tibble(tt$results)
  }
  list(meta = data.frame(page_info), data = out)
}
28 changes: 0 additions & 28 deletions R/dataset_metrics.r

This file was deleted.

Loading
Loading