diff --git a/.gitignore b/.gitignore index 9fcc2cc..e5dc7ad 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ .DS_Store .Rproj.user inst/ignore/auth.R +inst/ignore/issn_title_collect.R .httr-oauth diff --git a/.travis.yml b/.travis.yml index ae37a68..1b9c52c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,11 @@ language: r sudo: false cache: packages +addons: + apt: + packages: + - libv8-dev + matrix: include: - os: linux diff --git a/DESCRIPTION b/DESCRIPTION index 360e38e..bfde92e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Title: Interface to the 'Orcid.org' API Description: Client for the 'Orcid.org' API (). Functions included for searching for people, searching by 'DOI', and searching by 'Orcid' 'ID'. -Version: 0.4.3.9110 +Version: 0.4.3.9113 Authors@R: c(person(given = "Scott", family = "Chamberlain", @@ -30,7 +30,9 @@ Imports: data.table Suggests: testthat, - knitr + knitr, + rcrossref, + handlr RoxygenNote: 6.1.1 VignetteBuilder: knitr X-schema.org-applicationCategory: Literature diff --git a/NAMESPACE b/NAMESPACE index 7f59b60..8ff0c67 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ export(orcid_activities) export(orcid_address) export(orcid_auth) export(orcid_bio) +export(orcid_citations) export(orcid_doi) export(orcid_educations) export(orcid_email) @@ -40,6 +41,7 @@ export(works) importFrom(crul,HttpClient) importFrom(fauxpas,find_error_class) importFrom(fauxpas,http) +importFrom(handlr,HandlrClient) importFrom(httr,add_headers) importFrom(httr,oauth2.0_token) importFrom(httr,oauth_app) diff --git a/R/orcid_citations.R b/R/orcid_citations.R new file mode 100644 index 0000000..b7b3e29 --- /dev/null +++ b/R/orcid_citations.R @@ -0,0 +1,169 @@ +#' Get citations +#' +#' @export +#' @param orcid (character) Orcid identifier(s) of the form +#' XXXX-XXXX-XXXX-XXXX. required. +#' @param put_code (character/integer) one or more put codes. up to +#' 50. optional +#' @param cr_format Used in Crossref queries only. 
Name of the format. One of +#' "rdf-xml", "turtle", +#' "citeproc-json", "citeproc-json-ish", "text", "ris", "bibtex" (default), +#' "crossref-xml", "datacite-xml", "bibentry", or "crossref-tdm". The format +#' "citeproc-json-ish" is a format that is not quite proper citeproc-json. +#' Passed to `rcrossref::cr_cn`. The special "citeproc2bibtex" value asks +#' for citeproc-json from Crossref, then converts it into bibtex format +#' using [handlr::HandlrClient] +#' @param cr_style Used in Crossref queries only. A CSL style (for text +#' format only). See `rcrossref::get_styles()` for options. Default: apa. +#' Passed to `rcrossref::cr_cn` +#' @param cr_locale Used in Crossref queries only. Language locale. +#' See [Sys.getlocale], passed to `rcrossref::cr_cn` +#' @param ... Curl options passed on to [crul::HttpClient] +#' @template deets +#' @details This function is focused on getting citations only. +#' You can get all citations for an ORCID, or for certain works +#' using a PUT code, or for many PUT codes. +#' +#' We attempt to get citations via Crossref using \pkg{rcrossref} +#' whenever possible as they are the most flexible and don't have as +#' many mistakes in the text. If there is no DOI, we fetch the +#' citation from ORCID. +#' +#' Citations fetched from Crossref come back in `cr_format` (bibtex by +#' default). Citations fetched from ORCID are always CSL-JSON. 
+#'
+#' @return data.frame, with the columns:
+#'
+#' - put: ORCID PUT code, identifying the work identifier in ORCID's records
+#' - id: the external identifier
+#' - id_type: the type of external identifier
+#' - format: the citation format retrieved
+#' - citation: the citation text, in the format given by `format`
+#'
+#' @examples \dontrun{
+#' (res <- orcid_citations(orcid = "0000-0002-9341-7985"))
+#' (res2 <- orcid_citations(orcid = "0000-0002-1642-628X"))
+#' (res2 <- orcid_citations(orcid = c("0000-0002-9341-7985", "0000-0002-1642-628X")))
+#'
+#' # get individual works
+#' ## a single put code
+#' (a <- orcid_citations(orcid = "0000-0002-9341-7985", put_code = 5011717))
+#' ## many put codes
+#' (b <- orcid_citations(orcid = "0000-0002-9341-7985",
+#'    put_code = c(5011717, 15536016)))
+#'
+#' # request other formats, Crossref only
+#' orcid_citations(orcid = "0000-0002-9341-7985", cr_format = "turtle")
+#'
+#' # parse citation data if you wish
+#' # for parsing bibtex can use bibtex package or others
+#' (res <- orcid_citations(orcid = "0000-0002-9341-7985"))
+#' lapply(res[res$format == "csl-json", "citation"][[1]], jsonlite::fromJSON)
+#'
+#' # lots of citations
+#' orcid_citations(orcid = "0000-0001-8642-6325")
+#'
+#' # example with no external identifier, returns NA's
+#' orcid_citations(orcid = "0000-0001-8642-6325", 26222265)
+#' }
+orcid_citations <- function(orcid, put_code = NULL, cr_format = "bibtex",
+  cr_style = "apa", cr_locale = "en-US", ...) {
+
+  if (!is.null(put_code) && length(orcid) > 1) {
+    stop("if 'put_code' is given, 'orcid' must be length 1")
+  }
+
+  tmp <- orcid_works(orcid, put_code)
+
+  dat <- if (!is.null(put_code)) {
+    list(list(tmp[[1]]$works))
+  } else {
+    lapply(tmp, function(w) split(w$works, w$works$`put-code`))
+  }
+
+  if (length(orcid) > 1) {
+    # curl options go to each_orcid(); Map() would iterate over them instead
+    Map(function(a, b) each_orcid(a, b, put_code, cr_format, cr_style,
+      cr_locale, ...), dat, orcid)
+  } else {
+    each_orcid(dat[[1]], orcid, put_code, cr_format, cr_style, cr_locale, ...)
+  }
+}
+
+# Citations for one ORCID: process_cites() per work in `m`, combined by as_dt()
+each_orcid <- function(m, orcid, put_code, cr_format, cr_style, cr_locale, ...) {
+  cites <- plyr::llply(m, function(z) {
+    # fix for whenever > 1 put code to make column names more useable
+    if (all(grepl("work", names(z)))) {
+      names(z) <- gsub("^work\\.", "", names(z))
+    }
+    pc <- z$`put-code`
+    if (!is.null(put_code)) {
+      if (length(put_code) == 1) {
+        df <- z$`external-ids`$`external-id`[[1]]
+        process_cites(df, pc, orcid, cr_format, cr_style, cr_locale, ...)
+      } else {
+        df <- z$`external-ids`
+        Map(process_cites, df, pc, orcid = orcid, cr_format = cr_format,
+          cr_style = cr_style, cr_locale = cr_locale, ...)
+      }
+    } else {
+      df <- z$`external-ids.external-id`[[1]]
+      process_cites(df, pc, orcid, cr_format, cr_style, cr_locale, ...)
+    }
+  }, .inform = TRUE)
+  # unnest if no names at top level
+  if (is.null(names(cites[[1]])) && length(cites[[1]]) > 1) cites <- unlist(cites, FALSE)
+  # combine
+  as_dt(cites)
+}
+
+# One work's citation: via Crossref when it has a DOI, else CSL-JSON from ORCID
+process_cites <- function(df, pc, orcid, cr_format, cr_style, cr_locale, ...) {
+  if (length(df) == 0) {
+    return(list(put = pc, id = NA_character_, id_type = NA_character_,
+      format = NA_character_, citation = ""))
+  }
+  if ("doi" %in% df$`external-id-type`) {
+    id <- df[df$`external-id-type` %in% "doi", "external-id-value"]
+    type <- "doi"
+    if (cr_format == "citeproc2bibtex") {
+      chkpkg('handlr')
+      cr_format <- "citeproc-json" # request citeproc-json, convert to bibtex below
+      fmat <- "bibtex"
+      ct <- cite_doi(id, cr_format, cr_style, cr_locale, ...) %||% ""
+      cli <- handlr::HandlrClient$new(x = ct)
+      cli$read("citeproc")
+      ct <- paste0(cli$write("bibtex"), collapse = "\n")
+    } else {
+      fmat <- cr_format
+      ct <- cite_doi(id, cr_format, cr_style, cr_locale, ...) %||% ""
+    }
+  } else {
+    id <- df[df$`external-id-type` %in% "eid", "external-id-value"]
+    type <- "eid"
+    fmat <- 'csl-json'
+    ct <- cite_put(orcid, pc, ...)
%||% "" + } + list(put = pc, id = id, id_type = type, format = fmat, citation = ct) +} + +chkpkg <- function(x) { + if (!requireNamespace(x, quietly = TRUE)) { + stop("Please install ", x, call. = FALSE) + } else { + invisible(TRUE) + } +} + +cite_doi <- function(x, cr_format = "bibtex", cr_style = "apa", cr_locale = "en-US", ...) { + chkpkg('rcrossref') + rcrossref::cr_cn(x, format = cr_format, style = cr_style, locale = cr_locale, raw = TRUE, ...) +} + +cite_put <- function(orcid, pc, ...) { + orcid_prof_helper(orcid, file.path("work", pc), + ctype = "application/vnd.citationstyles.csl+json", + # ctype = "application/x-bibtex", + parse = FALSE, ...) +} diff --git a/R/rorcid-package.R b/R/rorcid-package.R index 48e48ad..d22eae7 100644 --- a/R/rorcid-package.R +++ b/R/rorcid-package.R @@ -50,6 +50,7 @@ #' @importFrom crul HttpClient #' @importFrom fauxpas http find_error_class #' @importFrom jsonlite fromJSON +#' @importFrom handlr HandlrClient #' @name rorcid-package #' @docType package #' @author Scott Chamberlain \email{myrmecocystus@@gmail.com} @@ -66,6 +67,13 @@ NULL #' Lookup vector for journal titles by ISSN #' +#' named vector of journal titles. the values are journal titles and +#' the names are ISSN's. +#' +#' length: 57,968 +#' +#' data collected on 2018-06-13 from Crossref +#' #' @name issn_title #' @docType data #' @keywords data diff --git a/R/zzz.R b/R/zzz.R index 0050f51..f871556 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -102,10 +102,10 @@ pluck <- function(x, name, type) { pop <- function(x, name) x[ !names(x) %in% name ] -orcid_prof_helper <- function(x, path, ctype = ojson, ...) { +orcid_prof_helper <- function(x, path, ctype = ojson, parse = TRUE, ...) { url2 <- file.path(orcid_base(), x, path) out <- orc_GET(url2, ctype = ctype, ...) 
- switch_parser(ctype, out) + if (parse) switch_parser(ctype, out) else out } switch_parser <- function(ctype, x) { diff --git a/man/issn_title.Rd b/man/issn_title.Rd index 5aab1b1..ba93e28 100644 --- a/man/issn_title.Rd +++ b/man/issn_title.Rd @@ -5,6 +5,12 @@ \alias{issn_title} \title{Lookup vector for journal titles by ISSN} \description{ -Lookup vector for journal titles by ISSN +named vector of journal titles. the values are journal titles and +the names are ISSN's. +} +\details{ +length: 57,968 + +data collected on 2018-06-13 from Crossref } \keyword{data} diff --git a/man/orcid_citations.Rd b/man/orcid_citations.Rd new file mode 100644 index 0000000..9980167 --- /dev/null +++ b/man/orcid_citations.Rd @@ -0,0 +1,94 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/orcid_citations.R +\name{orcid_citations} +\alias{orcid_citations} +\title{Get citations} +\usage{ +orcid_citations(orcid, put_code = NULL, cr_format = "bibtex", + cr_style = "apa", cr_locale = "en-US", ...) +} +\arguments{ +\item{orcid}{(character) Orcid identifier(s) of the form +XXXX-XXXX-XXXX-XXXX. required.} + +\item{put_code}{(character/integer) one or more put codes. up to +50. optional} + +\item{cr_format}{Used in Crossref queries only. Name of the format. One of +"rdf-xml", "turtle", +"citeproc-json", "citeproc-json-ish", "text", "ris", "bibtex" (default), +"crossref-xml", "datacite-xml","bibentry", or "crossref-tdm". The format +"citeproc-json-ish" is a format that is not quite proper citeproc-json. +passed to \code{rcrossref::cr_cn}. The special "citeproc2bibtex" value asks +for citeproc-json from Crossref, then converts it into bibtex format +using \link[handlr:HandlrClient]{handlr::HandlrClient}} + +\item{cr_style}{Used in Crossref queries only. A CSL style (for text +format only). See ‘get_styles()’ for options. Default: apa. +passed to \code{rcrossref::cr_cn}} + +\item{cr_locale}{Used in Crossref queries only. Language locale. 
+See \link{Sys.getlocale}, passed to \code{rcrossref::cr_cn}} + +\item{...}{Curl options passed on to \link[crul:HttpClient]{crul::HttpClient}} +} +\value{ +A list of results for each Orcid ID passed in, with each element +named by the Orcid ID + +data.frame, with the columns: +\itemize{ +\item put: ORCID PUT code, identifying the work identifier in ORCID's records +\item id: the external identifier +\item id_type: the type of external identifier +\item format: the citation format retrieved +\item citation: the citation as JSON +} +} +\description{ +Get citations +} +\details{ +This function is vectorized, so you can pass in many ORCID's, and +there's an element returned for each ORCID you put in. + +This function is focused on getting citations only. +You can get all citations for an ORCID, or for certain works +using a PUT code, or for many PUT codes. + +We attempt to get citations via Crossref using \pkg{rcrossref} +whenever possible as they are the most flexible and don't have as +many mistakes in the text. If there is no DOI, we fetch the +citation from ORCID. + +Right now we get JSON citations back. We'd like to support bibtex +format. DOI.org supports this but not ORCID. 
+} +\examples{ +\dontrun{ +(res <- orcid_citations(orcid = "0000-0002-9341-7985")) +(res2 <- orcid_citations(orcid = "0000-0002-1642-628X")) +(res2 <- orcid_citations(orcid = c("0000-0002-9341-7985", "0000-0002-1642-628X"))) + +# get individual works +## a single put code +(a <- orcid_citations(orcid = "0000-0002-9341-7985", put_code = 5011717)) +## many put codes +(b <- orcid_citations(orcid = "0000-0002-9341-7985", + put_code = c(5011717, 15536016))) + +# request other formats, Crossref only +orcid_citations(orcid = "0000-0002-9341-7985", cr_format = "turtle") + +# parse citation data if you wish +# for parsing bibtex can use bibtex package or others +(res <- orcid_citations(orcid = "0000-0002-9341-7985")) +lapply(res[res$format == "csl-json", "citation"][[1]], jsonlite::fromJSON) + +# lots of citations +orcid_citations(orcid = "0000-0001-8642-6325") + +# example with no external identifier, returns NA's +orcid_citations(orcid = "0000-0001-8642-6325", 26222265) +} +}