diff --git a/DESCRIPTION b/DESCRIPTION index 5d5126bd..567f62c9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -49,10 +49,12 @@ RoxygenNote: 7.1.1 Suggests: bib2df, curl, + digest, googledrive, jsonlite, knitr, mapview, + openssl, parallel, raster (>= 3.3-16), readxl, diff --git a/NAMESPACE b/NAMESPACE index d0561640..5a40dc15 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,13 @@ # Generated by roxygen2: do not edit by hand +export(checksum) export(convert_base4frac_to_dec) export(convert_dec_to_base4frac) export(download_zenodo) export(expand_types) export(fileman_folders) export(fileman_up) +export(md5sum) export(read_GRTSmh) export(read_GRTSmh_base4frac) export(read_GRTSmh_diffres) @@ -26,6 +28,8 @@ export(read_types) export(read_watercourse_100mseg) export(read_watersurfaces) export(read_watersurfaces_hab) +export(sha256sum) +export(xxh64sum) importFrom(assertthat,assert_that) importFrom(assertthat,is.flag) importFrom(assertthat,is.string) @@ -67,6 +71,7 @@ importFrom(git2rdata,read_vc) importFrom(magrittr,set_colnames) importFrom(plyr,mapvalues) importFrom(purrr,map) +importFrom(purrr,map_chr) importFrom(purrr,map_lgl) importFrom(rlang,.data) importFrom(rlang,na_lgl) diff --git a/R/filemanagement.R b/R/filemanagement.R index c7b3cbbe..9e6e1b0f 100644 --- a/R/filemanagement.R +++ b/R/filemanagement.R @@ -373,3 +373,119 @@ fileman_up <- function(name, +#' Calculate file checksums +#' +#' The functions calculate the checksum (digest; hash value) of +#' one or multiple files. +#' They can be used to verify file integrity. +#' +#' A few cryptographic and non-cryptographic hash functions are implemented, +#' either from the OpenSSL library (through +#' \href{https://CRAN.R-project.org/package=openssl}{\code{openssl}}) +#' or as embedded in the +#' \href{https://CRAN.R-project.org/package=digest}{\code{digest}} +#' package. +#' +#' Functions \code{md5sum()} etc. are simple shortcuts to \code{checksum()} +#' with the appropriate hash function preset. 
+#' Their names were chosen to match those of xxHash and GNU coreutils.
+#'
+#' The cryptographic algorithms use the OpenSSL implementation and
+#' stream-hash the binary
+#' contents of the connections to the respective files.
+#' They turn the hash-format for binary streams by the \code{openssl} package
+#' into a regular hash string.
+#' Note that \code{n2khab} will mask
+#' \code{\link[tools:md5sum]{tools::md5sum()}},
+#' which is a standalone implementation.
+#'
+#' @param files Character vector of file path(s).
+#' File path(s) can be absolute or relative.
+#' @param hash_fun String that defines the hash function.
+#' See \emph{Usage} for allowed values; defaults to the first.
+#'
+#' @return
+#' Named character vector with the same length as \code{files}
+#' and with the file names as names.
+#'
+#' @family functions regarding file management for N2KHAB projects
+#'
+#' @examples
+#' # creating two different temporary files:
+#' file1 <- tempfile()
+#' file2 <- tempfile()
+#' files <- c(file1, file2)
+#' file.create(files)
+#' con <- file(file2)
+#' writeLines("some text", con)
+#' close(con)
+#'
+#' # computing alternative checksums:
+#' checksum(files)
+#' xxh64sum(files)
+#' md5sum(files)
+#' sha256sum(files)
+#'
+#' \dontrun{
+#' # This will error:
+#' files <- c(file1, file2, tempfile(), tempfile())
+#' checksum(files)
+#' }
+#'
+#' @importFrom purrr
+#' map_chr
+#' @importFrom stringr
+#' str_detect
+#' @export
+checksum <- function(files,
+                     hash_fun = c("xxh64", "md5", "sha256")) {
+  assert_that_allfiles_exist(files)
+  hash_fun <- match.arg(hash_fun)
+  # choose the per-file hashing routine that belongs to the requested algorithm
+  if (str_detect(hash_fun, "^xxh")) {
+    require_pkgs("digest")
+    hash_file <- function(path) {
+      digest::digest(path, algo = "xxhash64", file = TRUE)
+    }
+  } else {
+    require_pkgs("openssl")
+    # look up the exported openssl function by name (no eval of built text)
+    hash_stream <- getExportedValue("openssl", hash_fun)
+    hash_file <- function(path) paste(hash_stream(file(path)))
+  }
+  # hash every file, then label each result with its base file name
+  checksums <- map_chr(files, hash_file)
+  names(checksums) <- basename(files)
+  checksums
+}
+
+#' @rdname checksum
+#' @export
+xxh64sum <- function(files) checksum(files = files, hash_fun = "xxh64")
+
+#' @rdname checksum
+#' @export
+md5sum <- function(files) checksum(files = files, hash_fun = "md5")
+
+#' @rdname checksum
+#' @export
+sha256sum <- function(files) checksum(files = files, hash_fun = "sha256")
+
+#' @importFrom assertthat
+#' assert_that
+#' @keywords internal
+assert_that_allfiles_exist <- function(x) {
+  # Errors unless every element of x is an existing path that is not a
+  # directory; the error message lists the offending paths one per line.
+  report <- function(header, paths) {
+    paste0(header, paste0(paths, collapse = "\n"))
+  }
+  found <- file.exists(x)
+  assert_that(all(found),
+              msg = report("The following path(s) do not exist:\n", x[!found]))
+  is_folder <- dir.exists(x)
+  assert_that(!any(is_folder),
+              msg = report(paste0("Only files are accepted; ",
+                                  "the following path(s) are directories:\n"),
+                           x[is_folder]))
+}
diff --git a/man/checksum.Rd b/man/checksum.Rd
new file mode 100644
index 00000000..3282f334
--- /dev/null
+++ b/man/checksum.Rd
@@ -0,0 +1,84 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filemanagement.R
+\name{checksum}
+\alias{checksum}
+\alias{xxh64sum}
+\alias{md5sum}
+\alias{sha256sum}
+\title{Calculate file checksums}
+\usage{
+checksum(files, hash_fun = c("xxh64", "md5", "sha256"))
+
+xxh64sum(files)
+
+md5sum(files)
+
+sha256sum(files)
+}
+\arguments{
+\item{files}{Character vector of file path(s).
+File path(s) can be absolute or relative.}
+
+\item{hash_fun}{String that defines the hash function.
+See \emph{Usage} for allowed values; defaults to the first.}
+}
+\value{
+Named character vector with the same length as \code{files}
+and with the file names as names.
+}
+\description{
+The functions calculate the checksum (digest; hash value) of
+one or multiple files.
+They can be used to verify file integrity.
+}
+\details{
+A few cryptographic and non-cryptographic hash functions are implemented,
+either from the OpenSSL library (through
+\href{https://CRAN.R-project.org/package=openssl}{\code{openssl}})
+or as embedded in the
+\href{https://CRAN.R-project.org/package=digest}{\code{digest}}
+package.
+ +Functions \code{md5sum()} etc. are simple shortcuts to \code{checksum()} +with the appropriate hash function preset. +Their names were chosen to match those of xxHash and GNU coreutils. + +The cryptographic algorithms use the OpenSSL implementation and +stream-hash the binary +contents of the connections to the respective files. +They turn the hash-format for binary streams by the \code{openssl} package +into a regular hash string. +Note that \code{n2khab} will mask +\code{\link[tools:md5sum]{tools::md5sum()}}, +which is a standalone implementation. +} +\examples{ +# creating two different temporary files: +file1 <- tempfile() +file2 <- tempfile() +files <- c(file1, file2) +file.create(files) +con <- file(file2) +writeLines("some text", con) +close(con) + +# computing alternative checksums: +checksum(files) +xxh64sum(files) +md5sum(files) +sha256sum(files) + +\dontrun{ +# This will error: +files <- c(file1, file2, tempfile(), tempfile()) +checksum(files) +} + +} +\seealso{ +Other functions regarding file management for N2KHAB projects: +\code{\link{download_zenodo}()}, +\code{\link{fileman_folders}()}, +\code{\link{fileman_up}()} +} +\concept{functions regarding file management for N2KHAB projects} diff --git a/man/download_zenodo.Rd b/man/download_zenodo.Rd index b45c9181..a0c3d2d6 100644 --- a/man/download_zenodo.Rd +++ b/man/download_zenodo.Rd @@ -42,6 +42,7 @@ download_zenodo(doi = "10.5281/zenodo.168478") } \seealso{ Other functions regarding file management for N2KHAB projects: +\code{\link{checksum}()}, \code{\link{fileman_folders}()}, \code{\link{fileman_up}()} } diff --git a/man/fileman_folders.Rd b/man/fileman_folders.Rd index 2e176113..c4d9efe7 100644 --- a/man/fileman_folders.Rd +++ b/man/fileman_folders.Rd @@ -39,6 +39,7 @@ datapath <- fileman_folders(root = "git") } \seealso{ Other functions regarding file management for N2KHAB projects: +\code{\link{checksum}()}, \code{\link{download_zenodo}()}, \code{\link{fileman_up}()} } diff --git 
a/man/fileman_up.Rd b/man/fileman_up.Rd index 880b6a09..9e04ca66 100644 --- a/man/fileman_up.Rd +++ b/man/fileman_up.Rd @@ -39,6 +39,7 @@ fileman_up("n2khab_data") } \seealso{ Other functions regarding file management for N2KHAB projects: +\code{\link{checksum}()}, \code{\link{download_zenodo}()}, \code{\link{fileman_folders}()} } diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 4a75f0d7..65f7b7f6 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -55,3 +55,5 @@ reference: - fileman_up - convert_dec_to_base4frac - convert_base4frac_to_dec + - md5sum + - sha256sum