Skip to content

Commit

Permalink
Merge pull request #122 from inbo/checksums
Browse files Browse the repository at this point in the history
Adding functions md5sum() and sha256sum()
  • Loading branch information
florisvdh authored Mar 25, 2021
2 parents ce99389 + 6f41175 commit eccd527
Show file tree
Hide file tree
Showing 8 changed files with 212 additions and 0 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@ RoxygenNote: 7.1.1
Suggests:
bib2df,
curl,
digest,
googledrive,
jsonlite,
knitr,
mapview,
openssl,
parallel,
raster (>= 3.3-16),
readxl,
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# Generated by roxygen2: do not edit by hand

export(checksum)
export(convert_base4frac_to_dec)
export(convert_dec_to_base4frac)
export(download_zenodo)
export(expand_types)
export(fileman_folders)
export(fileman_up)
export(md5sum)
export(read_GRTSmh)
export(read_GRTSmh_base4frac)
export(read_GRTSmh_diffres)
Expand All @@ -26,6 +28,8 @@ export(read_types)
export(read_watercourse_100mseg)
export(read_watersurfaces)
export(read_watersurfaces_hab)
export(sha256sum)
export(xxh64sum)
importFrom(assertthat,assert_that)
importFrom(assertthat,is.flag)
importFrom(assertthat,is.string)
Expand Down Expand Up @@ -67,6 +71,7 @@ importFrom(git2rdata,read_vc)
importFrom(magrittr,set_colnames)
importFrom(plyr,mapvalues)
importFrom(purrr,map)
importFrom(purrr,map_chr)
importFrom(purrr,map_lgl)
importFrom(rlang,.data)
importFrom(rlang,na_lgl)
Expand Down
116 changes: 116 additions & 0 deletions R/filemanagement.R
Original file line number Diff line number Diff line change
Expand Up @@ -373,3 +373,119 @@ fileman_up <- function(name,



#' Calculate file checksums
#'
#' The functions calculate the checksum (digest; hash value) of
#' one or multiple files.
#' They can be used to verify file integrity.
#'
#' A few cryptographic and non-cryptographic hash functions are implemented,
#' either from the OpenSSL library (through
#' \href{https://CRAN.R-project.org/package=openssl}{\code{openssl}})
#' or as embedded in the
#' \href{https://CRAN.R-project.org/package=digest}{\code{digest}}
#' package.
#'
#' Functions \code{md5sum()} etc. are simple shortcuts to \code{checksum()}
#' with the appropriate hash function preset.
#' Their names were chosen to match those of xxHash and GNU coreutils.
#'
#' The cryptographic algorithms use the OpenSSL implementation and
#' stream-hash the binary
#' contents of the connections to the respective files.
#' They turn the hash-format for binary streams by the \code{openssl} package
#' into a regular hash string.
#' Note that \code{n2khab} will mask
#' \code{\link[tools:md5sum]{tools::md5sum()}},
#' which is a standalone implementation.
#'
#' @param files Character vector of file path(s).
#' File path(s) can be absolute or relative.
#' @param hash_fun String that defines the hash function.
#' See \emph{Usage} for allowed values; defaults to the first.
#'
#' @return
#' Named character vector with the same length as \code{files}
#' and with the file names as names.
#'
#' @family functions regarding file management for N2KHAB projects
#'
#' @examples
#' # creating two different temporary files:
#' file1 <- tempfile()
#' file2 <- tempfile()
#' files <- c(file1, file2)
#' file.create(files)
#' con <- file(file2)
#' writeLines("some text", con)
#' close(con)
#'
#' # computing alternative checksums:
#' checksum(files)
#' xxh64sum(files)
#' md5sum(files)
#' sha256sum(files)
#'
#' \dontrun{
#' # This will error:
#' files <- c(file1, file2, tempfile(), tempfile())
#' checksum(files)
#' }
#'
#' @importFrom purrr
#' map_chr
#' @importFrom stringr
#' str_detect
#' @export
checksum <- function(files,
hash_fun = c("xxh64", "md5", "sha256")) {

assert_that_allfiles_exist(files)
hash_fun <- match.arg(hash_fun)

if (str_detect(hash_fun, "^xxh")) {
require_pkgs("digest")
checksums <- map_chr(files,
~digest::digest(.,
algo = "xxhash64",
file = TRUE))
} else {
require_pkgs("openssl")
fun <- eval(str2lang(paste0("openssl::", hash_fun)))
checksums <- map_chr(files, ~paste(fun(file(.))))
}

names(checksums) <- basename(files)
return(checksums)
}

#' @rdname checksum
#' @export
xxh64sum <- function(files) checksum(files, hash_fun = "xxh64")

#' @rdname checksum
#' @export
md5sum <- function(files) checksum(files, hash_fun = "md5")

#' @rdname checksum
#' @export
sha256sum <- function(files) checksum(files, hash_fun = "sha256")


#' @importFrom assertthat
#' assert_that
#' @keywords internal
assert_that_allfiles_exist <- function(x) {
exist <- file.exists(x)
assert_that(all(exist),
msg = paste0("The following path(s) do not exist:\n",
paste0(x[!exist], collapse = "\n")))
isdir <- dir.exists(x)
assert_that(!any(isdir),
msg = paste0("Only files are accepted; ",
"the following path(s) are directories:\n",
paste0(x[isdir], collapse = "\n")))
}



84 changes: 84 additions & 0 deletions man/checksum.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/download_zenodo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/fileman_folders.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions man/fileman_up.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,5 @@ reference:
- fileman_up
- convert_dec_to_base4frac
- convert_base4frac_to_dec
- md5sum
- sha256sum

0 comments on commit eccd527

Please sign in to comment.