Skip to content

Commit

Permalink
include all required fields for hashing in lockfile (#2059)
Browse files Browse the repository at this point in the history
* start recording extra fields in lockfile

* add a test

* reorganize

* include more fields

* drop Remote fields for cran-like records

* tweaks

* update NEWS; provide option as fallback

* add snapshot test for old lockfile

* test different versions of lockfiles

* compute hash records on demand instead of storing in lockfile

* fixes for windows

* try alternate test for windows

* try once more

* one more attempt?

* it works on my machine (tm)

* also explicitly test file

* split dependency fields
  • Loading branch information
kevinushey authored Jan 10, 2025
1 parent 704e4dd commit c0255d3
Show file tree
Hide file tree
Showing 20 changed files with 339 additions and 94 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

# renv 1.1.0 (UNRELEASED)

* `renv` now includes the contents of each package's DESCRIPTION file in
the package records for generated lockfiles. (#2057)

* Fixed an issue where `renv::snapshot()` could fail if invoked within
a project containing empty or invalid `.ipynb` files. (#2073)

Expand Down
4 changes: 4 additions & 0 deletions R/aaa.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,7 @@ testing <- function() {
devel <- function() {
identical(R.version[["status"]], "Under development (unstable)")
}

devmode <- function() {
Sys.getenv("DEVTOOLS_LOAD") == .packageName
}
22 changes: 16 additions & 6 deletions R/backports.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@

if (is.null(.BaseNamespaceEnv$startsWith)) {
startsWith <- function(x, prefix) {
pattern <- sprintf("^\\Q%s\\E", prefix)
grepl(pattern, x, perl = TRUE)
if (is.null(.BaseNamespaceEnv$dir.exists)) {

dir.exists <- function(paths) {
info <- suppressWarnings(file.info(paths, extra_cols = FALSE))
info$isdir %in% TRUE
}

}

if (is.null(.BaseNamespaceEnv$lengths)) {
Expand All @@ -15,3 +15,13 @@ if (is.null(.BaseNamespaceEnv$lengths)) {
}

}

if (is.null(.BaseNamespaceEnv$startsWith)) {

startsWith <- function(x, prefix) {
pattern <- sprintf("^\\Q%s\\E", prefix)
grepl(pattern, x, perl = TRUE)
}

}

3 changes: 3 additions & 0 deletions R/bootstrap.R
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,9 @@ renv_bootstrap_download_github <- function(version) {

# prepare download options
token <- renv_bootstrap_github_token()
if (is.null(token))
token <- ""

if (nzchar(Sys.which("curl")) && nzchar(token)) {
fmt <- "--location --fail --header \"Authorization: token %s\""
extra <- sprintf(fmt, token)
Expand Down
15 changes: 6 additions & 9 deletions R/files.R
Original file line number Diff line number Diff line change
Expand Up @@ -576,16 +576,13 @@ renv_file_broken_unix <- function(paths) {
!is.na(Sys.readlink(paths)) & !file.exists(paths)
}

# unfortunately, as far as I know, there isn't a more reliable
# way of detecting broken junction points on Windows using vanilla R
renv_file_broken_win32 <- function(paths) {
# TODO: the behavior of file.exists() for a broken junction point
# appears to have changed in the development version of R;
# we have to be extra careful here...
if (getRversion() < "4.2.0") {
info <- renv_file_info(paths)
(info$isdir %in% TRUE) & is.na(info$mtime)
} else {
file.access(paths, mode = 0L) == 0L & !file.exists(paths)
}
time <- Sys.time()
map_lgl(paths, function(path) {
file.access(path) == 0L && !Sys.setFileTime(path, time)
})
}

renv_file_size <- function(path) {
Expand Down
77 changes: 53 additions & 24 deletions R/hash.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,44 @@ renv_hash_text <- function(text) {
renv_bootstrap_hash_text(text)
}

renv_hash_fields <- function(dcf) {
c(
renv_hash_fields_default(),
renv_hash_fields_remotes(dcf)
)
}

renv_hash_fields_default <- function() {
c(
"Package", "Version", "Title",
"Author", "Maintainer", "Description",
"Depends", "Imports", "Suggests", "LinkingTo"
)
}

renv_hash_fields_remotes <- function(dcf) {

# if this seems to be a cran-like record, only keep remotes
# when RemoteSha appears to be a hash (e.g. for r-universe)
# note that RemoteSha may be a package version when installed
# by e.g. pak
if (renv_record_cranlike(dcf)) {
sha <- dcf[["RemoteSha"]]
if (is.null(sha) || nchar(sha) < 40L)
return(character())
}

# grab the relevant remotes
remotes <- grep("^Remote", names(dcf), perl = TRUE, value = TRUE)

# don't include 'RemoteRef' if it's a non-informative remote
if (identical(dcf[["RemoteRef"]], "HEAD"))
remotes <- setdiff(remotes, "RemoteRef")

remotes

}

renv_hash_description <- function(path) {
filebacked(
context = "renv_hash_description",
Expand All @@ -12,24 +50,26 @@ renv_hash_description <- function(path) {
}

renv_hash_description_impl <- function(path) {
record <- renv_description_read(path)
renv_hash_record(record)
}

dcf <- case(
is.character(path) ~ renv_description_read(path),
is.list(path) ~ path,
~ stop("unexpected path '%s'", path)
)
renv_hash_record <- function(record) {

# include default fields
fields <- c(
"Package", "Version", "Title", "Author", "Maintainer", "Description",
"Depends", "Imports", "Suggests", "LinkingTo"
)
# find relevant fields for hashing
fields <- renv_hash_fields(record)

# add remotes fields
remotes <- renv_hash_description_remotes(dcf)
# collapse vector / list dependency fields
depfields <- c("Depends", "Imports", "Suggests", "LinkingTo", "Enhances")
for (depfield in depfields) {
if (!is.null(record[[depfield]])) {
value <- unlist(record[[depfield]])
record[[depfield]] <- paste(value, collapse = ", ")
}
}

# retrieve these fields
subsetted <- dcf[renv_vector_intersect(c(fields, remotes), names(dcf))]
subsetted <- record[renv_vector_intersect(fields, names(record))]

# sort names (use C locale to ensure consistent ordering)
ordered <- subsetted[csort(names(subsetted))]
Expand Down Expand Up @@ -71,14 +111,3 @@ renv_hash_description_impl <- function(path) {
invisible(hash)

}

renv_hash_description_remotes <- function(dcf) {

# ignore other remote fields for cranlike remotes
if (renv_record_cranlike(dcf))
return(character())

# otherwise, include any other discovered remote fields
grep("^Remote", names(dcf), value = TRUE)

}
25 changes: 22 additions & 3 deletions R/lockfile-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,28 @@ renv_lockfile_read_finish_impl <- function(key, val) {
}

renv_lockfile_read_finish <- function(data) {
data <- enumerate(data, renv_lockfile_read_finish_impl)
class(data) <- "renv_lockfile"
data

# create lockfile
lockfile <- enumerate(data, renv_lockfile_read_finish_impl)
class(lockfile) <- "renv_lockfile"

# compute hashes for records if possible
renv_lockfile_records(lockfile) <-
renv_lockfile_records(lockfile) %>%
map(function(record) {

record$Hash <- record$Hash %||% {
fields <- renv_hash_fields_remotes(record)
if (all(names(record) %in% fields))
renv_hash_record(record)
}

record

})

# return lockfile
lockfile
}

renv_lockfile_read_preflight <- function(contents) {
Expand Down
3 changes: 2 additions & 1 deletion R/lockfile-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,9 @@ renv_lockfile_write_json <- function(lockfile, file = stdout()) {

prepared <- enumerate(lockfile, renv_lockfile_write_json_prepare)

box <- c("Depends", "Imports", "Suggests", "LinkingTo", "Requirements")
box <- c("Requirements")
config <- list(box = box)

json <- renv_json_convert(prepared, config)
if (is.null(file))
return(json)
Expand Down
1 change: 1 addition & 0 deletions R/lockfile.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ renv_lockfile_save <- function(lockfile, project) {
}

renv_lockfile_load <- function(project, strict = FALSE) {

path <- renv_lockfile_path(project)
if (file.exists(path))
return(renv_lockfile_read(path))
Expand Down
2 changes: 1 addition & 1 deletion R/record.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,5 +121,5 @@ renv_record_placeholder <- function() {

renv_record_cranlike <- function(record) {
type <- record[["RemoteType"]]
is.null(type) || type %in% c("cran", "repository", "standard")
is.null(type) || tolower(type) %in% c("cran", "repository", "standard")
}
105 changes: 88 additions & 17 deletions R/snapshot.R
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,18 @@ renv_snapshot_description <- function(path = NULL, package = NULL) {

renv_snapshot_description_impl <- function(dcf, path = NULL) {

version <- getOption("renv.lockfile.version", default = 2L)
if (version == 1L)
renv_snapshot_description_impl_v1(dcf, path)
else if (version == 2L)
renv_snapshot_description_impl_v2(dcf, path)
else
stopf("unsupported lockfile version '%s'", format(version))

}

renv_snapshot_description_impl_v1 <- function(dcf, path = NULL) {

# figure out the package source
source <- renv_snapshot_description_source(dcf)
dcf[names(source)] <- source
Expand All @@ -744,7 +756,7 @@ renv_snapshot_description_impl <- function(dcf, path = NULL) {
# generate a hash if we can
dcf[["Hash"]] <- if (the$auto_snapshot_hash) {
if (is.null(path))
renv_hash_description_impl(dcf)
renv_hash_record(dcf)
else
renv_hash_description(path)
}
Expand All @@ -756,7 +768,29 @@ renv_snapshot_description_impl <- function(dcf, path = NULL) {
dcf[["Requirements"]] <- all

# get remotes fields
remotes <- renv_snapshot_description_impl_remotes(dcf)
remotes <- local({

# if this seems to be a cran-like record, only keep remotes
# when RemoteSha appears to be a hash (e.g. for r-universe)
# note that RemoteSha may be a package version when installed
# by e.g. pak
if (renv_record_cranlike(dcf)) {
sha <- dcf[["RemoteSha"]]
if (is.null(sha) || nchar(sha) < 40L)
return(character())
}

# grab the relevant remotes
git <- grep("^git", names(dcf), value = TRUE)
remotes <- grep("^Remote(?!s)", names(dcf), perl = TRUE, value = TRUE)

# don't include 'RemoteRef' if it's a non-informative remote
if (identical(dcf[["RemoteRef"]], "HEAD"))
remotes <- setdiff(remotes, "RemoteRef")

c(git, remotes)

})

# only keep relevant fields
extra <- c("Repository", "OS_type")
Expand All @@ -768,27 +802,64 @@ renv_snapshot_description_impl <- function(dcf, path = NULL) {

}

renv_snapshot_description_impl_remotes <- function(dcf) {
renv_snapshot_description_impl_v2 <- function(dcf, path) {

# figure out the package source
source <- renv_snapshot_description_source(dcf)
dcf[names(source)] <- source

# check for required fields
required <- c("Package", "Version", "Source")
missing <- renv_vector_diff(required, names(dcf))
if (length(missing)) {
fmt <- "required fields %s missing from DESCRIPTION at path '%s'"
stopf(fmt, paste(shQuote(missing), collapse = ", "), path %||% "<unknown>")
}

# if this is a standard remote for a bioconductor package,
# remove the other remote fields
bioc <-
!is.null(dcf[["biocViews"]]) &&
identical(dcf[["RemoteType"]], "standard")

if (bioc) {
fields <- grep("^Remote(?!s)", names(dcf), perl = TRUE, invert = TRUE)
dcf <- dcf[fields]
}

# drop fields that normally only appear in binary packages,
# or fields which might differ from user to user, or might
# differ depending on the mirror used for publication
ignore <- c("Archs", "Built", "Date/Publication", "File", "MD5sum", "Packaged")
dcf[ignore] <- NULL

# if this seems to be a cran-like record, only keep remotes
# when RemoteSha appears to be a hash (e.g. for r-universe)
# note that RemoteSha may be a package version when installed
# by e.g. pak
# drop remote fields for cranlike remotes
if (renv_record_cranlike(dcf)) {
sha <- dcf[["RemoteSha"]]
if (is.null(sha) || nchar(sha) < 40)
return(character())
if (is.null(sha) || nchar(sha) < 40L) {
remotes <- grep("^Remote", names(dcf), perl = TRUE, value = TRUE)
dcf[remotes] <- NULL
}
}

# grab the relevant remotes
git <- grep("^git", names(dcf), value = TRUE)
remotes <- grep("^Remote(?!s)", names(dcf), perl = TRUE, value = TRUE)
# drop the old Github remote fields
github <- grepl("^Github", names(dcf), perl = TRUE)
dcf <- dcf[!github]

# don't include 'RemoteRef' if it's a non-informative remote
if (identical(dcf[["RemoteRef"]], "HEAD"))
remotes <- setdiff(remotes, "RemoteRef")
# split fields which are normally declared as lists of packages
depfields <- c("Depends", "Imports", "Suggests", "LinkingTo", "Enhances")
for (depfield in depfields) {
if (!is.null(dcf[[depfield]])) {
fields <- strsplit(dcf[[depfield]], ",", fixed = TRUE)
dcf[[depfield]] <- as.list(trimws(fields[[1L]]))
}
}

c(git, remotes)
# reorganize fields a bit
dcf <- dcf[c(required, setdiff(names(dcf), required))]

# return as list
as.list(dcf)

}

Expand Down Expand Up @@ -840,7 +911,7 @@ renv_snapshot_description_source <- function(dcf) {

# check for a custom declared remote type
if (!renv_record_cranlike(dcf)) {
type <- dcf[["RemoteType"]]
type <- dcf[["RemoteType"]] %||% "standard"
return(list(Source = alias(type)))
}

Expand Down
Loading

0 comments on commit c0255d3

Please sign in to comment.