Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add lintr (fixes #179) #203

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions .ci/lint.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
library(lintr)

args <- commandArgs(
trailingOnly = TRUE
)
SOURCE_DIR <- args[[1L]]

excluded_files <- list.files(
paste(SOURCE_DIR, "uptasticsearch.Rcheck", sep = "/"),
pattern = rex::rex(".", one_of("Rr"), end),
all.files = TRUE,
recursive = TRUE,
full.names = TRUE
)

LINTERS_TO_USE <- list(
"assignment" = lintr::assignment_linter,
"closed_curly" = lintr::closed_curly_linter,
"equals_na" = lintr::equals_na_linter,
"function_left" = lintr::function_left_parentheses_linter,
"commas" = lintr::commas_linter,
"concatenation" = lintr::unneeded_concatenation_linter,
"implicit_integers" = lintr::implicit_integer_linter,
"infix_spaces" = lintr::infix_spaces_linter,
"long_lines" = lintr::line_length_linter(length = 120L),
"tabs" = lintr::no_tab_linter,
"open_curly" = lintr::open_curly_linter,
"paren_brace_linter" = lintr::paren_brace_linter,
"semicolon" = lintr::semicolon_terminator_linter,
"seq" = lintr::seq_linter,
"single_quotes" = lintr::single_quotes_linter,
"spaces_inside" = lintr::spaces_inside_linter,
"spaces_left_parens" = lintr::spaces_left_parentheses_linter,
"todo_comments" = lintr::todo_comment_linter,
"trailing_blank" = lintr::trailing_blank_lines_linter,
"trailing_white" = lintr::trailing_whitespace_linter,
"true_false" = lintr::T_and_F_symbol_linter
)

results <- lintr::lint_dir(
path = SOURCE_DIR,
linters = LINTERS_TO_USE,
cache = FALSE,
exclusions = excluded_files
)

results_df <- as.data.frame(results)

cat(sprintf(
"Found %i linting errors in project\n",
length(results)
))

issues_count <- length(results)
issues_found <- issues_count > 0L

if (issues_found) {
print(addmargins(table(results_df[c("filename", "linter")])))
cat("\n")

print(results)
}

message(paste(issues_count, "issue(s) found.", sep = " "))
quit(save = "no", status = if (issues_found) 1L else 0L)
2 changes: 1 addition & 1 deletion .ci/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export JAVA_APT_PKG="oracle-java8-set-default"
# install these testing packages we need
if [[ "$TASK" == "rpkg" ]];
then
Rscript -e "install.packages(c('data.table', 'devtools', 'futile.logger', 'knitr', 'testthat', 'rmarkdown', 'uuid'), repos = 'http://cran.rstudio.com')"
Rscript -e "install.packages(c('data.table', 'devtools', 'futile.logger', 'knitr', 'testthat', 'rmarkdown', 'uuid', 'lintr'), repos = 'http://cran.rstudio.com')"
cp test-data/* r-pkg/inst/testdata/
fi

Expand Down
65 changes: 65 additions & 0 deletions .ci/styler.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
args <- commandArgs(
trailingOnly = TRUE
)
SOURCE_DIR <- args[[1L]]

excluded_files <- withr::with_dir(SOURCE_DIR
, dir(path = "./uptasticsearch.Rcheck"
, rex::rex(".", one_of("Rr"), end)
, ignore.case = TRUE
, recursive = TRUE
, full.names = TRUE
, all.files = TRUE))

guide <- styler::tidyverse_style()

fix_implicit_integer <- function(pd_flat) {
op <- pd_flat$token %in% "NUM_CONST"
pd_flat$text[op] <- lapply(
pd_flat$text[op],
function(x) {
gsub("^(\\d+)$", "\\1L", x)
}
)
pd_flat
}

fix_trailing_whitespace_in_comment <- function(pd_flat) {
comments <- pd_flat$token == "COMMENT"
pd_flat$text[comments] <- lapply(
pd_flat$text[comments],
function(x) sub("\\s+$", "", x)
)
pd_flat
}

fix_space_between_paren_and_brace <- function(pd_flat) {
brace_after <- pd_flat$token == "')'" & pd_flat$token_after == "'{'"
if (!any(brace_after)) {
return(pd_flat)
}
pd_flat$spaces[brace_after] <- pmax(pd_flat$spaces[brace_after], 1L)
pd_flat
}

uptasticsearch_guide <- function() {
styler::create_style_guide(
token = list(
fix_quotes = guide$token$fix_quotes,
fix_implicit_integer = fix_implicit_integer,
fix_trailing_whitespace_in_comment = fix_trailing_whitespace_in_comment
),
space = list(
style_space_around_math_token = guide$space$style_space_around_math_token,
fix_space_between_paren_and_brace = fix_space_between_paren_and_brace,
style_space_around_tilde = guide$space$style_space_around_tilde,
add_space_before_brace = styler:::add_space_before_brace,
set_space_after_comma = styler:::set_space_after_comma,
set_space_around_op = function(...) styler:::set_space_around_op(strict = FALSE, ...),
add_space_after_for_if_while = guide$space$add_space_after_for_if_while
),
use_raw_indention = TRUE
)
}

styler::style_dir(SOURCE_DIR, style = uptasticsearch_guide, exclude_files = excluded_files)
2 changes: 2 additions & 0 deletions .ci/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ set -e

if [[ "$TASK" == "rpkg" ]]; then
R_PACKAGE_DIR=$(pwd)/r-pkg
Rscript .ci/lint.R ${R_PACKAGE_DIR}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you please lint the entire repo?

Rscript .ci/lint.R $(pwd)

As I mentioned, I think this linting code should hold our opinion about ALL R code in the project, not only what is in r-pkg/.


R CMD build ${R_PACKAGE_DIR}
R CMD check \
--as-cran \
Expand Down
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ coverage_r: build_r
echo "Done calculating coverage"
open coverage.html

lint_r:
echo "Linting R source..."
Rscript .ci/lint.R $$(pwd)
echo "Done linting"

style_r:
echo "Styling R source..."
Rscript .ci/styler.R $$(pwd)
echo "Done styling"

docs_r: build_r
Rscript -e "devtools::document('r-pkg/')"
Rscript -e "pkgdown::build_site('r-pkg/')"
Expand Down
1 change: 1 addition & 0 deletions r-pkg/.Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ vignettes/*\.pdf
# Stuff
.Rbuildignore
.*\.gitkeep

1 change: 1 addition & 0 deletions r-pkg/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Imports:
Suggests:
covr,
knitr,
lintr,
rmarkdown,
testthat
License: BSD_3_clause + file LICENSE
Expand Down
34 changes: 17 additions & 17 deletions r-pkg/R/assertions.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@

# [title] assert_that wrapper
# [name] assert
# [description] When making an assertion you might call:
#
# \code{assertthat::assert_that(assertthat::is.date(x))}
#
# or something like that. This is an alias to \code{\link[assertthat]{assert_that}} to be used
# for two benefits: \enumerate{
# \item{This uses \code{\link{log_fatal}} instead of \code{\link{stop}} on failure}
# \item{Much less clutter in the source code}
# }
#' [title] assert_that wrapper
#' [name] assert
#' [description] When making an assertion you might call:
#'
#' \code{assertthat::assert_that(assertthat::is.date(x))}
#'
#' or something like that. This is an alias to \code{\link[assertthat]{assert_that}} to be used
#' for two benefits: \enumerate{
#' \item{This uses \code{\link{log_fatal}} instead of \code{\link{stop}} on failure}
#' \item{Much less clutter in the source code}
#' }
#' @importFrom assertthat see_if
.assert <- function(..., msg = NULL) {
res <- assertthat::see_if(..., env = parent.frame(), msg = msg)
if (res) {
return(invisible(TRUE))
} else {
log_fatal(attr(res, "msg"))
}
res <- assertthat::see_if(..., env = parent.frame(), msg = msg)
if (res) {
return(invisible(TRUE))
} else {
log_fatal(attr(res, "msg"))
}
}
46 changes: 23 additions & 23 deletions r-pkg/R/chomp_aggs.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @importFrom data.table as.data.table setnames setcolorder
#' @export
#' @param aggs_json A character vector. If its length is greater than 1, its elements will be pasted
#' together. This can contain a JSON returned from an \code{aggs} query in
#' together. This can contain a JSON returned from an \code{aggs} query in
#' Elasticsearch, or a filepath or URL pointing at one.
#' @examples
#' # A sample raw result from an aggs query combining date_histogram and extended_stats:
Expand Down Expand Up @@ -48,15 +48,15 @@ chomp_aggs <- function(aggs_json = NULL) {
)

# Gross special-case handler for one-level extended_stats aggregation
if (.IsExtendedStatsAgg(jsonList[["aggregations"]][[aggNames]])){
if (.IsExtendedStatsAgg(jsonList[["aggregations"]][[aggNames]])) {
log_info("es_search is assuming that this result is a one-level 'extended_stats' result.")
jsonList[["aggregations"]][[1]][["std_deviation_bounds.upper"]] <- jsonList[["aggregations"]][[1]][["std_deviation_bounds"]][["upper"]]
jsonList[["aggregations"]][[1]][["std_deviation_bounds.lower"]] <- jsonList[["aggregations"]][[1]][["std_deviation_bounds"]][["lower"]]
jsonList[["aggregations"]][[1]][["std_deviation_bounds"]] <- NULL
jsonList[["aggregations"]][[1L]][["std_deviation_bounds.upper"]] <- jsonList[["aggregations"]][[1L]][["std_deviation_bounds"]][["upper"]]
jsonList[["aggregations"]][[1L]][["std_deviation_bounds.lower"]] <- jsonList[["aggregations"]][[1L]][["std_deviation_bounds"]][["lower"]]
jsonList[["aggregations"]][[1L]][["std_deviation_bounds"]] <- NULL
}

# Gross special-case handler for one-level percentiles aggregation
if (.IsPercentilesAgg(jsonList[["aggregations"]][[aggNames]])){
if (.IsPercentilesAgg(jsonList[["aggregations"]][[aggNames]])) {
log_info("es_search is assuming that this result is a one-level 'percentiles' result.")

# Replace names like `25.0` with something that will be easier for users to understand
Expand All @@ -66,17 +66,17 @@ chomp_aggs <- function(aggs_json = NULL) {
jsonList[["aggregations"]][[aggNames]] <- percValues
}

if (.IsSigTermsAgg(jsonList[["aggregations"]][[aggNames]])){
if (.IsSigTermsAgg(jsonList[["aggregations"]][[aggNames]])) {
log_info("es_search is assuming that this result is a one-level 'significant terms' result.")

# We can grab that nested data.frame and break out right now
outDT <- data.table::as.data.table(jsonList[["aggregations"]][[aggNames]][["buckets"]])
data.table::setnames(outDT, 'key', aggNames)
data.table::setnames(outDT, "key", aggNames)
return(outDT)
}

# check for an empty result
if (identical(jsonList[["aggregations"]][[aggNames]][["buckets"]], list())){
if (identical(jsonList[["aggregations"]][[aggNames]][["buckets"]], list())) {
log_info("this aggregation result was empty. Returning NULL")
return(invisible(NULL))
}
Expand All @@ -85,7 +85,7 @@ chomp_aggs <- function(aggs_json = NULL) {
outDT <- data.table::as.data.table(jsonList[["aggregations"]][[aggNames]])

# Keep unpacking the nested arrays until you hit 'break'
while(TRUE) {
while (TRUE) {
# Clean up the column names
.clean_aggs_colnames(outDT)

Expand All @@ -96,7 +96,7 @@ chomp_aggs <- function(aggs_json = NULL) {
} else {

# Other bucketed aggregations (not date_histogram) will have "key"
if ("key" %in% names(outDT)){
if ("key" %in% names(outDT)) {
data.table::setnames(outDT, "key", aggNames[length(aggNames)])
} else {
# If we get down here, we know it's not a bucketed aggregation
Expand All @@ -111,11 +111,11 @@ chomp_aggs <- function(aggs_json = NULL) {
if (any(colTypes == "list")) {

# Store the new agg name
aggNames[length(aggNames) + 1] <- names(colTypes[colTypes == "list"])
aggNames[length(aggNames) + 1L] <- names(colTypes[colTypes == "list"])

# Remove unwanted columns
badCols <- grep("doc_count", names(outDT))
if (length(badCols) > 0){
if (length(badCols) > 0L) {
outDT <- outDT[, !badCols, with = FALSE]
}

Expand All @@ -125,7 +125,7 @@ chomp_aggs <- function(aggs_json = NULL) {
} else {
# Remove unwanted columns, but keep doc_count
badCols <- base::setdiff(grep("doc_count", names(outDT), value = TRUE), "doc_count")
if (length(badCols) > 0) {
if (length(badCols) > 0L) {
outDT <- outDT[, !badCols, with = FALSE]
}
break
Expand All @@ -135,7 +135,7 @@ chomp_aggs <- function(aggs_json = NULL) {
# Re-set the column order to mirror the way the user specified their aggs query
# NOTE: If there's no "doc_count" in the names, we know that this was not a bucketed
# / nested query and reordering is unnecessary
if ("doc_count" %in% names(outDT)){
if ("doc_count" %in% names(outDT)) {
data.table::setcolorder(
outDT,
c(aggNames, base::setdiff(names(outDT), c(aggNames, "doc_count")), "doc_count")
Expand All @@ -160,7 +160,7 @@ chomp_aggs <- function(aggs_json = NULL) {
# "extended_stats" aggregation. data.table doesn't handle those
# in a way that's consistent with the way this package handles all other aggregations
# [param] aggsList R list-object representation of an "aggs" result from Elasticsearch
.IsExtendedStatsAgg <- function(aggsList){
.IsExtendedStatsAgg <- function(aggsList) {
statsNames <- c("count", "min", "max", "avg", "sum", "sum_of_squares"
, "variance", "std_deviation", "std_deviation_bounds")

Expand All @@ -172,16 +172,16 @@ chomp_aggs <- function(aggs_json = NULL) {
# "Percentiles" aggregation. data.table doesn't handle those
# in a way that's consistent with the way this package handles all other aggregations
# [param] aggsList R list-object representation of an "aggs" result from Elasticsearch
.IsPercentilesAgg <- function(aggsList){
.IsPercentilesAgg <- function(aggsList) {

# check 1 - has a single element called "values"
if (! identical("values", names(aggsList))){
if (! identical("values", names(aggsList))) {
return(FALSE)
}

# check 2 - all names of "values" are convertible to numbers
numNames <- as.numeric(names(aggsList[["values"]]))
if (all(vapply(numNames, function(val){!is.na(val)}, FUN.VALUE = TRUE))){
if (all(vapply(numNames, function(val) {!is.na(val)}, FUN.VALUE = TRUE))) {
return(TRUE)
} else {
return(FALSE)
Expand All @@ -194,20 +194,20 @@ chomp_aggs <- function(aggs_json = NULL) {
# "significant terms" aggregation. data.table doesn't handle those
# in a way that's consistent with the way this package handles all other aggregations
# [param] aggsList R list-object representation of an "aggs" result from Elasticsearch
.IsSigTermsAgg <- function(aggsList){
.IsSigTermsAgg <- function(aggsList) {

# check 1 - has exactly two keys - "doc_count", "buckets"
if (! identical(sort(names(aggsList)), c('buckets', 'doc_count'))){
if (! identical(sort(names(aggsList)), c("buckets", "doc_count"))) {
return(FALSE)
}

# check 2 - "buckets" is a data.frame
if (!is.data.frame(aggsList[['buckets']])){
if (!is.data.frame(aggsList[["buckets"]])) {
return(FALSE)
}

# check 3 - "buckets" has at least the columns "key", "doc_count", and "bg_count"
if (!all(c('key', 'doc_count', 'bg_count') %in% names(aggsList[['buckets']]))){
if (!all(c("key", "doc_count", "bg_count") %in% names(aggsList[["buckets"]]))) {
return(FALSE)
}

Expand Down
Loading