diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml new file mode 100644 index 00000000..07aafe38 --- /dev/null +++ b/.github/workflows/linter.yml @@ -0,0 +1,71 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. +# +# See https://github.com/r-lib/actions/tree/master/examples#readme for +# additional example workflows available for the R community. + +# ======================================================== # +# Determines when the action is triggered # +# ======================================================== # + +on: [push, pull_request] +name: linter + +# ======================================================== # +# Determine actions to take # +# ======================================================== # + +jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Checking out the repository + uses: actions/checkout@v2 + + - name: Setting up R + uses: r-lib/actions/setup-r@v1 + with: + use-public-rspm: true + + - name: Installing dependencies + uses: r-lib/actions/setup-r-dependencies@v1 + with: + extra-packages: lintr + + - name: Picking on the coding style + run: | + library(lintr) + excluded_files <- list( + "inst/examples/compute_consensus_example.R", + "inst/examples/compute_mallows_example.R", + "inst/examples/compute_mallows_mixtures_example.R", + "inst/examples/compute_posterior_intervals_example.R", + "inst/examples/estimate_partition_function_example.R", + "inst/examples/generate_constraints_example.R", + "inst/examples/generate_initial_ranking_example.R", + "inst/examples/generate_transitive_closure_example.R", + "inst/examples/label_switching_example.R", + "inst/examples/obs_freq_example.R", + "inst/examples/plot_top_k_example.R", + "inst/examples/plot.BayesMallows_example.R" + ) + style_rules <- list( + absolute_path_linter, assignment_linter, closed_curly_linter, + commas_linter, commented_code_linter, cyclocomp_linter, + equals_na_linter, function_left_parentheses_linter, + infix_spaces_linter, line_length_linter, no_tab_linter, + nonportable_path_linter, object_length_linter, + open_curly_linter, paren_brace_linter, pipe_continuation_linter, + semicolon_terminator_linter, seq_linter, single_quotes_linter, + spaces_inside_linter, spaces_left_parentheses_linter, + T_and_F_symbol_linter, todo_comment_linter, + trailing_blank_lines_linter, trailing_whitespace_linter, + undesirable_function_linter, undesirable_operator_linter, + unneeded_concatenation_linter + ) + lint_package(linters = style_rules, exclusions = excluded_files) + shell: Rscript {0} diff --git a/R/RcppExports.R b/R/RcppExports.R index 97204410..df5717cb 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -649,4 +649,3 @@ metropolis_hastings_aug_ranking_pseudo <- function(alpha, rho, n_items, partial_ metropolis_hastings_rho <- function(alpha, n_items, rankings, metric, rho, leap_size) { .Call(`_BayesMallows_metropolis_hastings_rho`, alpha, n_items, rankings, metric, rho, leap_size) } - diff --git a/R/all_topological_sorts.R b/R/all_topological_sorts.R index ec71695b..10480660 100644 --- a/R/all_topological_sorts.R +++ b/R/all_topological_sorts.R @@ -1,10 +1,10 @@ # Translation to R of C++ and Python code found here # https://www.geeksforgeeks.org/all-topological-sorts-of-a-directed-acyclic-graph/ -all_topological_sorts <- function(graph, path, discovered, n_items){ +all_topological_sorts <- function(graph, path, discovered, n_items) { flag <- FALSE - for(i in seq_len(n_items)){ - if(attr(graph, "indegree")[[i]] == 0 && !discovered[[i]]){ + for (i in seq_len(n_items)) { + if (attr(graph, "indegree")[[i]] == 0 && !discovered[[i]]) { attr(graph, "indegree")[graph[[i]]] <- attr(graph, "indegree")[graph[[i]]] - 1 path <- c(path, i) @@ -18,5 +18,5 @@ all_topological_sorts <- function(graph, path, discovered, n_items){ flag <- TRUE } } - if(length(path) == n_items) print(path) + if (length(path) == n_items) print(path) } diff --git a/R/assess_convergence.R b/R/assess_convergence.R index 62aac0be..e90d1ec6 100644 --- a/R/assess_convergence.R +++ b/R/assess_convergence.R @@ -26,17 +26,17 @@ #' @export #' assess_convergence <- function(model_fit, parameter = "alpha", items = NULL, - assessors = NULL){ + assessors = NULL) { stopifnot(inherits(model_fit, "BayesMallows") || inherits(model_fit, "BayesMallowsMixtures")) - if(parameter == "alpha") { - if(inherits(model_fit, "BayesMallows")){ + if (parameter == "alpha") { + if (inherits(model_fit, "BayesMallows")) { m <- model_fit$alpha trace_alpha(m, FALSE) - } else if(inherits(model_fit, "BayesMallowsMixtures")){ - m <- do.call(rbind, lapply(model_fit, function(x){ + } else if (inherits(model_fit, "BayesMallowsMixtures")) { + m <- do.call(rbind, lapply(model_fit, function(x) { dplyr::mutate(x$alpha, cluster = as.character(.data$cluster), n_clusters = x$n_clusters) @@ -44,25 +44,25 @@ assess_convergence <- function(model_fit, parameter = "alpha", items = NULL, trace_alpha(m, TRUE) } - } else if(parameter == "rho"){ - if(inherits(model_fit, "BayesMallows")){ + } else if (parameter == "rho") { + if (inherits(model_fit, "BayesMallows")) { trace_rho(model_fit, items) - } else if(inherits(model_fit, "BayesMallowsMixtures")){ + } else if (inherits(model_fit, "BayesMallowsMixtures")) { cowplot::plot_grid(plotlist = lapply(model_fit, trace_rho, clusters = TRUE, items = items)) } - } else if(parameter == "Rtilde") { + } else if (parameter == "Rtilde") { - if(inherits(model_fit, "BayesMallows")){ + if (inherits(model_fit, "BayesMallows")) { trace_rtilde(model_fit, items, assessors) - } else if(inherits(model_fit, "BayesMallowsMixtures")){ + } else if (inherits(model_fit, "BayesMallowsMixtures")) { stop("Trace plots of augmented data not supported for BayesMallowsMixtures. Please rerun each component k using the k-th list element.") } - } else if (parameter == "cluster_probs"){ - if(inherits(model_fit, "BayesMallows")){ + } else if (parameter == "cluster_probs") { + if (inherits(model_fit, "BayesMallows")) { m <- model_fit$cluster_probs - } else if(inherits(model_fit, "BayesMallowsMixtures")){ - m <- do.call(rbind, lapply(model_fit, function(x){ + } else if (inherits(model_fit, "BayesMallowsMixtures")) { + m <- do.call(rbind, lapply(model_fit, function(x) { dplyr::mutate(x$cluster_probs, cluster = as.character(.data$cluster), n_clusters = x$n_clusters) @@ -70,20 +70,20 @@ assess_convergence <- function(model_fit, parameter = "alpha", items = NULL, } trace_cluster_probs(m) - } else if (parameter == "theta"){ + } else if (parameter == "theta") { trace_theta(model_fit) } else { stop("parameter must be either \"alpha\", \"rho\", \"augmentation\", \"cluster_probs\", or \"theta\".") } } -trace_alpha <- function(m, clusters){ +trace_alpha <- function(m, clusters) { # Create the diagnostic plot for alpha p <- ggplot2::ggplot(m, ggplot2::aes(x = .data$iteration, y = .data$value)) + ggplot2::xlab("Iteration") + ggplot2::ylab(expression(alpha)) - if(!clusters){ + if (!clusters) { p <- p + ggplot2::geom_line() } else { p <- p + @@ -97,16 +97,16 @@ trace_alpha <- function(m, clusters){ return(p) } -trace_rho <- function(model_fit, items, clusters = model_fit$n_clusters > 1){ +trace_rho <- function(model_fit, items, clusters = model_fit$n_clusters > 1) { - if(is.null(items) && model_fit$n_items > 5){ + if (is.null(items) && model_fit$n_items > 5) { message("Items not provided by user. Picking 5 at random.") items <- sample.int(model_fit$n_items, 5) } else if (is.null(items) && model_fit$n_items > 0) { items <- seq.int(from = 1, to = model_fit$n_items) } - if(!is.character(items)){ + if (!is.character(items)) { items <- model_fit$items[items] } @@ -118,7 +118,7 @@ trace_rho <- function(model_fit, items, clusters = model_fit$n_clusters > 1){ ggplot2::xlab("Iteration") + ggplot2::ylab(expression(rho)) - if(clusters){ + if (clusters) { p <- p + ggplot2::facet_wrap(ggplot2::vars(.data$cluster)) } @@ -126,32 +126,32 @@ trace_rho <- function(model_fit, items, clusters = model_fit$n_clusters > 1){ } -trace_rtilde <- function(model_fit, items, assessors, ...){ +trace_rtilde <- function(model_fit, items, assessors, ...) { - if(!model_fit$save_aug){ + if (!model_fit$save_aug) { stop("Please rerun with compute_mallows with save_aug = TRUE") } - if(is.null(items) && model_fit$n_items > 5){ + if (is.null(items) && model_fit$n_items > 5) { message("Items not provided by user. Picking 5 at random.") items <- sample.int(model_fit$n_items, 5) } else if (is.null(items) && model_fit$n_items > 0) { items <- seq.int(from = 1, to = model_fit$n_items) } - if(is.null(assessors) && model_fit$n_assessors > 5){ + if (is.null(assessors) && model_fit$n_assessors > 5) { message("Assessors not provided by user. Picking 5 at random.") assessors <- sample.int(model_fit$n_assessors, 5) } else if (is.null(assessors) && model_fit$n_assessors > 0) { assessors <- seq.int(from = 1, to = model_fit$n_assessors) - } else if(!is.null(assessors)) { - if(length(setdiff(assessors, seq(1, model_fit$n_assessors, 1))) > 0) { + } else if (!is.null(assessors)) { + if (length(setdiff(assessors, seq(1, model_fit$n_assessors, 1))) > 0) { stop("assessors vector must contain numeric indices between 1 and the number of assessors") } } - if(is.factor(model_fit$augmented_data$item) && is.numeric(items)){ + if (is.factor(model_fit$augmented_data$item) && is.numeric(items)) { items <- levels(model_fit$augmented_data$item)[items] } df <- dplyr::filter(model_fit$augmented_data, @@ -169,7 +169,7 @@ trace_rtilde <- function(model_fit, items, assessors, ...){ } -trace_cluster_probs <- function(m){ +trace_cluster_probs <- function(m) { ggplot2::ggplot(m, ggplot2::aes(x = .data$iteration, y = .data$value, color = .data$cluster)) + @@ -184,8 +184,8 @@ trace_cluster_probs <- function(m){ } -trace_theta <- function(model_fit){ - if(is.null(model_fit$theta) || length(model_fit$theta) == 0){ +trace_theta <- function(model_fit) { + if (is.null(model_fit$theta) || length(model_fit$theta) == 0) { stop("Theta not available. Run compute_mallows with error_model = 'bernoulli'.") } # Create the diagnostic plot for theta diff --git a/R/assign_cluster.R b/R/assign_cluster.R index 4198041e..11f646a9 100644 --- a/R/assign_cluster.R +++ b/R/assign_cluster.R @@ -30,12 +30,12 @@ #' #' @export #' -assign_cluster <- function(model_fit, burnin = model_fit$burnin, soft = TRUE, expand = FALSE){ +assign_cluster <- function(model_fit, burnin = model_fit$burnin, soft = TRUE, expand = FALSE) { - if(is.null(burnin)){ + if (is.null(burnin)) { stop("Please specify the burnin.") } - if(is.null(model_fit$cluster_assignment)){ + if (is.null(model_fit$cluster_assignment)) { stop("Rerun compute_mallows with save_clus=TRUE.") } stopifnot(burnin < model_fit$nmc) @@ -53,8 +53,8 @@ assign_cluster <- function(model_fit, burnin = model_fit$burnin, soft = TRUE, ex df <- dplyr::ungroup(df) df <- dplyr::rename(df, cluster = .data$value) - if(expand){ - df <- do.call(rbind, lapply(split(df, f = df$assessor), function(dd){ + if (expand) { + df <- do.call(rbind, lapply(split(df, f = df$assessor), function(dd) { dd2 <- merge(dd, expand.grid(cluster = unique(df$cluster)), by = "cluster", all = TRUE) dd2$assessor <- unique(dd$assessor) @@ -78,7 +78,7 @@ assign_cluster <- function(model_fit, burnin = model_fit$burnin, soft = TRUE, ex # Join map back onto df df <- dplyr::inner_join(df, map, by = "assessor") - if(!soft){ + if (!soft) { df <- dplyr::filter(df, .data$cluster == .data$map_cluster) df <- dplyr::select(df, -.data$cluster) } diff --git a/R/compute_consensus.R b/R/compute_consensus.R index 07fe880a..871cc35d 100644 --- a/R/compute_consensus.R +++ b/R/compute_consensus.R @@ -44,18 +44,18 @@ compute_consensus.BayesMallows <- function( model_fit, type = "CP", burnin = model_fit$burnin, parameter = "rho", assessors = 1L, ... ) { - if(is.null(burnin)){ + if (is.null(burnin)) { stop("Please specify the burnin.") } stopifnot(burnin < model_fit$nmc) stopifnot(class(model_fit) == "BayesMallows") - if(parameter == "Rtilde" && !inherits(model_fit$augmented_data, "data.frame")){ + if (parameter == "Rtilde" && !inherits(model_fit$augmented_data, "data.frame")) { stop("For augmented ranks, please refit model with option 'save_aug = TRUE'.") } - if(parameter == "rho"){ + if (parameter == "rho") { # Filter out the pre-burnin iterations df <- dplyr::filter(model_fit$rho, .data$iteration > burnin) @@ -71,14 +71,14 @@ compute_consensus.BayesMallows <- function( class(df) <- c("consensus_BayesMallows", "tbl_df", "tbl", "data.frame") - df <- if(type == "CP"){ + df <- if (type == "CP") { .compute_cp_consensus(df) - } else if(type == "MAP"){ + } else if (type == "MAP") { .compute_map_consensus(df) } - } else if(parameter == "Rtilde"){ + } else if (parameter == "Rtilde") { # Filter out the pre-burnin iterations and get the right assessors df <- dplyr::filter(model_fit$augmented_data, .data$iteration > burnin, .data$assessor %in% assessors) @@ -96,13 +96,13 @@ compute_consensus.BayesMallows <- function( df <- dplyr::rename(df, cluster = "assessor") class(df) <- c("consensus_BayesMallows", "tbl_df", "tbl", "data.frame") - df <- if(type == "CP"){ + df <- if (type == "CP") { .compute_cp_consensus(df) - } else if(type == "MAP"){ + } else if (type == "MAP") { .compute_map_consensus(df) } - if("cluster" %in% names(df)){ + if ("cluster" %in% names(df)) { df <- dplyr::rename(df, assessor = "cluster") } @@ -136,7 +136,7 @@ compute_consensus.consensus_SMCMallows <- function(model_fit, type, burnin) { } } -.compute_cp_consensus.consensus_BayesMallows <- function(df){ +.compute_cp_consensus.consensus_BayesMallows <- function(df) { # Convert items and cluster to character, since factor levels are not needed in this case df <- dplyr::mutate_at(df, dplyr::vars(.data$item, .data$cluster), @@ -155,7 +155,7 @@ compute_consensus.consensus_SMCMallows <- function(model_fit, type, burnin) { # Find the cumulative probability, by dividing by the total # count in (item, cluster) and the summing cumulatively - df <- dplyr::mutate(df, cumprob = cumsum(.data$n/sum(.data$n))) + df <- dplyr::mutate(df, cumprob = cumsum(.data$n / sum(.data$n))) # Find the CP consensus per cluster, using the find_cpc function df <- dplyr::ungroup(df) @@ -168,15 +168,15 @@ compute_consensus.consensus_SMCMallows <- function(model_fit, type, burnin) { # If there is only one cluster, we drop the cluster column - if(length(unique(df$cluster)) == 1){ + if (length(unique(df$cluster)) == 1) { df <- dplyr::select(df, -.data$cluster) } return(df) } -.compute_cp_consensus.consensus_SMCMallows <- function(model_fit, burnin){ - if(is.null(burnin)){ +.compute_cp_consensus.consensus_SMCMallows <- function(model_fit, burnin) { + if (is.null(burnin)) { stop("Please specify the burnin.") } @@ -184,9 +184,11 @@ compute_consensus.consensus_SMCMallows <- function(model_fit, type, burnin) { # Filter out the pre-burnin iterations - if(burnin!=0){ + if (burnin != 0) { df <- dplyr::filter(model_fit, .data$iteration > burnin) - }else {df <- model_fit} + } else { + df <- model_fit + } # Find the problem dimensions n_rows <- nrow(dplyr::distinct(df, .data$item, .data$cluster)) @@ -214,7 +216,7 @@ compute_consensus.consensus_SMCMallows <- function(model_fit, type, burnin) { # Find the cumulative probability, by dividing by the total # count in (item, cluster) and the summing cumulatively - df <- dplyr::mutate(df, cumprob = cumsum(.data$n/sum(.data$n))) + df <- dplyr::mutate(df, cumprob = cumsum(.data$n / sum(.data$n))) # Find the CP consensus per cluster, using the find_cpc function df <- dplyr::ungroup(df) @@ -233,7 +235,7 @@ compute_consensus.consensus_SMCMallows <- function(model_fit, type, burnin) { } # Internal function for finding CP consensus. -find_cpc.consensus_BayesMallows <- function(group_df){ +find_cpc.consensus_BayesMallows <- function(group_df) { # Declare the result dataframe before adding rows to it result <- dplyr::tibble( cluster = character(), @@ -243,7 +245,7 @@ find_cpc.consensus_BayesMallows <- function(group_df){ ) n_items <- max(group_df$value) - for(i in seq(from = 1, to = n_items, by = 1)){ + for (i in seq(from = 1, to = n_items, by = 1)) { # Filter out the relevant rows tmp_df <- dplyr::filter(group_df, .data$value == i) @@ -266,7 +268,7 @@ find_cpc.consensus_BayesMallows <- function(group_df){ } # Internal function for finding CP consensus. -find_cpc.consensus_SMCMallows <- function(group_df){ +find_cpc.consensus_SMCMallows <- function(group_df) { # Declare the result dataframe before adding rows to it result <- dplyr::tibble( cluster = character(), @@ -275,7 +277,7 @@ find_cpc.consensus_SMCMallows <- function(group_df){ cumprob = numeric() ) n_items <- max(group_df$value) - for(i in seq(from = 1, to = n_items, by = 1)){ + for (i in seq(from = 1, to = n_items, by = 1)) { # Filter out the relevant rows tmp_df <- dplyr::filter(group_df, group_df$value == i) @@ -303,7 +305,7 @@ find_cpc.consensus_SMCMallows <- function(group_df){ return(result) } -.compute_map_consensus.consensus_BayesMallows <- function(df){ +.compute_map_consensus.consensus_BayesMallows <- function(df) { # Store the total number of iterations after burnin n_samples <- length(unique(df$iteration)) @@ -342,7 +344,7 @@ find_cpc.consensus_SMCMallows <- function(group_df){ # Sort according to cluster and ranking df <- dplyr::arrange(df, .data$cluster, .data$map_ranking) - if(length(unique(df$cluster)) == 1){ + if (length(unique(df$cluster)) == 1) { df <- dplyr::select(df, -.data$cluster) } @@ -353,12 +355,12 @@ find_cpc.consensus_SMCMallows <- function(group_df){ } #AS: added one extra line of code to resolve of the issues in #118 with plotting too many rows in compute_rho_consensus -.compute_map_consensus.consensus_SMCMallows <- function(model_fit, burnin = model_fit$burnin){ - if(is.null(burnin)){ +.compute_map_consensus.consensus_SMCMallows <- function(model_fit, burnin = model_fit$burnin) { + if (is.null(burnin)) { stop("Please specify the burnin.") } - if(burnin != 0){ + if (burnin != 0) { df <- dplyr::filter(model_fit, .data$iteration > burnin) } else { df <- model_fit diff --git a/R/compute_mallows.R b/R/compute_mallows.R index 01b7710b..7cc7ce6d 100644 --- a/R/compute_mallows.R +++ b/R/compute_mallows.R @@ -237,17 +237,17 @@ compute_mallows <- function(rankings = NULL, constraints = NULL, save_ind_clus = FALSE, seed = NULL - ){ + ) { - if(!is.null(seed)) set.seed(seed) + if (!is.null(seed)) set.seed(seed) # Check if there are NAs in rankings, if it is provided - if(!is.null(rankings)){ - if(na_action == "fail" && any(is.na(rankings))){ + if (!is.null(rankings)) { + if (na_action == "fail" && any(is.na(rankings))) { stop("rankings matrix contains NA values") } - if(na_action == "omit" && any(is.na(rankings))){ + if (na_action == "omit" && any(is.na(rankings))) { keeps <- apply(rankings, 1, function(x) !any(is.na(x))) print(paste("Omitting", sum(keeps), "rows from rankings due to NA values")) rankings <- rankings[keeps, , drop = FALSE] @@ -255,46 +255,46 @@ compute_mallows <- function(rankings = NULL, } # Check that at most one of rankings and preferences is set - if(is.null(rankings) && is.null(preferences)){ + if (is.null(rankings) && is.null(preferences)) { stop("Either rankings or preferences (or both) must be provided.") } - if(is.null(preferences) && !is.null(error_model)){ + if (is.null(preferences) && !is.null(error_model)) { stop("Error model requires preferences to be set.") } # Check if obs_freq are provided - if(!is.null(obs_freq)){ - if(is.null(rankings)){ + if (!is.null(obs_freq)) { + if (is.null(rankings)) { stop("rankings matrix must be provided when obs_freq are provided") } - if(nrow(rankings) != length(obs_freq)){ + if (nrow(rankings) != length(obs_freq)) { stop("obs_freq must be of same length as the number of rows in rankings") } } - if(!swap_leap > 0) stop("swap_leap must be strictly positive") - if(nmc <= 0) stop("nmc must be strictly positive") + if (!swap_leap > 0) stop("swap_leap must be strictly positive") + if (nmc <= 0) stop("nmc must be strictly positive") # Check that we do not jump over all alphas - if(alpha_jump >= nmc) stop("alpha_jump must be strictly smaller than nmc") + if (alpha_jump >= nmc) stop("alpha_jump must be strictly smaller than nmc") # Check that we do not jump over all rhos - if(rho_thinning >= nmc) stop("rho_thinning must be strictly smaller than nmc") - if(aug_thinning >= nmc) stop("aug_thinning must be strictly smaller than nmc") + if (rho_thinning >= nmc) stop("rho_thinning must be strictly smaller than nmc") + if (aug_thinning >= nmc) stop("aug_thinning must be strictly smaller than nmc") - if(lambda <= 0) stop("exponential rate parameter lambda must be strictly positive") + if (lambda <= 0) stop("exponential rate parameter lambda must be strictly positive") # Check that all rows of rankings are proper permutations - if(!is.null(rankings) && validate_rankings && !all(apply(rankings, 1, validate_permutation))){ + if (!is.null(rankings) && validate_rankings && !all(apply(rankings, 1, validate_permutation))) { stop("invalid permutations provided in rankings matrix") } # Deal with pairwise comparisons. Generate rankings compatible with them. - if(!is.null(preferences) && is.null(error_model)){ + if (!is.null(preferences) && is.null(error_model)) { - if(!inherits(preferences, "BayesMallowsTC")){ + if (!inherits(preferences, "BayesMallowsTC")) { message("Generating transitive closure of preferences.") # Make sure the preference columns are double preferences <- dplyr::mutate(preferences, @@ -304,15 +304,15 @@ compute_mallows <- function(rankings = NULL, preferences <- generate_transitive_closure(preferences) } - if(is.null(rankings)){ + if (is.null(rankings)) { message("Generating initial ranking.") rankings <- generate_initial_ranking(preferences) } - } else if(!is.null(error_model)){ + } else if (!is.null(error_model)) { stopifnot(error_model == "bernoulli") n_items <- max(c(preferences$bottom_item, preferences$top_item)) n_assessors <- length(unique(preferences$assessor)) - if(is.null(rankings)){ + if (is.null(rankings)) { rankings <- replicate(n_assessors, sample(x = n_items, size = n_items), simplify = "numeric") rankings <- matrix(rankings, ncol = n_items, nrow = n_assessors, byrow = TRUE) } @@ -322,41 +322,41 @@ compute_mallows <- function(rankings = NULL, n_items <- ncol(rankings) # If any row of rankings has only one missing value, replace it with the implied ranking - if(any(is.na(rankings))){ + if (any(is.na(rankings))) { dn <- dimnames(rankings) rankings <- lapply(split(rankings, f = seq_len(nrow(rankings))), function(x) { - if(sum(is.na(x)) == 1) x[is.na(x)] <- setdiff(1:length(x), x) + if (sum(is.na(x)) == 1) x[is.na(x)] <- setdiff(1:length(x), x) return(x) }) rankings <- do.call(rbind, rankings) dimnames(rankings) <- dn } - if(!is.null(rho_init)) { - if(!validate_permutation(rho_init)) stop("rho_init must be a proper permutation") - if(!(sum(is.na(rho_init)) == 0)) stop("rho_init cannot have missing values") - if(length(rho_init) != n_items) stop("rho_init must have the same number of items as implied by rankings or preferences") + if (!is.null(rho_init)) { + if (!validate_permutation(rho_init)) stop("rho_init must be a proper permutation") + if (!(sum(is.na(rho_init)) == 0)) stop("rho_init cannot have missing values") + if (length(rho_init) != n_items) stop("rho_init must have the same number of items as implied by rankings or preferences") rho_init <- matrix(rho_init, ncol = 1) } # Generate the constraint set - if(!is.null(preferences) && is.null(constraints)){ + if (!is.null(preferences) && is.null(constraints)) { constraints <- generate_constraints(preferences, n_items) - } else if (is.null(constraints)){ + } else if (is.null(constraints)) { constraints <- list() } - if(is.null(obs_freq)) obs_freq <- rep(1, nrow(rankings)) + if (is.null(obs_freq)) obs_freq <- rep(1, nrow(rankings)) logz_list <- prepare_partition_function(logz_estimate, metric, n_items) - if(save_ind_clus){ + if (save_ind_clus) { abort <- readline( prompt = paste(nmc, "csv files will be saved in your current working directory.", "Proceed? (yes/no): ")) - if(tolower(abort) %in% c("n", "no")) stop() + if (tolower(abort) %in% c("n", "no")) stop() } # Transpose rankings to get samples along columns, since we typically want @@ -387,7 +387,7 @@ compute_mallows <- function(rankings = NULL, save_ind_clus = save_ind_clus ) - if(verbose){ + if (verbose) { print("Metropolis-Hastings algorithm completed. Post-processing data.") } @@ -407,7 +407,7 @@ compute_mallows <- function(rankings = NULL, fit$save_clus <- save_clus # Add names of item - if(!is.null(colnames(rankings))) { + if (!is.null(colnames(rankings))) { fit$items <- colnames(rankings) } else { fit$items <- paste("Item", seq(from = 1, to = nrow(fit$rho), by = 1)) diff --git a/R/compute_mallows_mixtures.R b/R/compute_mallows_mixtures.R index 68c03dbd..83fcd52f 100644 --- a/R/compute_mallows_mixtures.R +++ b/R/compute_mallows_mixtures.R @@ -22,18 +22,18 @@ #' #' @example /inst/examples/compute_mallows_mixtures_example.R #' -compute_mallows_mixtures <- function(n_clusters, ..., cl = NULL){ +compute_mallows_mixtures <- function(n_clusters, ..., cl = NULL) { stopifnot(is.numeric(n_clusters)) stopifnot(is.null(cl) || inherits(cl, "cluster")) - if(is.null(cl)){ + if (is.null(cl)) { models <- lapply(n_clusters, function(x) { compute_mallows(..., n_clusters = x) }) } else { args <- list(...) parallel::clusterExport(cl = cl, varlist = "args", envir = environment()) - models <- parallel::parLapply(cl = cl, X = n_clusters, fun = function(x){ + models <- parallel::parLapply(cl = cl, X = n_clusters, fun = function(x) { args$n_clusters <- x do.call(compute_mallows, args) }) diff --git a/R/compute_posterior_intervals.R b/R/compute_posterior_intervals.R index 1523f0dd..c3113a7f 100644 --- a/R/compute_posterior_intervals.R +++ b/R/compute_posterior_intervals.R @@ -50,7 +50,7 @@ compute_posterior_intervals.BayesMallows <- function( ) { stopifnot(class(model_fit) == "BayesMallows") - if(is.null(burnin)){ + if (is.null(burnin)) { stop("Please specify the burnin.") } @@ -60,14 +60,14 @@ compute_posterior_intervals.BayesMallows <- function( df <- dplyr::filter(model_fit[[parameter]], .data$iteration > burnin) - if(parameter == "alpha" || parameter == "cluster_probs"){ + if (parameter == "alpha" || parameter == "cluster_probs") { df <- dplyr::group_by(df, .data$cluster) class(df) <- c("posterior_BayesMallows", "grouped_df", "tbl_df", "tbl", "data.frame") df <- .compute_posterior_intervals(df, parameter, level, decimals) - } else if(parameter == "rho"){ + } else if (parameter == "rho") { decimals <- 0 df <- dplyr::group_by(df, .data$cluster, .data$item) class(df) <- c("posterior_BayesMallows", "grouped_df", "tbl_df", "tbl", "data.frame") @@ -77,7 +77,7 @@ compute_posterior_intervals.BayesMallows <- function( df <- dplyr::ungroup(df) - if(model_fit$n_clusters == 1) df <- dplyr::select(df, -.data$cluster) + if (model_fit$n_clusters == 1) df <- dplyr::select(df, -.data$cluster) return(df) } @@ -129,14 +129,14 @@ compute_posterior_intervals.SMCMallows <- function( .compute_posterior_intervals.posterior_BayesMallows <- function( df, parameter, level, decimals, discrete = FALSE, ... -){ +) { dplyr::do(df, { format <- paste0("%.", decimals, "f") posterior_mean <- round(base::mean(.data$value), decimals) posterior_median <- round(stats::median(.data$value), decimals) - if(discrete) { + if (discrete) { df <- dplyr::group_by(.data, .data$value) df <- dplyr::summarise(df, n = dplyr::n()) @@ -163,7 +163,7 @@ compute_posterior_intervals.SMCMallows <- function( hpdi <- HDInterval::hdi(.data$value, credMass = level, allowSplit = TRUE) hpdi[] <- sprintf(format, hpdi) - if(is.matrix(hpdi)){ + if (is.matrix(hpdi)) { # Discontinous case hpdi <- paste(apply(hpdi, 1, function(x) paste0("[", x[[1]], ",", x[[2]], "]"))) } else { diff --git a/R/estimate_partition_function.R b/R/estimate_partition_function.R index 3e3f6c30..f2c34af2 100644 --- a/R/estimate_partition_function.R +++ b/R/estimate_partition_function.R @@ -52,31 +52,31 @@ estimate_partition_function <- function(method = "importance_sampling", alpha_vector, n_items, metric, nmc, degree, n_iterations, K, cl = NULL, - seed = NULL){ + seed = NULL) { stopifnot(degree < length(alpha_vector)) - if(method == "importance_sampling"){ - if(!is.null(cl)){ + if (method == "importance_sampling") { + if (!is.null(cl)) { # Split nmc into each cluster nmc_vec <- rep(floor(nmc / length(cl)), length(cl)) i <- 1 - while(sum(nmc_vec) != nmc){ + while (sum(nmc_vec) != nmc) { nmc_vec[i] <- nmc_vec[i] + 1 - if(i > length(cl)) break() + if (i > length(cl)) break } parallel::clusterExport(cl, c("alpha_vector", "n_items", "metric", "seed"), envir = environment()) - estimates <- parallel::parLapply(cl, nmc_vec, function(x){ - if(!is.null(seed)) set.seed(seed) + estimates <- parallel::parLapply(cl, nmc_vec, function(x) { + if (!is.null(seed)) set.seed(seed) compute_importance_sampling_estimate(alpha_vector = alpha_vector, n_items = n_items, metric = metric, nmc = x) }) log_z <- rowMeans(do.call(cbind, estimates)) } else { - if(!is.null(seed)) set.seed(seed) + if (!is.null(seed)) set.seed(seed) log_z <- as.numeric( compute_importance_sampling_estimate( alpha_vector = alpha_vector, n_items = n_items, @@ -88,7 +88,7 @@ estimate_partition_function <- function(method = "importance_sampling", alpha = alpha_vector, log_z = log_z ) - } else if(method == "asymptotic"){ + } else if (method == "asymptotic") { stopifnot(metric %in% c("footrule", "spearman")) estimate <- dplyr::tibble( diff --git a/R/expected_dist.R b/R/expected_dist.R index 65112db6..d026f1e5 100644 --- a/R/expected_dist.R +++ b/R/expected_dist.R @@ -10,32 +10,32 @@ #' #' @example /inst/examples/expected_dist_example.R -expected_dist <- function(alpha,n_items,metric){ +expected_dist <- function(alpha, n_items, metric) { - if(n_items < 1 | floor(n_items) != n_items){ + if (n_items < 1 | floor(n_items) != n_items) { stop("Number of items must be a positive integer") } # Scale alpha to parametrization used alpha <- alpha / n_items - if(alpha < 0){ + if (alpha < 0) { stop("alpha must be a non-negative value") }else{ - if(metric=="kendall"){ - out <- exp_d_tau(alpha,n_items) + if (metric == "kendall") { + out <- exp_d_tau(alpha, n_items) } - if(metric=="cayley"){ - out <- exp_d_cay(alpha,n_items) + if (metric == "cayley") { + out <- exp_d_cay(alpha, n_items) } - if(metric=="hamming"){ - out <- exp_d_ham(alpha,n_items) + if (metric == "hamming") { + out <- exp_d_ham(alpha, n_items) } - if(metric%in%c("ulam","footrule","spearman")){ + if (metric %in% c("ulam", "footrule", "spearman")) { pfd <- dplyr::filter(partition_function_data, .data$metric == !!metric, .data$n_items == !!n_items, .data$type == "cardinalities") - if(nrow(pfd) == 0){ + if (nrow(pfd) == 0) { stop("Given number of items currently not available for the specified metric") } else{ card <- pfd$values[[1]] diff --git a/R/generate_constraints.R b/R/generate_constraints.R index a905c098..15976e83 100644 --- a/R/generate_constraints.R +++ b/R/generate_constraints.R @@ -23,7 +23,7 @@ #' #' @example /inst/examples/generate_constraints_example.R #' -generate_constraints <- function(preferences, n_items, cl = NULL){ +generate_constraints <- function(preferences, n_items, cl = NULL) { stopifnot(is.null(cl) || inherits(cl, "cluster")) @@ -31,7 +31,7 @@ generate_constraints <- function(preferences, n_items, cl = NULL){ # one list element per assessor constraints <- split(preferences[, c("bottom_item", "top_item"), drop = FALSE], preferences$assessor) - if(is.null(cl)) { + if (is.null(cl)) { lapply(constraints, constraint_fun, n_items) } else { parallel::parLapply(cl = cl, X = constraints, fun = constraint_fun, n_items) @@ -40,7 +40,7 @@ generate_constraints <- function(preferences, n_items, cl = NULL){ -constraint_fun <- function(x, n_items){ +constraint_fun <- function(x, n_items) { # Find out which items are constrained constrained_items <- unique(c(x[["bottom_item"]], x[["top_item"]])) diff --git a/R/generate_initial_ranking.R b/R/generate_initial_ranking.R index 2709d684..85c0f34d 100644 --- a/R/generate_initial_ranking.R +++ b/R/generate_initial_ranking.R @@ -69,25 +69,25 @@ generate_initial_ranking <- function(tc, n_items = max(tc[, c("bottom_item", "top_item")]), cl = NULL, shuffle_unranked = FALSE, random = FALSE, - random_limit = 8L){ + random_limit = 8L) { - if(!("BayesMallowsTC" %in% class(tc))){ + if (!("BayesMallowsTC" %in% class(tc))) { stop("tc must be an object returned from generate_transitive_closure") } stopifnot(is.null(cl) || inherits(cl, "cluster")) - if(n_items > random_limit && random){ + if (n_items > random_limit && random) { stop(paste("Number of items exceeds the limit for generation of random permutations,\n", "modify the random_limit argument to override this.\n")) } - if(n_items < max(tc[, c("bottom_item", "top_item")])){ + if (n_items < max(tc[, c("bottom_item", "top_item")])) { stop("Too few items specified. Please see documentation Note about labeling of items.\n") } prefs <- split(tc[, c("bottom_item", "top_item"), drop = FALSE], tc$assessor) - if(is.null(cl)){ + if (is.null(cl)) { prefs <- lapply(prefs, function(x, y, sr, r) create_ranks(as.matrix(x), y, sr, r), n_items, shuffle_unranked, random) } else { @@ -99,15 +99,15 @@ generate_initial_ranking <- function(tc, do.call(rbind, prefs) } -create_ranks <- function(mat, n_items, shuffle_unranked, random){ +create_ranks <- function(mat, n_items, shuffle_unranked, random) { - if(!random){ + if (!random) { g <- igraph::graph_from_edgelist(mat) g <- as.integer(igraph::topo_sort(g)) all_items <- seq(from = 1, to = n_items, by = 1) - if(!shuffle_unranked){ + if (!shuffle_unranked) { # Add unranked elements outside of the range at the end g_final <- c(g, setdiff(all_items, g)) @@ -127,7 +127,7 @@ create_ranks <- function(mat, n_items, shuffle_unranked, random){ mat <- matrix(r, nrow = 1) } else{ graph <- list() - for(i in seq_len(n_items)){ + for (i in seq_len(n_items)) { graph[[i]] <- unique(mat[mat[, "top_item"] == i, "bottom_item"]) } indegree_init <- rep(0, n_items) @@ -138,8 +138,8 @@ create_ranks <- function(mat, n_items, shuffle_unranked, random){ discovered <- rep(FALSE, n_items) path <- numeric() - stdout <- vector('character') - con <- textConnection('stdout', 'wr', local = TRUE) + stdout <- vector("character") + con <- textConnection("stdout", "wr", local = TRUE) sink(con) all_topological_sorts(graph, path, discovered, n_items) sink() diff --git a/R/generate_transitive_closure.R b/R/generate_transitive_closure.R index 66dfcd34..1835af8d 100644 --- a/R/generate_transitive_closure.R +++ b/R/generate_transitive_closure.R @@ -41,12 +41,12 @@ #' #' @example /inst/examples/generate_transitive_closure_example.R #' -generate_transitive_closure <- function(df, cl = NULL){ +generate_transitive_closure <- function(df, cl = NULL) { stopifnot(is.null(cl) || inherits(cl, "cluster")) prefs <- split(df[, c("bottom_item", "top_item"), drop = FALSE], df$assessor) - if(is.null(cl)){ + if (is.null(cl)) { prefs <- mapply(function(x, y) cbind(y, .generate_transitive_closure(cbind(x$bottom_item, x$top_item))), prefs, unique(df$assessor), SIMPLIFY = FALSE) } else { @@ -66,7 +66,7 @@ generate_transitive_closure <- function(df, cl = NULL){ "bottom_item" = "top_item", "top_item" = "bottom_item")) - if(nrow(check) > 0){ + if (nrow(check) > 0) { print("Inconsistent rankings:") print(check) stop("Cannot compute transitive closure. Please run compute_mallows with error_model='bernoulli'.") @@ -83,7 +83,7 @@ generate_transitive_closure <- function(df, cl = NULL){ #' @param mat A matrix in which column 1 is the lower ranked item and column 2 is the #' upper ranked item. #' @keywords internal -.generate_transitive_closure <- function(mat){ +.generate_transitive_closure <- function(mat) { # This line was an answer to StackOverflow question 51794127 my_set <- do.call(sets::set, apply(mat, 1, sets::as.tuple)) @@ -104,4 +104,3 @@ generate_transitive_closure <- function(df, cl = NULL){ return(result) } - diff --git a/R/lik_db_mix.R b/R/lik_db_mix.R index 2cbe591f..3587f221 100644 --- a/R/lik_db_mix.R +++ b/R/lik_db_mix.R @@ -31,23 +31,23 @@ #' @example inst/examples/lik_db_mix_example.R #' lik_db_mix <- function(rho, alpha, weights, metric, - rankings, obs_freq = NULL, log = FALSE){ + rankings, obs_freq = NULL, log = FALSE) { - if(!is.matrix(rankings)){ + if (!is.matrix(rankings)) { rankings <- matrix(rankings, nrow = 1) } - if(!is.null(obs_freq)){ - if(nrow(rankings) != length(obs_freq)){ + if (!is.null(obs_freq)) { + if (nrow(rankings) != length(obs_freq)) { stop("obs_freq must be of same length as the number of rows in rankings") } } - if(!is.matrix(rho)){ + if (!is.matrix(rho)) { rho <- matrix(rho, nrow = 1) } - if(is.null(obs_freq)){ + if (is.null(obs_freq)) { obs_freq <- rep(1, nrow(rankings)) } @@ -63,7 +63,7 @@ lik_db_mix <- function(rho, alpha, weights, metric, rankings = rankings, obs_freq = obs_freq) - if(!log) out <- exp(out) + if (!log) out <- exp(out) return(out) } diff --git a/R/misc.R b/R/misc.R index 213f5988..a617fa47 100644 --- a/R/misc.R +++ b/R/misc.R @@ -4,7 +4,7 @@ NULL -.onUnload <- function (libpath) { +.onUnload <- function(libpath) { library.dynam.unload("BayesMallows", libpath) } @@ -12,10 +12,10 @@ NULL #' @param vec a vector #' @return TRUE if vec is a permutation #' @keywords internal -validate_permutation <- function(vec){ - if(!any(is.na(vec))){ +validate_permutation <- function(vec) { + if (!any(is.na(vec))) { return(all(sort(vec) == seq_along(vec))) - } else if(all(is.na(vec))){ + } else if (all(is.na(vec))) { return(TRUE) } else { return(all(vec[!is.na(vec)] <= length(vec)) && @@ -33,16 +33,16 @@ scalefun <- function(x) sprintf("%d", as.integer(x)) -prepare_partition_function <- function(logz_estimate, metric, n_items){ +prepare_partition_function <- function(logz_estimate, metric, n_items) { # First, has the user supplied an estimate? - if(!is.null(logz_estimate)){ + if (!is.null(logz_estimate)) { return(list(cardinalities = NULL, logz_estimate = logz_estimate)) } # Second, do we have a sequence? relevant_params <- dplyr::filter(partition_function_data, .data$n_items == !!n_items, .data$metric == !!metric, .data$type == "cardinalities") - if(nrow(relevant_params) == 1){ + if (nrow(relevant_params) == 1) { return(list(cardinalities = unlist(relevant_params$values), logz_estimate = NULL)) } @@ -50,12 +50,12 @@ prepare_partition_function <- function(logz_estimate, metric, n_items){ relevant_params <- dplyr::filter(partition_function_data, .data$n_items == !!n_items, .data$metric == !!metric, .data$type == "importance_sampling") - if(nrow(relevant_params) == 1){ + if (nrow(relevant_params) == 1) { return(list(cardinalities = NULL, logz_estimate = unlist(relevant_params$values))) } # Fourth, is it the Ulam distance? - if(metric == "ulam"){ + if (metric == "ulam") { return(list( cardinalities = unlist(lapply(0:(n_items - 1), function(x) PerMallows::count.perms(perm.length = n_items, dist.value = x, dist.name = "ulam"))) @@ -63,7 +63,7 @@ prepare_partition_function <- function(logz_estimate, metric, n_items){ } # Fifth, can we compute the partition function in our C++ code? - if(metric %in% c("cayley", "hamming", "kendall")){ + if (metric %in% c("cayley", "hamming", "kendall")) { return(list(cardinalities = NULL, logz_estimate = NULL)) } @@ -74,7 +74,7 @@ prepare_partition_function <- function(logz_estimate, metric, n_items){ # function taken from PLMIX package: # Copyright Cristina Mollica and Luca Tardella -unit_to_freq <- function (data) { +unit_to_freq <- function(data) { data <- fill_single_entries(data = data) K <- ncol(data) freq <- table(apply(data, 1, paste, collapse = "-")) @@ -88,11 +88,11 @@ unit_to_freq <- function (data) { # function taken from PLMIX package: # Copyright Cristina Mollica and Luca Tardella -fill_single_entries <- function (data) { +fill_single_entries <- function(data) { if (is.vector(data)) { data <- t(data) } - K = ncol(data) + K <- ncol(data) r_single_miss <- (rowSums(data == 0) == 1) if (any(r_single_miss)) { w_row <- which(r_single_miss) @@ -109,15 +109,15 @@ fill_single_entries <- function (data) { ## Source: https://stackoverflow.com/questions/11095992/generating-all-distinct-permutations-of-a-list-in-r -permutations <- function(n){ - if(n==1){ +permutations <- function(n) { + if (n == 1) { return(matrix(1)) } else { - sp <- permutations(n-1) + sp <- permutations(n - 1) p <- nrow(sp) - A <- matrix(nrow=n*p,ncol=n) - for(i in 1:n){ - A[(i-1)*p+1:p,] <- cbind(i,sp+(sp>=i)) + A <- matrix(nrow = n * p, ncol = n) + for (i in 1:n) { + A[(i - 1) * p + 1:p, ] <- cbind(i, sp + (sp >= i)) } return(A) } diff --git a/R/misc_expected_dist.R b/R/misc_expected_dist.R index d619999a..e2667ab2 100644 --- a/R/misc_expected_dist.R +++ b/R/misc_expected_dist.R @@ -5,14 +5,15 @@ #/' #/' @return Expected value of the Kendall metric under the Mallows rank model with the Kendall distance. -exp_d_tau <- function(alpha,n_items){ - if(alpha>0){ +exp_d_tau <- function(alpha, n_items) { + if (alpha > 0) { idx <- seq(from = 1, to = n_items, by = 1) - out <- n_items*exp(-alpha)/(1-exp(-alpha))-sum((idx*exp(-idx*alpha))/(1-exp(-idx*alpha))) - }else{ - if(alpha==0){ - out <- n_items*(n_items-1)/4 - }else{ + out <- n_items * exp(-alpha) / (1 - exp(-alpha)) - + sum((idx * exp(-idx * alpha)) / (1 - exp(-idx * alpha))) + } else { + if (alpha == 0) { + out <- n_items * (n_items - 1) / 4 + } else { stop("alpha must be a non-negative value") } } @@ -26,9 +27,9 @@ exp_d_tau <- function(alpha,n_items){ #/' #/' @return Expected value of the Cayley metric under the Mallows rank model with the Cayley distance. -exp_d_cay <- function(alpha,n_items){ +exp_d_cay <- function(alpha, n_items) { idx <- seq(from = 1, to = n_items - 1, by = 1) - out <- sum(idx/(idx+exp(alpha))) + out <- sum(idx / (idx + exp(alpha))) return(out) } @@ -39,9 +40,11 @@ exp_d_cay <- function(alpha,n_items){ #/' #/' @return Expected value of the Hamming metric under the Mallows rank model with the Hamming distance. -exp_d_ham <- function(alpha,n_items){ +exp_d_ham <- function(alpha, n_items) { idx <- seq(from = 0, to = n_items, by = 1) - out <- n_items-exp(alpha)*sum(((exp(alpha)-1)^idx[-(n_items+1)])/base::factorial(idx[-(n_items+1)]))/sum(((exp(alpha)-1)^idx)/base::factorial(idx)) + out <- n_items - exp(alpha) * + sum(((exp(alpha) - 1)^idx[- (n_items + 1)]) / base::factorial(idx[- (n_items + 1)])) / + sum(((exp(alpha) - 1)^idx) / base::factorial(idx)) return(out) } @@ -53,20 +56,17 @@ exp_d_ham <- function(alpha,n_items){ #/' @return Expected value of the Ulam metric under the Mallows rank model with the Ulam distance. # The function based on the command from the PerMallows package is slow because it has to generate the distance frequencies. - -# exp_d_ulam_old=function(alpha,n_items){ -# out=PerMallows::expectation.mm(theta=alpha,perm.length=n_items,dist.name="ulam") -# return(out) -# } - -exp_d_ulam <- function(alpha,n_items){ # for n_items<=95 +exp_d_ulam <- function(alpha, n_items) { # for n_items<=95 idx <- seq(from = 0, to = n_items - 1, by = 1) pfd <- partition_function_data - card <- pfd$values[pfd$metric=="ulam"][[n_items]] - norm_const <- exp(get_partition_function(alpha=alpha*n_items,n_items=n_items, - metric="ulam", - cardinalities=card)) - out <- sum(idx*exp(-alpha*idx)*card)/norm_const + card <- pfd$values[pfd$metric == "ulam"][[n_items]] + norm_const <- exp( + get_partition_function( + alpha = alpha * n_items, n_items = n_items, metric = "ulam", + cardinalities = card + ) + ) + out <- sum(idx * exp(-alpha * idx) * card) / norm_const return(out) } @@ -77,16 +77,17 @@ exp_d_ulam <- function(alpha,n_items){ # for n_items<=95 #/' #/' @return Expected value of the Footrule metric under the Mallows rank model with the Footrule distance. -exp_d_foot <- function(alpha,n_items){ # for n_items<=50 - idx <- seq(0,floor(n_items^2/2),by=2) +exp_d_foot <- function(alpha, n_items) { # for n_items<=50 + idx <- seq(0, floor(n_items^2 / 2), by = 2) pfd <- partition_function_data - card <- pfd$values[pfd$metric=="footrule"][[n_items]] - norm_const <- exp(get_partition_function(alpha=alpha*n_items,n_items=n_items, - metric="footrule", - cardinalities=card)) - # print(length(idx)) - # print(length(card)) - out <- sum(idx*exp(-alpha*idx)*card)/norm_const + card <- pfd$values[pfd$metric == "footrule"][[n_items]] + norm_const <- exp( + get_partition_function( + alpha = alpha * n_items, n_items = n_items, metric = "footrule", + cardinalities = card + ) + ) + out <- sum(idx * exp(-alpha * idx) * card) / norm_const return(out) } @@ -97,15 +98,16 @@ exp_d_foot <- function(alpha,n_items){ # for n_items<=50 #/' #/' @return Expected value of the Spearman metric under the Mallows rank model with the Spearman distance. -exp_d_spear <- function(alpha,n_items){ # for n_items<=14 - idx <- seq(0,2*base::choose(n_items+1,3),by=2) +exp_d_spear <- function(alpha, n_items) { # for n_items<=14 + idx <- seq(0, 2 * base::choose(n_items + 1, 3), by = 2) pfd <- partition_function_data - card <- pfd$values[pfd$metric=="spearman"][[n_items]] - norm_const <- exp(get_partition_function(alpha=alpha*n_items,n_items=n_items, - metric="spearman", - cardinalities=card)) - # print(length(idx)) - # print(length(card)) - out <- sum(idx*exp(-alpha*idx)*card)/norm_const + card <- pfd$values[pfd$metric == "spearman"][[n_items]] + norm_const <- exp( + get_partition_function( + alpha = alpha * n_items, n_items = n_items, metric = "spearman", + cardinalities = card + ) + ) + out <- sum(idx * exp(-alpha * idx) * card) / norm_const return(out) } diff --git a/R/misc_likelihood.R b/R/misc_likelihood.R index a941e819..d1d04216 100644 --- a/R/misc_likelihood.R +++ b/R/misc_likelihood.R @@ -11,29 +11,38 @@ #/' @return The log-likelihood value corresponding to one or more observed rankings under the Mallows rank model with distance specified by the \code{metric} argument. #/' -log_lik_db <- function(rho, alpha, metric, rankings, obs_freq){ +log_lik_db <- function(rho, alpha, metric, rankings, obs_freq) { N <- sum(obs_freq) n_items <- ncol(rankings) - if(metric %in% c("kendall", "cayley", "hamming")){ - log_lik <- -(alpha*rank_dist_sum(rankings=t(rankings),rho=rho,metric=metric,obs_freq=obs_freq)+N*get_partition_function(n_items=n_items,alpha=alpha*n_items,metric=metric)) + if (metric %in% c("kendall", "cayley", "hamming")) { + log_lik <- - ( + alpha * + rank_dist_sum( + rankings = t(rankings), rho = rho, metric = metric, obs_freq = obs_freq + ) + + N * + get_partition_function( + n_items = n_items, alpha = alpha * n_items, metric = metric + ) + ) } - if(metric %in% c( "ulam", "footrule", "spearman")){ + if (metric %in% c("ulam", "footrule", "spearman")) { pfd <- dplyr::filter(partition_function_data, .data$metric == !!metric, .data$n_items == !!n_items, .data$type == "cardinalities") - if(nrow(pfd) == 0){ + if (nrow(pfd) == 0) { stop("Given number of items currently not available for the specified metric") } else{ card <- pfd$values[[1]] } - log_lik <- -( + log_lik <- - ( alpha * rank_dist_sum(rankings = t(rankings), rho = rho, metric = metric, obs_freq = obs_freq) + - N * get_partition_function( alpha = alpha * n_items, + N * get_partition_function(alpha = alpha * n_items, n_items = n_items, metric = metric, cardinalities = card)) #TODO #91: write this part in particular as new function? } @@ -62,19 +71,19 @@ log_lik_db <- function(rho, alpha, metric, rankings, obs_freq){ #/' log_lik_db_mix <- function(rho, alpha, weights, metric, - rankings,obs_freq){ + rankings, obs_freq) { L <- length(obs_freq) n_clusters <- length(weights) temp <- matrix(NA, nrow = n_clusters, ncol = L) - for(l in seq_len(L)){ - for(g in seq_len(n_clusters)){ + for (l in seq_len(L)) { + for (g in seq_len(n_clusters)) { temp[g, l] <- exp(log_lik_db( rho = rho[g, ], alpha = alpha[g], metric = metric, - rankings = rankings[ l, , drop = FALSE], + rankings = rankings[l, , drop = FALSE], obs_freq = obs_freq[l])) } } - log_lik <- sum(log(weights%*%temp)) + log_lik <- sum(log(weights %*% temp)) return(log_lik) } diff --git a/R/plot.BayesMallows.R b/R/plot.BayesMallows.R index c27265ac..b1091a20 100644 --- a/R/plot.BayesMallows.R +++ b/R/plot.BayesMallows.R @@ -24,19 +24,19 @@ #' #' @example /inst/examples/plot.BayesMallows_example.R #' -plot.BayesMallows <- function(x, burnin = x$burnin, parameter = "alpha", items = NULL, ...){ +plot.BayesMallows <- function(x, burnin = x$burnin, parameter = "alpha", items = NULL, ...) { # Note, the first argument must be named x, otherwise R CMD CHECK will # issue a warning. This is because plot.BayesMallows must have the same # required arguments as graphics::plot. - if(is.null(burnin)){ + if (is.null(burnin)) { stop("Please specify the burnin.") } - if(x$nmc <= burnin) stop("burnin must be <= nmc") + if (x$nmc <= burnin) stop("burnin must be <= nmc") stopifnot(parameter %in% c("alpha", "rho", "cluster_probs", "cluster_assignment", "theta")) - if(parameter == "alpha") { + if (parameter == "alpha") { df <- dplyr::filter(x$alpha, .data$iteration > burnin) p <- ggplot2::ggplot(df, ggplot2::aes(x = .data$value)) + @@ -44,22 +44,22 @@ plot.BayesMallows <- function(x, burnin = x$burnin, parameter = "alpha", items = ggplot2::xlab(expression(alpha)) + ggplot2::ylab("Posterior density") - if(x$n_clusters > 1){ + if (x$n_clusters > 1) { p <- p + ggplot2::facet_wrap(~ .data$cluster, scales = "free_x") } return(p) - } else if(parameter == "rho") { + } else if (parameter == "rho") { - if(is.null(items) && x$n_items > 5){ + if (is.null(items) && x$n_items > 5) { message("Items not provided by user. Picking 5 at random.") items <- sample.int(x$n_items, 5) } else if (is.null(items) && x$n_items > 0) { items <- seq.int(from = 1, to = x$n_items) } - if(!is.character(items)){ + if (!is.character(items)) { items <- x$items[items] } @@ -78,14 +78,14 @@ plot.BayesMallows <- function(x, burnin = x$burnin, parameter = "alpha", items = ggplot2::xlab("rank") + ggplot2::ylab("Posterior probability") - if(x$n_clusters == 1){ + if (x$n_clusters == 1) { p <- p + ggplot2::facet_wrap(~ .data$item) } else { p <- p + ggplot2::facet_wrap(~ .data$cluster + .data$item) } return(p) - } else if(parameter == "cluster_probs"){ + } else if (parameter == "cluster_probs") { df <- dplyr::filter(x$cluster_probs, .data$iteration > burnin) ggplot2::ggplot(df, ggplot2::aes(x = .data$value)) + @@ -94,9 +94,9 @@ plot.BayesMallows <- function(x, burnin = x$burnin, parameter = "alpha", items = ggplot2::ylab("Posterior density") + ggplot2::facet_wrap(~ .data$cluster) - } else if(parameter == "cluster_assignment"){ + } else if (parameter == "cluster_assignment") { - if(is.null(x$cluster_assignment)){ + if (is.null(x$cluster_assignment)) { stop("Please rerun compute_mallows with save_clus = TRUE") } @@ -121,9 +121,9 @@ plot.BayesMallows <- function(x, burnin = x$burnin, parameter = "alpha", items = ) + ggplot2::xlab(paste0("Assessors (", min(assessor_order), " - ", max(assessor_order), ")")) - } else if(parameter == "theta") { + } else if (parameter == "theta") { - if(is.null(x$theta)){ + if (is.null(x$theta)) { stop("Please run compute_mallows with error_model = 'bernoulli'.") } diff --git a/R/plot_elbow.R b/R/plot_elbow.R index 3b599244..1da27321 100644 --- a/R/plot_elbow.R +++ b/R/plot_elbow.R @@ -23,32 +23,32 @@ #' #' @example /inst/examples/compute_mallows_mixtures_example.R #' -plot_elbow <- function(..., burnin = NULL){ +plot_elbow <- function(..., burnin = NULL) { # Put the models into a list. These are typically fitted with different number of clusters models <- list(...) # Taking into account the case where the user has entered a list of models - if(length(models) == 1 && !(class(models[[1]]) == "BayesMallows")){ + if (length(models) == 1 && !(class(models[[1]]) == "BayesMallows")) { models <- models[[1]] } df <- do.call(rbind, lapply(models, function(x) { stopifnot(class(x) == "BayesMallows") - if(!("burnin" %in% names(x))){ - if(is.null(burnin)){ + if (!("burnin" %in% names(x))) { + if (is.null(burnin)) { stop("burnin not provided") } else { x$burnin <- burnin } } - if(!x$include_wcd) stop("To get an elbow plot, set include_wcd=TRUE in compute_mallows") + if (!x$include_wcd) stop("To get an elbow plot, set include_wcd=TRUE in compute_mallows") df <- dplyr::filter(x$within_cluster_distance, .data$iteration > x$burnin) - if(nrow(df) <= 0) stop("burnin must be strictly smaller than the number of MCMC samples") + if (nrow(df) <= 0) stop("burnin must be strictly smaller than the number of MCMC samples") # Need to sum the within-cluster distances across clusters, for each iteration df <- dplyr::group_by(df, .data$iteration) diff --git a/R/plot_top_k.R b/R/plot_top_k.R index 0c32d496..d2b94011 100644 --- a/R/plot_top_k.R +++ b/R/plot_top_k.R @@ -25,7 +25,7 @@ #' plot_top_k <- function(model_fit, burnin = model_fit$burnin, k = 3, - rel_widths = c(model_fit$n_clusters, 10)){ + rel_widths = c(model_fit$n_clusters, 10)) { validate_top_k(model_fit, burnin) @@ -35,10 +35,10 @@ plot_top_k <- function(model_fit, burnin = model_fit$burnin, # Factors are not needed in this case rho <- dplyr::mutate(rho, item = as.character(.data$item)) rho <- dplyr::group_by(rho, .data$item, .data$cluster) - rho <- dplyr::summarise(rho, prob = dplyr::n()/n_samples, .groups = "drop") + rho <- dplyr::summarise(rho, prob = dplyr::n() / n_samples, .groups = "drop") # Find the complete set of items per cluster - rho <- do.call(rbind, lapply(split(rho, f = rho$cluster), function(dd){ + rho <- do.call(rbind, lapply(split(rho, f = rho$cluster), function(dd) { dd2 <- merge(dd, expand.grid(item = unique(rho$item)), by = "item", all = TRUE) dd2$cluster[is.na(dd2$cluster)] <- unique(dd$cluster) @@ -48,7 +48,7 @@ plot_top_k <- function(model_fit, burnin = model_fit$burnin, # Sort the items according to probability in Cluster 1 item_ordering <- compute_consensus(model_fit, type = "CP", burnin = burnin) - if(model_fit$n_clusters > 1){ + if (model_fit$n_clusters > 1) { item_ordering <- rev(item_ordering[item_ordering$cluster == "Cluster 1", ]$item) } else { item_ordering <- rev(item_ordering$item) @@ -57,7 +57,7 @@ plot_top_k <- function(model_fit, burnin = model_fit$burnin, rho <- dplyr::mutate(rho, item = factor(.data$item, levels = unique(item_ordering))) # Trick to make the plot look nicer - if(model_fit$n_clusters == 1){ + if (model_fit$n_clusters == 1) { rho <- dplyr::mutate(rho, cluster = "") } @@ -82,7 +82,7 @@ plot_top_k <- function(model_fit, burnin = model_fit$burnin, ggplot2::xlab(expression(rho)) + ggplot2::theme(legend.position = "none") - if(model_fit$n_clusters > 1){ + if (model_fit$n_clusters > 1) { rho_plot <- rho_plot + ggplot2::facet_wrap(~ .data$cluster) } diff --git a/R/predict_top_k.R b/R/predict_top_k.R index 5a8f1daa..37f6a196 100644 --- a/R/predict_top_k.R +++ b/R/predict_top_k.R @@ -23,7 +23,7 @@ #' @seealso \code{\link{plot_top_k}} #' predict_top_k <- function(model_fit, burnin = model_fit$burnin, - k = 3){ + k = 3) { validate_top_k(model_fit, burnin) .predict_top_k(model_fit, burnin, k) @@ -31,15 +31,15 @@ predict_top_k <- function(model_fit, burnin = model_fit$burnin, -.predict_top_k <- function(model_fit, burnin, k){ +.predict_top_k <- function(model_fit, burnin, k) { rankings <- dplyr::filter(model_fit$augmented_data, .data$iteration > burnin, .data$value <= k) n_samples <- length(unique(rankings$iteration)) rankings <- dplyr::mutate(rankings, item = as.character(.data$item)) rankings <- dplyr::group_by(rankings, .data$assessor, .data$item) - rankings <- dplyr::summarise(rankings, prob = dplyr::n()/n_samples, .groups = "drop") + rankings <- dplyr::summarise(rankings, prob = dplyr::n() / n_samples, .groups = "drop") - do.call(rbind, lapply(split(rankings, f = rankings$assessor), function(dd){ + do.call(rbind, lapply(split(rankings, f = rankings$assessor), function(dd) { dd2 <- merge(dd, expand.grid(item = unique(rankings$item)), by = "item", all = TRUE) dd2$assessor[is.na(dd2$assessor)] <- unique(dd$assessor) @@ -50,13 +50,13 @@ predict_top_k <- function(model_fit, burnin = model_fit$burnin, } -validate_top_k <- function(model_fit, burnin){ - if(is.null(burnin)){ +validate_top_k <- function(model_fit, burnin) { + if (is.null(burnin)) { stop("Please specify the burnin.") } stopifnot(burnin < model_fit$nmc) - if(!exists("augmented_data", model_fit)){ + if (!exists("augmented_data", model_fit)) { stop("model_fit must have element augmented_data. Please set save_aug = TRUE in compute_mallows in order to create a top-k plot.") } diff --git a/R/print.BayesMallows.R b/R/print.BayesMallows.R index 10cf1cc6..a288c90e 100644 --- a/R/print.BayesMallows.R +++ b/R/print.BayesMallows.R @@ -11,16 +11,14 @@ #' @export #' #' -print.BayesMallows <- function(x, ...){ +print.BayesMallows <- function(x, ...) { # Note, the first argument must be named x, otherwise R CMD CHECK will # issue a warning. This is because print.BayesMallows must have the same # required arguments as base::print. - if(is.null(x$n_items) || is.null(x$n_assessors)) { + if (is.null(x$n_items) || is.null(x$n_assessors)) { stop("BayesMallows object must have elements n_items and n_assessors.") } cat("Bayesian Mallows Model with", x$n_items, "items and", x$n_assessors, "assessors.\n") cat("Use functions assess_convergence() or plot() to visualize the object.") } - - diff --git a/R/print.BayesMallowsMixtures.R b/R/print.BayesMallowsMixtures.R index ac874384..3749b24d 100644 --- a/R/print.BayesMallowsMixtures.R +++ b/R/print.BayesMallowsMixtures.R @@ -11,12 +11,12 @@ #' @export #' #' -print.BayesMallowsMixtures <- function(x, ...){ +print.BayesMallowsMixtures <- function(x, ...) { # Note, the first argument must be named x, otherwise R CMD CHECK will # issue a warning. This is because print.BayesMallowsMixtures must have the same # required arguments as base::print. - if(!Reduce(`&`, lapply(x, function(x) inherits(x, "BayesMallows")))) { + if (!Reduce(`&`, lapply(x, function(x) inherits(x, "BayesMallows")))) { stop("All elements of a BayesMallowsMixtures object must be of class BayesMallows.") } @@ -25,5 +25,3 @@ print.BayesMallowsMixtures <- function(x, ...){ cat("Collection of", length(x), "Bayesian Mallows Mixture Models with the following number of mixture components:\n", paste0(paste(n_clusters, collapse = ", "), ".")) } - - diff --git a/R/rank_conversion.R b/R/rank_conversion.R index 0918bc4a..05f28e88 100644 --- a/R/rank_conversion.R +++ b/R/rank_conversion.R @@ -42,13 +42,13 @@ NULL #' @describeIn rank_conversion Convert from ordering to ranking. #' @export -create_ranking <- function(orderings){ +create_ranking <- function(orderings) { # Check that it is a permutation - if(is.vector(orderings)){ + if (is.vector(orderings)) { stopifnot(validate_permutation(orderings)) return(order(orderings)) - } else if(is.matrix(orderings)){ + } else if (is.matrix(orderings)) { n_items <- ncol(orderings) # Convert to list, for easier functional programming @@ -57,7 +57,7 @@ create_ranking <- function(orderings){ # Check that matrix contains permutations check <- lapply(orderings, validate_permutation) - if(!Reduce(`&&`, check)){ + if (!Reduce(`&&`, check)) { stop(paste("orderings must contain proper permutations. Problem row(s):", which(!check))) } @@ -74,7 +74,7 @@ create_ranking <- function(orderings){ inds[is.na(inds)] <- FALSE # Extract the correct items candidates[inds] - } ) + }) return(t(matrix(unlist(rankings), ncol = length(rankings)))) } else { @@ -84,7 +84,7 @@ create_ranking <- function(orderings){ #' @describeIn rank_conversion Convert from ranking to ordering. #' @export -create_ordering <- function(rankings){ +create_ordering <- function(rankings) { create_ranking(rankings) } diff --git a/R/rank_distance.R b/R/rank_distance.R index 4d464366..e0ec36be 100644 --- a/R/rank_distance.R +++ b/R/rank_distance.R @@ -20,15 +20,15 @@ #' @references \insertAllCited #' #' @example /inst/examples/rank_distance_example.R -rank_distance <- function(rankings, rho, metric, obs_freq = 1){ +rank_distance <- function(rankings, rho, metric, obs_freq = 1) { - if(!is.matrix(rankings)){ + if (!is.matrix(rankings)) { rankings <- matrix(rankings, nrow = 1) } stopifnot(length(obs_freq) == 1 || length(obs_freq) == nrow(rankings)) - if(length(obs_freq) == 1){ + if (length(obs_freq) == 1) { obs_freq <- rep(obs_freq, nrow(rankings)) } diff --git a/R/rank_freq_distr.R b/R/rank_freq_distr.R index f211f567..86911f92 100644 --- a/R/rank_freq_distr.R +++ b/R/rank_freq_distr.R @@ -13,15 +13,15 @@ #' #' @example /inst/examples/rank_freq_distr_example.R #' -rank_freq_distr <- function(rankings){ +rank_freq_distr <- function(rankings) { - if(!is.matrix(rankings)){ + if (!is.matrix(rankings)) { rankings <- matrix(rankings, nrow = 1) } rankings[is.na(rankings)] <- 0 out <- unit_to_freq(data = rankings) - out[out==0] <- NA + out[out == 0] <- NA return(out) diff --git a/R/sample_mallows.R b/R/sample_mallows.R index 7ae54070..557956e8 100644 --- a/R/sample_mallows.R +++ b/R/sample_mallows.R @@ -42,7 +42,7 @@ #' @example /inst/examples/sample_mallows_example.R #' sample_mallows <- function(rho0, alpha0, n_samples, - leap_size = max(1L, floor(n_items/5)), + leap_size = max(1L, floor(n_items / 5)), metric = "footrule", diagnostic = FALSE, burnin = ifelse(diagnostic, 0, 1000), @@ -51,19 +51,19 @@ sample_mallows <- function(rho0, alpha0, n_samples, max_lag = 1000L) { - if(!(validate_permutation(rho0) && sum(is.na(rho0)) == 0)){ + if (!(validate_permutation(rho0) && sum(is.na(rho0)) == 0)) { stop("rho0 must be a proper ranking with no missing values.") } - if(diagnostic && n_samples == 1){ + if (diagnostic && n_samples == 1) { stop("Must have more than one samples to create diagnostic plots") - } else if(n_samples <= 0){ + } else if (n_samples <= 0) { stop("n_samples must be positive.") } n_items <- length(rho0) - if(diagnostic){ + if (diagnostic) { internal_burnin <- 0 internal_thinning <- 1 internal_n_samples <- burnin + n_samples * thinning @@ -84,8 +84,8 @@ sample_mallows <- function(rho0, alpha0, n_samples, obs_freq = rep(1, internal_n_samples) )) - if(diagnostic){ - if(is.null(items_to_plot) && n_items > 5){ + if (diagnostic) { + if (is.null(items_to_plot) && n_items > 5) { message("Items not provided by user. Picking 5 at random.") items_to_plot <- sample.int(n_items, 5) } else { @@ -93,11 +93,11 @@ sample_mallows <- function(rho0, alpha0, n_samples, } # Compute the autocorrelation in the samples - autocorr <- apply(samples[ , items_to_plot, drop = FALSE], 2, stats::acf, + autocorr <- apply(samples[, items_to_plot, drop = FALSE], 2, stats::acf, lag.max = max_lag, plot = FALSE, demean = TRUE) names(autocorr) <- items_to_plot - autocorr <- do.call(rbind, Map(function(x, xnm){ + autocorr <- do.call(rbind, Map(function(x, xnm) { data.frame( item = xnm, acf = x$acf[, 1, 1], diff --git a/R/smc_post_processing_functions.R b/R/smc_post_processing_functions.R index 74476e28..07280934 100644 --- a/R/smc_post_processing_functions.R +++ b/R/smc_post_processing_functions.R @@ -11,9 +11,9 @@ smc_processing <- function(output, colnames = NULL) { df <- data.frame(data = output) # if colnames are specified, then incorporate them - if(is.null(colnames)){ + if (is.null(colnames)) { n_items <- ncol(df) - cletters <- rep(c("Item"), times = n_items) + cletters <- rep("Item", times = n_items) cindexes <- (c(1:n_items)) cnames <- c(paste(cletters, cindexes, sep = " ")) colnames(df) <- cnames @@ -71,7 +71,7 @@ compute_posterior_intervals_rho <- function(output, nmc, burnin, colnames = NULL } #------------------------------------------------------------------------------------------ - if(verbose) print(rho_posterior_interval) + if (verbose) print(rho_posterior_interval) return(rho_posterior_interval) } @@ -131,7 +131,7 @@ compute_rho_consensus <- function(output, nmc, burnin, C, type, colnames = NULL, plot_alpha_posterior <- function(output, nmc, burnin) { alpha_samples_table <- data.frame(iteration = 1:nmc, value = output) - plot_posterior_alpha <- ggplot2::ggplot(alpha_samples_table, ggplot2::aes_(x =~ value)) + + plot_posterior_alpha <- ggplot2::ggplot(alpha_samples_table, ggplot2::aes_(x = ~ value)) + ggplot2::geom_density() + ggplot2::xlab(expression(alpha)) + ggplot2::ylab("Posterior density") + @@ -172,32 +172,32 @@ compute_posterior_intervals_alpha <- function(output, nmc, burnin, verbose=FALSE #' @param items Either a vector of item names, or a #' vector of indices. If NULL, five items are selected randomly. #' @export -plot_rho_posterior <- function(output, nmc, burnin, C, colnames = NULL, items = NULL){ +plot_rho_posterior <- function(output, nmc, burnin, C, colnames = NULL, items = NULL) { - n_items = dim(output)[2] + n_items <- dim(output)[2] - if(is.null(items) && n_items > 5){ + if (is.null(items) && n_items > 5) { message("Items not provided by user or more than 5 items in a ranking. Picking 5 at random.") - items <- sample(1:n_items, 5, replace = F) - items = sort(items) + items <- sample(1:n_items, 5, replace = FALSE) + items <- sort(items) } else if (is.null(items) && n_items <= 5) { items <- c(1:n_items) - items = sort(items) + items <- sort(items) } # do smc processing here - smc_plot = smc_processing(output = output, colnames = colnames) + smc_plot <- smc_processing(output = output, colnames = colnames) - if(!is.character(items)){ + if (!is.character(items)) { items <- unique(smc_plot$item)[items] } - iteration = rep(c(1:nmc), times = n_items) - df = cbind(iteration, smc_plot) + iteration <- rep(c(1:nmc), times = n_items) + df <- cbind(iteration, smc_plot) - if(C==1){ - df = cbind(cluster = "Cluster 1", df) + if (C == 1) { + df <- cbind(cluster = "Cluster 1", df) } df <- dplyr::filter(df, .data$iteration > burnin, .data$item %in% items) @@ -220,7 +220,7 @@ plot_rho_posterior <- function(output, nmc, burnin, C, colnames = NULL, items = ggplot2::xlab("rank") + ggplot2::ylab("Posterior probability") - if(C == 1){ + if (C == 1) { p <- p + ggplot2::facet_wrap(~ .data$item) } else { p <- p + ggplot2::facet_wrap(~ .data$cluster + .data$item) diff --git a/R/tidy_mcmc.R b/R/tidy_mcmc.R index 34c6f847..b08d8497 100644 --- a/R/tidy_mcmc.R +++ b/R/tidy_mcmc.R @@ -1,4 +1,4 @@ -tidy_mcmc <- function(fit){ +tidy_mcmc <- function(fit) { fit <- tidy_rho(fit) fit <- tidy_alpha(fit) @@ -14,7 +14,7 @@ tidy_mcmc <- function(fit){ -tidy_rho <- function(fit){ +tidy_rho <- function(fit) { # Tidy rho rho_dims <- dim(fit$rho) # Item1, Item2, Item3, ...., Item1, Item2, Item3 @@ -47,7 +47,7 @@ tidy_rho <- function(fit){ return(fit) } -tidy_alpha <- function(fit){ +tidy_alpha <- function(fit) { # Tidy alpha alpha_dims <- dim(fit$alpha) # Cluster1, Cluster2, ..., Cluster1, Cluster2 @@ -76,11 +76,11 @@ tidy_alpha <- function(fit){ return(fit) } -tidy_cluster_assignment <- function(fit){ +tidy_cluster_assignment <- function(fit) { # Tidy cluster assignment - if(fit$save_clus){ - if(fit$n_clusters > 1){ + if (fit$save_clus) { + if (fit$n_clusters > 1) { cluster_dims <- dim(fit$cluster_assignment) value <- paste("Cluster", c(fit$cluster_assignment)) } else { @@ -114,9 +114,9 @@ tidy_cluster_assignment <- function(fit){ return(fit) } -tidy_cluster_probabilities <- function(fit){ +tidy_cluster_probabilities <- function(fit) { # Tidy cluster probabilities - if(fit$n_clusters > 1){ + if (fit$n_clusters > 1) { clusprob_dims <- dim(fit$cluster_probs) value <- c(fit$cluster_probs) } else { @@ -149,9 +149,9 @@ tidy_cluster_probabilities <- function(fit){ } -tidy_wcd <- function(fit){ +tidy_wcd <- function(fit) { # Tidy the within-cluster distances, or delete the empty matrix - if(fit$include_wcd){ + if (fit$include_wcd) { wcd_dims <- dim(fit$within_cluster_distance) value <- c(fit$within_cluster_distance) @@ -183,9 +183,9 @@ tidy_wcd <- function(fit){ return(fit) } -tidy_augmented_data <- function(fit){ +tidy_augmented_data <- function(fit) { # Tidy augmented data, or delete - if(fit$save_aug){ + if (fit$save_aug) { augdata_dims <- dim(fit$augmented_data) @@ -217,10 +217,10 @@ tidy_augmented_data <- function(fit){ } -tidy_augmentation_acceptance <- function(fit){ +tidy_augmentation_acceptance <- function(fit) { # Augmentation acceptance - if(fit$any_missing || fit$augpair){ + if (fit$any_missing || fit$augpair) { fit$aug_acceptance <- dplyr::tibble(acceptance_rate = c(fit$aug_acceptance)) fit$aug_acceptance <- dplyr::mutate(fit$aug_acceptance, assessor = dplyr::row_number()) @@ -235,10 +235,10 @@ tidy_augmentation_acceptance <- function(fit){ -tidy_error_probability <- function(fit){ +tidy_error_probability <- function(fit) { theta_length <- length(fit$theta) - if(theta_length > 0){ + if (theta_length > 0) { fit$theta <- dplyr::tibble( iteration = seq(from = 1, to = theta_length, by = 1), value = c(fit$theta) diff --git a/inst/examples/expected_dist_example.R b/inst/examples/expected_dist_example.R index 5ffcd8d9..56a7b763 100644 --- a/inst/examples/expected_dist_example.R +++ b/inst/examples/expected_dist_example.R @@ -1,6 +1,6 @@ -expected_dist(1,5,metric="kendall") -expected_dist(2,6,metric="cayley") -expected_dist(1.5,7,metric="hamming") -expected_dist(5,30,"ulam") -expected_dist(3.5,45,"footrule") -expected_dist(4,10,"spearman") +expected_dist(1, 5, metric = "kendall") +expected_dist(2, 6, metric = "cayley") +expected_dist(1.5, 7, metric = "hamming") +expected_dist(5, 30, "ulam") +expected_dist(3.5, 45, "footrule") +expected_dist(4, 10, "spearman") diff --git a/inst/examples/lik_db_mix_example.R b/inst/examples/lik_db_mix_example.R index 8d6dd901..48476420 100644 --- a/inst/examples/lik_db_mix_example.R +++ b/inst/examples/lik_db_mix_example.R @@ -5,13 +5,13 @@ mydata <- sample_mallows( n_samples = 100, rho0 = 1:n_items, alpha0 = 10, - metric="kendall") + metric = "kendall") # Compute the likelihood and log-likelihood values under the true model... lik_db_mix( - rho = rbind(1:n_items,1:n_items), + rho = rbind(1:n_items, 1:n_items), alpha = c(10, 10), - weights = c(0.5,0.5), + weights = c(0.5, 0.5), metric = "kendall", rankings = mydata ) @@ -28,21 +28,20 @@ lik_db_mix( # or equivalently, by using the frequency distribution freq_distr <- rank_freq_distr(mydata) lik_db_mix( - rho = rbind(1:n_items,1:n_items), + rho = rbind(1:n_items, 1:n_items), alpha = c(10, 10), weights = c(0.5, 0.5), metric = "kendall", rankings = freq_distr[, 1:n_items], - obs_freq = freq_distr[,n_items+1] + obs_freq = freq_distr[, n_items + 1] ) lik_db_mix( rho = rbind(1:n_items, 1:n_items), alpha = c(10, 10), - weights=c(0.5, 0.5), + weights = c(0.5, 0.5), metric = "kendall", rankings = freq_distr[, 1:n_items], - obs_freq = freq_distr[, n_items+1], - log=TRUE + obs_freq = freq_distr[, n_items + 1], + log = TRUE ) - diff --git a/inst/examples/metropolis_hastings_alpha_example.R b/inst/examples/metropolis_hastings_alpha_example.R index 76117b2b..3d182558 100644 --- a/inst/examples/metropolis_hastings_alpha_example.R +++ b/inst/examples/metropolis_hastings_alpha_example.R @@ -1,9 +1,9 @@ -rho <- c(1,2,3,4,5,6) +rho <- c(1, 2, 3, 4, 5, 6) alpha <- 2 metric <- "footrule" n_items <- 6 rankings <- sample_mallows( - rho0 = rho, alpha0 = alpha, n_samples = 10, burnin = 1000, thinning = 500 + rho0 = rho, alpha0 = alpha, n_samples = 10, burnin = 1000, thinning = 500 ) alpha_vector <- seq(from = 0, to = 20, by = 0.1) iter <- 1e2 @@ -12,26 +12,26 @@ degree <- 10 # Estimate the logarithm of the partition function of the Mallows rank model # using the estimate partition function logz_estimate <- estimate_partition_function( - method = "importance_sampling", alpha_vector = alpha_vector, - n_items = n_items, metric = "footrule", nmc = iter, degree = degree + method = "importance_sampling", alpha_vector = alpha_vector, + n_items = n_items, metric = "footrule", nmc = iter, degree = degree ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd = 0.5, - lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd = 0.5, + lambda = 0.1, alpha_max = 20 ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, + alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20 ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, + alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20 ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, + alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20 ) diff --git a/inst/examples/rank_distance_example.R b/inst/examples/rank_distance_example.R index d0d4b1af..d7358e48 100644 --- a/inst/examples/rank_distance_example.R +++ b/inst/examples/rank_distance_example.R @@ -1,6 +1,6 @@ # Distance between two vectors of rankings: -rank_distance(1:5,5:1, metric = "kendall") +rank_distance(1:5, 5:1, metric = "kendall") rank_distance(c(2, 4, 3, 6, 1, 7, 5), c(3, 5, 4, 7, 6, 2, 1), metric = "cayley") rank_distance(c(4, 2, 3, 1), c(3, 4, 1, 2), metric = "hamming") rank_distance(c(1, 3, 5, 7, 9, 8, 6, 4, 2), c(1, 2, 3, 4, 9, 8, 7, 6, 5), "ulam") diff --git a/inst/examples/rank_freq_distr_example.R b/inst/examples/rank_freq_distr_example.R index 24f93203..afc93aba 100644 --- a/inst/examples/rank_freq_distr_example.R +++ b/inst/examples/rank_freq_distr_example.R @@ -1,6 +1,6 @@ # Create example data. We set the burn-in and thinning very low # for the sampling to go fast -data0 <- sample_mallows(rho0 = 1:5, alpha=10, n_samples = 1000, +data0 <- sample_mallows(rho0 = 1:5, alpha = 10, n_samples = 1000, burnin = 10, thinning = 1) # Find the frequency distribution -rank_freq_distr(rankings=data0) +rank_freq_distr(rankings = data0) diff --git a/inst/examples/sample_mallows_example.R b/inst/examples/sample_mallows_example.R index 2597c5e5..2654a31b 100644 --- a/inst/examples/sample_mallows_example.R +++ b/inst/examples/sample_mallows_example.R @@ -40,10 +40,9 @@ compute_posterior_intervals(model_fit, burnin = 2000, parameter = "alpha") library(PerMallows) # Set the scale parameter of the PerMallows package corresponding to # alpha0 in BayesMallows -theta0 = alpha0 / n_items +theta0 <- alpha0 / n_items # Sample with PerMallows::rmm sample1 <- rmm(n = 100, sigma0 = rho0, theta = theta0, dist.name = "cayley") # Generate the same sample with sample_mallows sample2 <- sample_mallows(rho0 = rho0, alpha0 = alpha0, n_samples = 100, burnin = 1000, thinning = 1000, metric = "cayley") - diff --git a/inst/examples/smc_mallows_new_users_complete_example.R b/inst/examples/smc_mallows_new_users_complete_example.R index 06e83a73..bf87d24e 100644 --- a/inst/examples/smc_mallows_new_users_complete_example.R +++ b/inst/examples/smc_mallows_new_users_complete_example.R @@ -4,18 +4,18 @@ n_items <- ncol(sushi_rankings) metric <- "footrule" num_new_obs <- 10 logz_estimate <- estimate_partition_function( - method = "importance_sampling", - alpha_vector = seq(from = 0, to = 15, by = 0.1), - n_items = n_items, metric = metric, nmc = 1e2, degree = 10 + method = "importance_sampling", + alpha_vector = seq(from = 0, to = 15, by = 0.1), + n_items = n_items, metric = metric, nmc = 1e2, degree = 10 ) # Calculating rho and alpha samples samples <- smc_mallows_new_users_complete( - R_obs = data, n_items = n_items, metric = metric, - leap_size = floor(n_items / 5), N = 100, Time = nrow(data) / num_new_obs, - mcmc_kernel_app = 5, logz_estimate = logz_estimate, - alpha_prop_sd = 0.1, lambda = 0.001, alpha_max = 1e6, - num_new_obs = num_new_obs, verbose = TRUE + R_obs = data, n_items = n_items, metric = metric, + leap_size = floor(n_items / 5), N = 100, Time = nrow(data) / num_new_obs, + mcmc_kernel_app = 5, logz_estimate = logz_estimate, + alpha_prop_sd = 0.1, lambda = 0.001, alpha_max = 1e6, + num_new_obs = num_new_obs, verbose = TRUE ) # Studying the structure of the output diff --git a/man/expected_dist.Rd b/man/expected_dist.Rd index 2793c269..caa6aa8a 100644 --- a/man/expected_dist.Rd +++ b/man/expected_dist.Rd @@ -20,10 +20,10 @@ A scalar providing the expected value of the \code{metric} under the Mallows ran Compute the expectation of several metrics under the Mallows rank model. } \examples{ -expected_dist(1,5,metric="kendall") -expected_dist(2,6,metric="cayley") -expected_dist(1.5,7,metric="hamming") -expected_dist(5,30,"ulam") -expected_dist(3.5,45,"footrule") -expected_dist(4,10,"spearman") +expected_dist(1, 5, metric = "kendall") +expected_dist(2, 6, metric = "cayley") +expected_dist(1.5, 7, metric = "hamming") +expected_dist(5, 30, "ulam") +expected_dist(3.5, 45, "footrule") +expected_dist(4, 10, "spearman") } diff --git a/man/lik_db_mix.Rd b/man/lik_db_mix.Rd index d1902fa3..08e2c045 100644 --- a/man/lik_db_mix.Rd +++ b/man/lik_db_mix.Rd @@ -51,13 +51,13 @@ mydata <- sample_mallows( n_samples = 100, rho0 = 1:n_items, alpha0 = 10, - metric="kendall") + metric = "kendall") # Compute the likelihood and log-likelihood values under the true model... lik_db_mix( - rho = rbind(1:n_items,1:n_items), + rho = rbind(1:n_items, 1:n_items), alpha = c(10, 10), - weights = c(0.5,0.5), + weights = c(0.5, 0.5), metric = "kendall", rankings = mydata ) @@ -74,22 +74,21 @@ lik_db_mix( # or equivalently, by using the frequency distribution freq_distr <- rank_freq_distr(mydata) lik_db_mix( - rho = rbind(1:n_items,1:n_items), + rho = rbind(1:n_items, 1:n_items), alpha = c(10, 10), weights = c(0.5, 0.5), metric = "kendall", rankings = freq_distr[, 1:n_items], - obs_freq = freq_distr[,n_items+1] + obs_freq = freq_distr[, n_items + 1] ) lik_db_mix( rho = rbind(1:n_items, 1:n_items), alpha = c(10, 10), - weights=c(0.5, 0.5), + weights = c(0.5, 0.5), metric = "kendall", rankings = freq_distr[, 1:n_items], - obs_freq = freq_distr[, n_items+1], - log=TRUE + obs_freq = freq_distr[, n_items + 1], + log = TRUE ) - } diff --git a/man/metropolis_hastings_alpha.Rd b/man/metropolis_hastings_alpha.Rd index ea9d14a1..5f4aba33 100644 --- a/man/metropolis_hastings_alpha.Rd +++ b/man/metropolis_hastings_alpha.Rd @@ -60,12 +60,12 @@ Function to perform Metropolis-Hastings for new rho under Alternatively, if \eqn{N} equals 1, \code{rankings} can be a vector. } \examples{ -rho <- c(1,2,3,4,5,6) +rho <- c(1, 2, 3, 4, 5, 6) alpha <- 2 metric <- "footrule" n_items <- 6 rankings <- sample_mallows( - rho0 = rho, alpha0 = alpha, n_samples = 10, burnin = 1000, thinning = 500 + rho0 = rho, alpha0 = alpha, n_samples = 10, burnin = 1000, thinning = 500 ) alpha_vector <- seq(from = 0, to = 20, by = 0.1) iter <- 1e2 @@ -74,28 +74,28 @@ degree <- 10 # Estimate the logarithm of the partition function of the Mallows rank model # using the estimate partition function logz_estimate <- estimate_partition_function( - method = "importance_sampling", alpha_vector = alpha_vector, - n_items = n_items, metric = "footrule", nmc = iter, degree = degree + method = "importance_sampling", alpha_vector = alpha_vector, + n_items = n_items, metric = "footrule", nmc = iter, degree = degree ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd = 0.5, - lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd = 0.5, + lambda = 0.1, alpha_max = 20 ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, + alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20 ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, + alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20 ) metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, + alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20 ) } \author{ diff --git a/man/rank_distance.Rd b/man/rank_distance.Rd index 1a117ff1..21f67ed5 100644 --- a/man/rank_distance.Rd +++ b/man/rank_distance.Rd @@ -34,7 +34,7 @@ translation of \code{Rankcluster::distCayley} \insertCite{Grimonprez2016}{BayesM \examples{ # Distance between two vectors of rankings: -rank_distance(1:5,5:1, metric = "kendall") +rank_distance(1:5, 5:1, metric = "kendall") rank_distance(c(2, 4, 3, 6, 1, 7, 5), c(3, 5, 4, 7, 6, 2, 1), metric = "cayley") rank_distance(c(4, 2, 3, 1), c(3, 4, 1, 2), metric = "hamming") rank_distance(c(1, 3, 5, 7, 9, 8, 6, 4, 2), c(1, 2, 3, 4, 9, 8, 7, 6, 5), "ulam") diff --git a/man/rank_freq_distr.Rd b/man/rank_freq_distr.Rd index d0cc110c..5d8ea026 100644 --- a/man/rank_freq_distr.Rd +++ b/man/rank_freq_distr.Rd @@ -23,8 +23,8 @@ Construct the frequency distribution of the distinct ranking \examples{ # Create example data. We set the burn-in and thinning very low # for the sampling to go fast -data0 <- sample_mallows(rho0 = 1:5, alpha=10, n_samples = 1000, +data0 <- sample_mallows(rho0 = 1:5, alpha = 10, n_samples = 1000, burnin = 10, thinning = 1) # Find the frequency distribution -rank_freq_distr(rankings=data0) +rank_freq_distr(rankings = data0) } diff --git a/man/sample_mallows.Rd b/man/sample_mallows.Rd index 41046c37..939f70aa 100644 --- a/man/sample_mallows.Rd +++ b/man/sample_mallows.Rd @@ -106,13 +106,12 @@ compute_posterior_intervals(model_fit, burnin = 2000, parameter = "alpha") library(PerMallows) # Set the scale parameter of the PerMallows package corresponding to # alpha0 in BayesMallows -theta0 = alpha0 / n_items +theta0 <- alpha0 / n_items # Sample with PerMallows::rmm sample1 <- rmm(n = 100, sigma0 = rho0, theta = theta0, dist.name = "cayley") # Generate the same sample with sample_mallows sample2 <- sample_mallows(rho0 = rho0, alpha0 = alpha0, n_samples = 100, burnin = 1000, thinning = 1000, metric = "cayley") - } \references{ \insertAllCited{} diff --git a/man/smc_mallows_new_users_complete.Rd b/man/smc_mallows_new_users_complete.Rd index 95bc025a..be298133 100644 --- a/man/smc_mallows_new_users_complete.Rd +++ b/man/smc_mallows_new_users_complete.Rd @@ -77,18 +77,18 @@ n_items <- ncol(sushi_rankings) metric <- "footrule" num_new_obs <- 10 logz_estimate <- estimate_partition_function( - method = "importance_sampling", - alpha_vector = seq(from = 0, to = 15, by = 0.1), - n_items = n_items, metric = metric, nmc = 1e2, degree = 10 + method = "importance_sampling", + alpha_vector = seq(from = 0, to = 15, by = 0.1), + n_items = n_items, metric = metric, nmc = 1e2, degree = 10 ) # Calculating rho and alpha samples samples <- smc_mallows_new_users_complete( - R_obs = data, n_items = n_items, metric = metric, - leap_size = floor(n_items / 5), N = 100, Time = nrow(data) / num_new_obs, - mcmc_kernel_app = 5, logz_estimate = logz_estimate, - alpha_prop_sd = 0.1, lambda = 0.001, alpha_max = 1e6, - num_new_obs = num_new_obs, verbose = TRUE + R_obs = data, n_items = n_items, metric = metric, + leap_size = floor(n_items / 5), N = 100, Time = nrow(data) / num_new_obs, + mcmc_kernel_app = 5, logz_estimate = logz_estimate, + alpha_prop_sd = 0.1, lambda = 0.001, alpha_max = 1e6, + num_new_obs = num_new_obs, verbose = TRUE ) # Studying the structure of the output diff --git a/tests/testthat.R b/tests/testthat.R index c8594a2a..06eb8dc1 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,3 @@ library(testthat) library(BayesMallows) test_check("BayesMallows") - diff --git a/tests/testthat/test-classes_compute_consensus_posterior.R b/tests/testthat/test-classes_compute_consensus_posterior.R index 7d826c18..79035395 100644 --- a/tests/testthat/test-classes_compute_consensus_posterior.R +++ b/tests/testthat/test-classes_compute_consensus_posterior.R @@ -19,26 +19,26 @@ alpha_vector <- seq(from = 0, to = 15, by = 0.1) iter <- 1e3 degree <- 10 logz_estimate <- estimate_partition_function( - method = "importance_sampling", alpha_vector = alpha_vector, - n_items = n_items, metric = metric, nmc = iter, degree = degree + method = "importance_sampling", alpha_vector = alpha_vector, + n_items = n_items, metric = metric, nmc = iter, degree = degree ) data <- sushi_rankings[1:100, ] leap_size <- floor(n_items / 5) nmc <- N <- 1000 Time <- 20 fit_smc <- smc_mallows_new_users_complete( - R_obs = data, n_items = n_items, metric = metric, leap_size = leap_size, - N = N, Time = Time, logz_estimate = logz_estimate, mcmc_kernel_app = 5, - num_new_obs = 5, alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 1e6 + R_obs = data, n_items = n_items, metric = metric, leap_size = leap_size, + N = N, Time = Time, logz_estimate = logz_estimate, mcmc_kernel_app = 5, + num_new_obs = 5, alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 1e6 ) fit_smc_alpha <- fit_smc$alpha_samples[, Time + 1] fit_smc_post_alpha <- compute_posterior_intervals_alpha( - output = fit_smc_alpha, nmc = nmc, burnin = 0, verbose = FALSE + output = fit_smc_alpha, nmc = nmc, burnin = 0, verbose = FALSE ) fit_smc_rho <- fit_smc$rho_samples[, , Time + 1] fit_smc_post_rho <- compute_posterior_intervals_rho( - output = fit_smc_rho, nmc = nmc, burnin = 0, - verbose = FALSE + output = fit_smc_rho, nmc = nmc, burnin = 0, + verbose = FALSE ) # Emulating the internal workings of compute_posterior_intervals =============== @@ -48,19 +48,19 @@ fit_smc_post_rho <- compute_posterior_intervals_rho( fit_bm_alpha <- fit_bm$alpha fit_bm_alpha <- dplyr::group_by(fit_bm_alpha, .data$cluster) class(fit_bm_alpha) <- c( - "posterior_BayesMallows", "grouped_df", "tbl_df", "tbl", "data.frame" + "posterior_BayesMallows", "grouped_df", "tbl_df", "tbl", "data.frame" ) fit_bm_post_internal_alpha <- .compute_posterior_intervals( - fit_bm_alpha, "alpha", .95, 3L + fit_bm_alpha, "alpha", .95, 3L ) fit_bm_rho <- fit_bm$rho fit_bm_rho <- dplyr::group_by(fit_bm_rho, .data$cluster) class(fit_bm_rho) <- c( - "posterior_BayesMallows", "grouped_df", "tbl_df", "tbl", "data.frame" + "posterior_BayesMallows", "grouped_df", "tbl_df", "tbl", "data.frame" ) fit_bm_post_internal_rho <- .compute_posterior_intervals( - fit_bm_rho, "rho", .95, 3L + fit_bm_rho, "rho", .95, 3L ) # SMC-Mallows -------------------------------------------- # @@ -70,10 +70,10 @@ fit_smc_alpha$n_clusters <- 1 fit_smc_alpha$cluster <- "Cluster 1" fit_smc_alpha <- dplyr::group_by(fit_smc_alpha, .data$cluster) class(fit_smc_alpha) <- c( - "posterior_SMCMallows", "grouped_df", "tbl_df", "tbl", "data.frame" + "posterior_SMCMallows", "grouped_df", "tbl_df", "tbl", "data.frame" ) fit_smc_post_internal_alpha <- .compute_posterior_intervals( - fit_smc_alpha, "alpha", .95, 3L + fit_smc_alpha, "alpha", .95, 3L ) fit_smc_rho <- smc_processing(fit_smc_rho) @@ -81,29 +81,29 @@ fit_smc_rho$n_clusters <- 1 fit_smc_rho$cluster <- "Cluster 1" fit_smc_rho <- dplyr::group_by(fit_smc_rho, .data$cluster) class(fit_smc_rho) <- c( - "posterior_SMCMallows", "grouped_df", "tbl_df", "tbl", "data.frame" + "posterior_SMCMallows", "grouped_df", "tbl_df", "tbl", "data.frame" ) fit_smc_post_internal_rho <- .compute_posterior_intervals( - fit_smc_alpha, "rho", .95, 3L, discrete = TRUE + fit_smc_alpha, "rho", .95, 3L, discrete = TRUE ) # Testing classes ============================================================== test_that("Classes are correctly attributed", { - expect_s3_class(fit_bm, "BayesMallows") - expect_s3_class(fit_smc, "SMCMallows") - expect_s3_class(fit_bm_post_alpha, "data.frame") - expect_s3_class(fit_bm_post_rho, "data.frame") - expect_s3_class(fit_smc_post_alpha, "data.frame") - expect_s3_class(fit_smc_post_rho, "data.frame") - expect_error(.compute_posterior_intervals(fit_bm_post_alpha)) - expect_error(.compute_posterior_intervals(fit_bm_post_rho)) - expect_error(.compute_posterior_intervals(fit_smc_post_alpha)) - expect_error(.compute_posterior_intervals(fit_smc_post_rho)) - expect_s3_class(fit_bm_post_internal_alpha, "data.frame") - expect_s3_class(fit_bm_post_internal_rho, "data.frame") - expect_s3_class(fit_smc_post_internal_alpha, "data.frame") - expect_s3_class(fit_smc_post_internal_rho, "data.frame") + expect_s3_class(fit_bm, "BayesMallows") + expect_s3_class(fit_smc, "SMCMallows") + expect_s3_class(fit_bm_post_alpha, "data.frame") + expect_s3_class(fit_bm_post_rho, "data.frame") + expect_s3_class(fit_smc_post_alpha, "data.frame") + expect_s3_class(fit_smc_post_rho, "data.frame") + expect_error(.compute_posterior_intervals(fit_bm_post_alpha)) + expect_error(.compute_posterior_intervals(fit_bm_post_rho)) + expect_error(.compute_posterior_intervals(fit_smc_post_alpha)) + expect_error(.compute_posterior_intervals(fit_smc_post_rho)) + expect_s3_class(fit_bm_post_internal_alpha, "data.frame") + expect_s3_class(fit_bm_post_internal_rho, "data.frame") + expect_s3_class(fit_smc_post_internal_alpha, "data.frame") + expect_s3_class(fit_smc_post_internal_rho, "data.frame") }) context("compute_consensus() classes") @@ -112,15 +112,15 @@ fit_bm_consensus_cp <- compute_consensus(fit_bm, type = "CP") fit_bm_consensus_map <- compute_consensus(fit_bm, type = "MAP") fit_smc_rho <- fit_smc$rho_samples[, , Time + 1] fit_smc_consensus_cp <- compute_rho_consensus( - output = fit_smc_rho, nmc = nmc, burnin = 0, C = 1, type = "CP" + output = fit_smc_rho, nmc = nmc, burnin = 0, C = 1, type = "CP" ) fit_smc_consensus_map <- compute_rho_consensus( - output = fit_smc_rho, nmc = nmc, burnin = 0, C = 1, type = "MAP" + output = fit_smc_rho, nmc = nmc, burnin = 0, C = 1, type = "MAP" ) test_that("Classes are correctly attributed", { - expect_s3_class(fit_bm_consensus_cp, "data.frame") - expect_s3_class(fit_bm_consensus_map, "data.frame") - expect_s3_class(fit_smc_consensus_cp, "data.frame") - expect_s3_class(fit_smc_consensus_map, "data.frame") + expect_s3_class(fit_bm_consensus_cp, "data.frame") + expect_s3_class(fit_bm_consensus_map, "data.frame") + expect_s3_class(fit_smc_consensus_cp, "data.frame") + expect_s3_class(fit_smc_consensus_map, "data.frame") }) diff --git a/tests/testthat/test-compute_mallows.R b/tests/testthat/test-compute_mallows.R index 1af50787..d5bf223a 100644 --- a/tests/testthat/test-compute_mallows.R +++ b/tests/testthat/test-compute_mallows.R @@ -5,7 +5,7 @@ context("Testing compute_mallows") test_that("miscellaneous input validation", { namat <- potato_visual - namat[c(1,2, 3), c(7, 9)] <- NA_real_ + namat[c(1, 2, 3), c(7, 9)] <- NA_real_ expect_error(compute_mallows(rankings = namat, na_action = "fail")) expect_output(compute_mallows(rankings = namat, nmc = 2, na_action = "omit"), "Omitting 9 rows from rankings due to NA values") @@ -20,7 +20,7 @@ test_that("miscellaneous input validation", { expect_error(compute_mallows(rankings = potato_visual, nmc = -100)) }) -test_that("rho_init is properly validated",{ +test_that("rho_init is properly validated", { m <- potato_visual expect_error(compute_mallows(rankings = m, rho_init = 1:(ncol(m) - 1))) expect_error(compute_mallows(rankings = m, rho_init = c(potato_true_ranking[-1], 22))) @@ -31,7 +31,7 @@ test_that("rho_init is properly validated",{ } ) -test_that("compute_mallows discovers inconsistent rankings",{ +test_that("compute_mallows discovers inconsistent rankings", { expect_error(compute_mallows( rankings = matrix(c(1, 2, -3, 1, 2, 3), nrow = 2, byrow = TRUE) @@ -66,7 +66,7 @@ test_that("compute_mallows with missing data works", { test_that("compute_mallows runs with the right distances", { - for(metric in c("footrule", "spearman", "cayley", "kendall", "ulam", "hamming")){ + for (metric in c("footrule", "spearman", "cayley", "kendall", "ulam", "hamming")) { expect_s3_class(compute_mallows(potato_visual, metric = metric, nmc = 3), "BayesMallows") } @@ -82,7 +82,7 @@ test_that("compute_mallows handles integer preferences", { expect_s3_class(m, "BayesMallows") }) -test_that("compute_mallows handles data with lots of missings",{ +test_that("compute_mallows handles data with lots of missings", { R_partial2 <- structure(c(NA, NA, NA, NA, NA, NA, 9, NA, NA, 7, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, @@ -105,7 +105,7 @@ test_that("compute_mallows handles data with lots of missings",{ } ) -test_that("compute_mallows treats obs_freq properly",{ +test_that("compute_mallows treats obs_freq properly", { m1 <- compute_mallows(rankings = potato_visual, obs_freq = rep(1, nrow(potato_visual)), seed = 2233) m2 <- compute_mallows(rankings = potato_visual, seed = 2233) @@ -120,7 +120,7 @@ test_that("compute_mallows treats obs_freq properly",{ # Next, we create a new hypthetical beach_preferences dataframe where each # assessor is replicated 1-4 times - beach_pref_rep <- do.call(rbind, lapply(split(beach_small, f = seq_len(nrow(beach_small))), function(dd){ + beach_pref_rep <- do.call(rbind, lapply(split(beach_small, f = seq_len(nrow(beach_small))), function(dd) { ret <- merge( dd, data.frame(new_assessor = seq_len(obs_freq[dd$assessor])), diff --git a/tests/testthat/test-distance_function.R b/tests/testthat/test-distance_function.R index 031f7097..66cd0873 100644 --- a/tests/testthat/test-distance_function.R +++ b/tests/testthat/test-distance_function.R @@ -1,6 +1,6 @@ context("Testing computation of distance") # Brute force formula -check_dist <- function(n, fun){ +check_dist <- function(n, fun) { # Generate all permutations perm <- permutations(n) @@ -9,7 +9,7 @@ check_dist <- function(n, fun){ # Loop over some n values test_that("footrule distance is correct", { - for(n in c(2, 3, 5)){ + for (n in c(2, 3, 5)) { expect_equal( check_dist(n, fun = function(r1, r2) { get_rank_distance(r1, r2, "footrule") @@ -20,7 +20,7 @@ test_that("footrule distance is correct", { # Loop over some n values test_that("Spearman distance is correct", { - for(n in c(2, 3, 5)){ + for (n in c(2, 3, 5)) { expect_equal( check_dist(n, fun = function(r1, r2) { get_rank_distance(r1, r2, "spearman") @@ -31,7 +31,7 @@ test_that("Spearman distance is correct", { # Loop over some n values test_that("Kendall distance is correct", { - for(n in c(2, 3, 5)){ + for (n in c(2, 3, 5)) { expect_equal( check_dist(n, fun = function(r1, r2) { get_rank_distance(r1, r2, "kendall") @@ -44,7 +44,7 @@ test_that("Kendall distance is correct", { # Loop over some n values test_that("Cayley distance is correct", { - for(n in c(2, 3, 5)){ + for (n in c(2, 3, 5)) { expect_equal( check_dist(n, fun = function(r1, r2) { get_rank_distance(r1, r2, "cayley") @@ -57,7 +57,7 @@ test_that("Cayley distance is correct", { # Loop over some n values test_that("Hamming distance is correct", { - for(n in c(2, 3, 5)){ + for (n in c(2, 3, 5)) { expect_equal( check_dist(n, fun = function(r1, r2) { get_rank_distance(r1, r2, "hamming") @@ -70,7 +70,7 @@ test_that("Hamming distance is correct", { # Loop over some n values test_that("Ulam distance is correct", { - for(n in c(2, 3, 5)){ + for (n in c(2, 3, 5)) { expect_equal( check_dist(n, fun = function(r1, r2) { get_rank_distance(r1, r2, "ulam") @@ -83,7 +83,7 @@ test_that("Ulam distance is correct", { test_that("Exported rank_distance is correct", { # Distance between two vectors of rankings: - expect_equal(rank_distance(1:5,5:1, metric = "kendall"), 10) + expect_equal(rank_distance(1:5, 5:1, metric = "kendall"), 10) expect_equal( rank_distance(c(2, 4, 3, 6, 1, 7, 5), c(3, 5, 4, 7, 6, 2, 1), metric = "cayley"), 6) diff --git a/tests/testthat/test-estimate_partition_function.R b/tests/testthat/test-estimate_partition_function.R index 289830fa..accccda8 100644 --- a/tests/testthat/test-estimate_partition_function.R +++ b/tests/testthat/test-estimate_partition_function.R @@ -4,7 +4,7 @@ context("Testing function estimate_partition_function") test_that( "estimate_partition_function fails for wrong asymptotic metrics", { - for(metric in c("cayley", "hamming", "kendall", "ulam")){ + for (metric in c("cayley", "hamming", "kendall", "ulam")) { expect_error( estimate_partition_function(method = "asymptotic", alpha_vector = seq(from = 1, to = 2, by = .1), diff --git a/tests/testthat/test-expected_dist.R b/tests/testthat/test-expected_dist.R index 564ea580..7d236fba 100644 --- a/tests/testthat/test-expected_dist.R +++ b/tests/testthat/test-expected_dist.R @@ -1,10 +1,10 @@ test_that("expected dist works", { - expect_equal(round(expected_dist(5,5,metric="kendall"), 6), 1.749137) - expect_equal(round(expected_dist(12,6,metric="cayley"), 6), 1.375779) - expect_equal(round(expected_dist(1.5 * 7,7,metric="hamming"), 6), 2.69246) - expect_equal(round(expected_dist(5 * 30,30,"ulam"), 6), 4.133538) - expect_equal(round(expected_dist(3.5 * 45,45,"footrule"), 6), 0.080459) - expect_equal(round(expected_dist(4 * 10,10,"spearman"), 6), 0.006033) + expect_equal(round(expected_dist(5, 5, metric = "kendall"), 6), 1.749137) + expect_equal(round(expected_dist(12, 6, metric = "cayley"), 6), 1.375779) + expect_equal(round(expected_dist(1.5 * 7, 7, metric = "hamming"), 6), 2.69246) + expect_equal(round(expected_dist(5 * 30, 30, "ulam"), 6), 4.133538) + expect_equal(round(expected_dist(3.5 * 45, 45, "footrule"), 6), 0.080459) + expect_equal(round(expected_dist(4 * 10, 10, "spearman"), 6), 0.006033) }) diff --git a/tests/testthat/test-generate_ranking.R b/tests/testthat/test-generate_ranking.R index 75007733..2d89f921 100644 --- a/tests/testthat/test-generate_ranking.R +++ b/tests/testthat/test-generate_ranking.R @@ -16,7 +16,7 @@ pair_comp <- tribble( pair_comp_tc <- generate_transitive_closure(pair_comp) beach_tc <- generate_transitive_closure(beach_preferences) -test_that("generate_initial_ranking works",{ +test_that("generate_initial_ranking works", { expect_error(generate_initial_ranking(pair_comp)) expect_is(generate_initial_ranking(pair_comp_tc), "matrix") @@ -25,7 +25,7 @@ test_that("generate_initial_ranking works",{ } ) -test_that("generate_initial_ranking with shuffle_unranked works",{ +test_that("generate_initial_ranking with shuffle_unranked works", { small_tc <- beach_tc[beach_tc$assessor %in% 1:6 & beach_tc$bottom_item %in% 1:4 & beach_tc$top_item %in% 1:4, ] set.seed(123) @@ -44,7 +44,7 @@ test_that("generate_initial_ranking with shuffle_unranked works",{ -test_that("generate_initial_ranking with random works",{ +test_that("generate_initial_ranking with random works", { small_tc <- beach_tc[beach_tc$assessor %in% 1:6 & beach_tc$bottom_item %in% 1:4 & beach_tc$top_item %in% 1:4, ] @@ -65,4 +65,3 @@ test_that("generate_initial_ranking with random works",{ expect_error(generate_initial_ranking(tc = beach_tc, random = TRUE)) } ) - diff --git a/tests/testthat/test-lik_db_mix.R b/tests/testthat/test-lik_db_mix.R index 0328f77d..c916f593 100644 --- a/tests/testthat/test-lik_db_mix.R +++ b/tests/testthat/test-lik_db_mix.R @@ -10,9 +10,9 @@ test_that("lik_db_mix works", { # Compute the likelihood and log-likelihood values under the true model... expect_equal( sprintf("%.3e", lik_db_mix( - rho = rbind(1:n_items,1:n_items), + rho = rbind(1:n_items, 1:n_items), alpha = c(2 * n_items, 2 * n_items), - weights = c(0.5,0.5), + weights = c(0.5, 0.5), metric = "kendall", rankings = mydata )), "1.434e-74") @@ -30,29 +30,29 @@ test_that("lik_db_mix works", { freq_distr <- rank_freq_distr(mydata) expect_equal( sprintf("%.3e", lik_db_mix( - rho = rbind(1:n_items,1:n_items), + rho = rbind(1:n_items, 1:n_items), alpha = c(2 * n_items, 2 * n_items), weights = c(0.5, 0.5), metric = "kendall", rankings = freq_distr[, 1:n_items], - obs_freq = freq_distr[,n_items+1] + obs_freq = freq_distr[, n_items + 1] )), "1.434e-74") expect_equal(round(lik_db_mix( rho = rbind(1:n_items, 1:n_items), alpha = c(2 * n_items, 2 * n_items), - weights=c(0.5, 0.5), + weights = c(0.5, 0.5), metric = "kendall", rankings = freq_distr[, 1:n_items], - obs_freq = freq_distr[, n_items+1], - log=TRUE + obs_freq = freq_distr[, n_items + 1], + log = TRUE ), 4), -170.0306) expect_error( lik_db_mix( - rho = rbind(1:n_items,1:n_items), + rho = rbind(1:n_items, 1:n_items), alpha = c(2 * n_items, 2 * n_items), - weights = c(0.5,0.5), + weights = c(0.5, 0.5), metric = "kendall", rankings = mydata, obs_freq = c(1, 2) diff --git a/tests/testthat/test-mcmc_function.R b/tests/testthat/test-mcmc_function.R index 0d21112a..780bae13 100644 --- a/tests/testthat/test-mcmc_function.R +++ b/tests/testthat/test-mcmc_function.R @@ -38,4 +38,3 @@ test_that( nrow() == 0 ) ) - diff --git a/tests/testthat/test-misc_cpp.R b/tests/testthat/test-misc_cpp.R index b72d099a..eb37682d 100644 --- a/tests/testthat/test-misc_cpp.R +++ b/tests/testthat/test-misc_cpp.R @@ -31,7 +31,7 @@ test_that( n <- 10000L probs <- c(0.1, 0.2, 0.7) values <- integer(n) - for(i in seq(1L, n, 1L)){ + for (i in seq(1L, n, 1L)) { values[[i]] <- BayesMallows:::sample_int(probs) } freqs <- table(values) / length(values) @@ -42,7 +42,7 @@ test_that( # TRUE, due to the randomness in sampling skip_on_cran() diff <- abs(probs - freqs) - for(i in 1:3){ + for (i in 1:3) { expect_lt(diff[[i]], 0.02) } diff --git a/tests/testthat/test-misc_functions.R b/tests/testthat/test-misc_functions.R index 512adaad..ed2d975e 100644 --- a/tests/testthat/test-misc_functions.R +++ b/tests/testthat/test-misc_functions.R @@ -1,7 +1,7 @@ context("Testing misc functions") test_that( - "validate_permutation is correct",{ + "validate_permutation is correct", { expect_equal( BayesMallows:::validate_permutation(c(1, 3, 3)), FALSE diff --git a/tests/testthat/test-partition_function.R b/tests/testthat/test-partition_function.R index 55459687..d6b7f715 100644 --- a/tests/testthat/test-partition_function.R +++ b/tests/testthat/test-partition_function.R @@ -2,29 +2,29 @@ context("Testing computation of partition functions") # Brute force formula -check_log_zn <- function(n, alpha, metric){ +check_log_zn <- function(n, alpha, metric) { # Generate all permutations perm <- permutations(n) # Compute the partition function - if(metric == "footrule") { - log(sum(exp(- alpha / n * colSums( abs(t(perm ) - 1:n ))))) - } else if(metric == "spearman") { - log(sum(exp(- alpha / n * colSums( (t(perm ) - 1:n )^2)))) - } else if(metric == "kendall") { + if (metric == "footrule") { + log(sum(exp(- alpha / n * colSums(abs(t(perm) - 1:n))))) + } else if (metric == "spearman") { + log(sum(exp(- alpha / n * colSums((t(perm) - 1:n)^2)))) + } else if (metric == "kendall") { log(sum(exp(- alpha / n * apply(perm, 1, get_rank_distance, r2 = 1:n, metric = "kendall")))) - } else if(metric == "cayley") { + } else if (metric == "cayley") { log(sum(exp(- alpha / n * apply(perm, 1, get_rank_distance, r2 = 1:n, metric = "cayley")))) - } else if(metric == "hamming") { + } else if (metric == "hamming") { log(sum(exp(- alpha / n * apply(perm, 1, get_rank_distance, r2 = 1:n, metric = "hamming")))) - } else if(metric == "ulam") { + } else if (metric == "ulam") { log(sum(unlist(lapply(seq(0, n - 1, by = 1), function(x) { PerMallows::count.perms(perm.length = n, dist.value = x, dist.name = "ulam") * exp(-alpha / n * x) })))) @@ -39,8 +39,8 @@ test_that("footrule partition function is correct", { footrule_sequence <- dplyr::filter(BayesMallows:::partition_function_data, metric == "footrule", type == "cardinalities")$values - for(n in c(1, 2, 3, 5)){ - for(alpha in c(0.001, 0.1, 1)){ + for (n in c(1, 2, 3, 5)) { + for (alpha in c(0.001, 0.1, 1)) { expect_equal( get_partition_function(n = n, alpha = alpha, cardinalities = footrule_sequence[[n]], metric = "footrule"), @@ -54,8 +54,8 @@ test_that("Spearman partition function is correct", { spearman_sequence <- dplyr::filter(BayesMallows:::partition_function_data, metric == "spearman", type == "cardinalities")$values - for(n in c(1, 2, 3)){ - for(alpha in c(0.001, 0.1, 1)){ + for (n in c(1, 2, 3)) { + for (alpha in c(0.001, 0.1, 1)) { expect_equal( get_partition_function(n = n, alpha = alpha, cardinalities = spearman_sequence[[n]], metric = "spearman"), @@ -66,8 +66,8 @@ test_that("Spearman partition function is correct", { test_that("Kendall partition function is correct", { - for(n in c(1, 2, 3)){ - for(alpha in c(0.001, 0.1, 1)){ + for (n in c(1, 2, 3)) { + for (alpha in c(0.001, 0.1, 1)) { expect_equal( get_partition_function(n = n, alpha = alpha, metric = "kendall"), check_log_zn(n, alpha, "kendall") @@ -76,8 +76,8 @@ test_that("Kendall partition function is correct", { }}) test_that("Cayley partition function is correct", { - for(n in c(1, 2, 3)){ - for(alpha in c(0.001, 0.1, 1)){ + for (n in c(1, 2, 3)) { + for (alpha in c(0.001, 0.1, 1)) { expect_equal( get_partition_function(n = n, alpha = alpha, metric = "cayley"), check_log_zn(n, alpha, "cayley") @@ -87,8 +87,8 @@ test_that("Cayley partition function is correct", { test_that("Hamming partition function is correct", { - for(n in c(1, 2, 3)){ - for(alpha in c(0.001, 0.1, 1)){ + for (n in c(1, 2, 3)) { + for (alpha in c(0.001, 0.1, 1)) { expect_equal( get_partition_function(n = n, alpha = alpha, metric = "hamming"), check_log_zn(n, alpha, "hamming") @@ -100,8 +100,8 @@ test_that("Ulam partition function is correct", { ulam_sequence <- dplyr::filter(BayesMallows:::partition_function_data, metric == "ulam", type == "cardinalities")$values - for(n in c(1, 2, 3)){ - for(alpha in c(0.001, 0.1, 1)){ + for (n in c(1, 2, 3)) { + for (alpha in c(0.001, 0.1, 1)) { expect_equal( get_partition_function(n = n, alpha = alpha, cardinalities = ulam_sequence[[n]], @@ -134,7 +134,7 @@ test_that("estimate_partition_function runs in parallel", { nmc = 20, degree = degree) - lapply(1:2, function(x){ + lapply(1:2, function(x) { cl <- parallel::makeCluster(x) fit <- estimate_partition_function(method = "importance_sampling", alpha_vector = alpha_vector, diff --git a/tests/testthat/test-plot.R b/tests/testthat/test-plot.R index bad97618..9791818b 100644 --- a/tests/testthat/test-plot.R +++ b/tests/testthat/test-plot.R @@ -22,6 +22,6 @@ test_that("plot.BayesMallows works", { expect_s3_class(plot(m, burnin = 4, parameter = "cluster_probs"), "ggplot") expect_s3_class(plot(m, burnin = 4, parameter = "cluster_assignment"), "ggplot") - m <- compute_mallows(preferences = beach_preferences[1:100,], nmc = 10, error_model = "bernoulli") + m <- compute_mallows(preferences = beach_preferences[1:100, ], nmc = 10, error_model = "bernoulli") expect_s3_class(plot(m, burnin = 3, parameter = "theta"), "ggplot") }) diff --git a/tests/testthat/test-rank_freq_distr.R b/tests/testthat/test-rank_freq_distr.R index 53e81fda..1f6e2643 100644 --- a/tests/testthat/test-rank_freq_distr.R +++ b/tests/testthat/test-rank_freq_distr.R @@ -25,7 +25,7 @@ test_that("rank_freq_distr works", { ) set.seed(9988) - rows <- unlist(Map(function(x, y){ + rows <- unlist(Map(function(x, y) { rep(y, each = x) }, x = sample(20:100, nrow(potato_visual), replace = TRUE), @@ -61,4 +61,3 @@ test_that("rank_freq_distr works", { "", "", "", "", "", "", "", "freq"))) ) }) - diff --git a/tests/testthat/test-sample_mallows.R b/tests/testthat/test-sample_mallows.R index 74fd2d64..2060e38e 100644 --- a/tests/testthat/test-sample_mallows.R +++ b/tests/testthat/test-sample_mallows.R @@ -8,7 +8,7 @@ rho0 <- seq(from = 1, to = n_items, by = 1) # Set the scale alpha0 <- 10 -for(m in c("footrule", "spearman", "cayley", "hamming", "kendall", "ulam")){ +for (m in c("footrule", "spearman", "cayley", "hamming", "kendall", "ulam")) { samples <- sample_mallows(rho0 = rho0, alpha0 = alpha0, n_samples = 100, burnin = 1000, thinning = 1000, metric = m, leap_size = 1) test_that( diff --git a/tests/testthat/test-smc_individual_functions.R b/tests/testthat/test-smc_individual_functions.R index ea63a153..62e853dc 100644 --- a/tests/testthat/test-smc_individual_functions.R +++ b/tests/testthat/test-smc_individual_functions.R @@ -1,115 +1,115 @@ context("SMC individual functions") -rho <- c(1,2,3,4,5,6) +rho <- c(1, 2, 3, 4, 5, 6) alpha <- 2 metric <- "footrule" n_items <- 6 test_that("get_mallows_loglik() works as expected", { - set.seed(101) - loglik <- get_mallows_loglik( - alpha = alpha, rho = t(rho), n_items = length(rho), rankings = t(rho), - metric = metric - ) - expect_equal(loglik, 0) - - rankings <- sample_mallows( - rho0 = rho, alpha0 = alpha, n_samples = 10, - burnin = 1000, thinning = 500 - ) - loglik <- get_mallows_loglik( - alpha = alpha, rho = rho, n_items = n_items, rankings = rankings, - metric = metric - ) - expect_equivalent(loglik, -22.6667, tol=1e-4) + set.seed(101) + loglik <- get_mallows_loglik( + alpha = alpha, rho = t(rho), n_items = length(rho), rankings = t(rho), + metric = metric + ) + expect_equal(loglik, 0) + + rankings <- sample_mallows( + rho0 = rho, alpha0 = alpha, n_samples = 10, + burnin = 1000, thinning = 500 + ) + loglik <- get_mallows_loglik( + alpha = alpha, rho = rho, n_items = n_items, rankings = rankings, + metric = metric + ) + expect_equivalent(loglik, -22.6667, tol = 1e-4) }) test_that("smc_metropolis_hastings_rho() works as expected", { - set.seed(101) - # This functions uses get_mallows_log_lik and leap_and_shift_probs - # so if the checks match in those worker functions then it is very likely - # that this function will return the correct outputs. - rankings <- sample_mallows( - rho0 = rho, alpha0 = alpha, n_samples = 10, - burnin = 1000, thinning = 500 - ) - - # you can confirm the print statements inside the metropolis_hastings_rho - # match get_mallows_loglik and leap_and_shift_probs - test_1 <- metropolis_hastings_rho( - alpha = alpha, n_items = n_items, rankings = t(rho), metric = metric, - rho = rho, leap_size = 1 - ) - dist_1 <- BayesMallows:::get_rank_distance(rho, test_1, metric= "ulam") - expect_equal(test_1, as.matrix(c(1, 2, 3, 5, 4, 6))) - # if rho != rho_prime, then it should have a ulam distance of 1 - # if rho == rho_prime, then it should have ulam distance of 0 - expect_equal(dist_1, 1) - - test_2 <- metropolis_hastings_rho( - alpha = alpha, n_items = n_items, rankings = t(rho), metric = metric, - rho = rho, leap_size = 2 - ) - dist_2 <- BayesMallows:::get_rank_distance(rho, test_2, metric = "ulam") - expect_equal(test_2, as.matrix(c(1, 2, 3, 4, 5, 6))) - expect_equal(dist_2, 0) - - test_3 <- metropolis_hastings_rho( - alpha = alpha, n_items = n_items, rankings = t(rho), metric = metric, - rho = rho, leap_size = 3 - ) - dist_3 <- BayesMallows:::get_rank_distance(rho, test_3, metric = "ulam") - expect_equal(test_3, as.matrix(c(1, 2, 3, 4, 5, 6))) - expect_equal(dist_3, 0) - - # we have a ranking data set containing 10 rankings over 6 items - test_4 <- metropolis_hastings_rho( - alpha = alpha, n_items = n_items, rankings = rankings, metric = metric, - rho = rho, leap_size = 1 - ) - dist_4 <- BayesMallows:::get_rank_distance(rho, test_4, metric = "ulam") - expect_equal(test_4, as.matrix(c(1, 2, 3, 4, 5, 6))) - expect_equal(dist_4, 0) + set.seed(101) + # This functions uses get_mallows_log_lik and leap_and_shift_probs + # so if the checks match in those worker functions then it is very likely + # that this function will return the correct outputs. + rankings <- sample_mallows( + rho0 = rho, alpha0 = alpha, n_samples = 10, + burnin = 1000, thinning = 500 + ) + + # you can confirm the print statements inside the metropolis_hastings_rho + # match get_mallows_loglik and leap_and_shift_probs + test_1 <- metropolis_hastings_rho( + alpha = alpha, n_items = n_items, rankings = t(rho), metric = metric, + rho = rho, leap_size = 1 + ) + dist_1 <- BayesMallows:::get_rank_distance(rho, test_1, metric = "ulam") + expect_equal(test_1, as.matrix(c(1, 2, 3, 5, 4, 6))) + # if rho != rho_prime, then it should have a ulam distance of 1 + # if rho == rho_prime, then it should have ulam distance of 0 + expect_equal(dist_1, 1) + + test_2 <- metropolis_hastings_rho( + alpha = alpha, n_items = n_items, rankings = t(rho), metric = metric, + rho = rho, leap_size = 2 + ) + dist_2 <- BayesMallows:::get_rank_distance(rho, test_2, metric = "ulam") + expect_equal(test_2, as.matrix(c(1, 2, 3, 4, 5, 6))) + expect_equal(dist_2, 0) + + test_3 <- metropolis_hastings_rho( + alpha = alpha, n_items = n_items, rankings = t(rho), metric = metric, + rho = rho, leap_size = 3 + ) + dist_3 <- BayesMallows:::get_rank_distance(rho, test_3, metric = "ulam") + expect_equal(test_3, as.matrix(c(1, 2, 3, 4, 5, 6))) + expect_equal(dist_3, 0) + + # we have a ranking data set containing 10 rankings over 6 items + test_4 <- metropolis_hastings_rho( + alpha = alpha, n_items = n_items, rankings = rankings, metric = metric, + rho = rho, leap_size = 1 + ) + dist_4 <- BayesMallows:::get_rank_distance(rho, test_4, metric = "ulam") + expect_equal(test_4, as.matrix(c(1, 2, 3, 4, 5, 6))) + expect_equal(dist_4, 0) }) test_that("smc_leap_and_shift_probs() works as expected", { - set.seed(101) - n_items <- length(rho) - - # leap_size has a possible range, the BayesMallows papers suggest - # leap_size = floor(n_items/5) but the leap_size can be up to n_items/2. - # Note that leap_size must be integered valued. - - # if leap_size = 1, then forwards_prob = backwards_prob - test_1 <- leap_and_shift_probs(rho = rho, n_items = n_items, leap_size = 1) - expect_equal(test_1$rho_prime, as.matrix(c(1, 3, 2, 4, 5, 6))) - expect_equivalent(test_1$forwards_prob, 0.1666667, tol=1e-6) - expect_equivalent(test_1$backwards_prob, 0.1666667, tol=1e-6) - - # if rho != rho_prime, then it should have a ulam distance of 1 - # if rho == rho_prime, then it should have ulam distance of 0 - dist_1 <- BayesMallows:::get_rank_distance(rho, test_1$rho_prime, metric= "ulam") - expect_equal(dist_1, 1) - - test_2 <- leap_and_shift_probs(rho = rho, n_items = n_items, leap_size = 2) - expect_equal(test_2$rho_prime, as.matrix(c(1, 2, 3, 4, 5, 6))) - expect_equivalent(test_2$forwards_prob, 0.0556, tol=1e-4) - expect_equivalent(test_2$backwards_prob, 0.0556, tol=1e-4) - - dist_2 <- get_rank_distance( - rho, test_2$rho_prime, metric= "ulam" - ) - expect_equal(dist_2, 0) - - test_3 <- leap_and_shift_probs(rho = rho, n_items = n_items, leap_size = 3) - expect_equal(test_3$rho_prime, as.matrix(c(1, 2, 3, 4, 5, 6))) - expect_equivalent(test_3$forwards_prob, 0.0417, tol=1e-3) - expect_equivalent(test_3$backwards_prob, 0.0417, tol=1e-3) - - dist_3 <- get_rank_distance( - rho, test_3$rho_prime, metric= "ulam" - ) - expect_equal(dist_3, 0) + set.seed(101) + n_items <- length(rho) + + # leap_size has a possible range, the BayesMallows papers suggest + # leap_size = floor(n_items/5) but the leap_size can be up to n_items/2. + # Note that leap_size must be integered valued. + + # if leap_size = 1, then forwards_prob = backwards_prob + test_1 <- leap_and_shift_probs(rho = rho, n_items = n_items, leap_size = 1) + expect_equal(test_1$rho_prime, as.matrix(c(1, 3, 2, 4, 5, 6))) + expect_equivalent(test_1$forwards_prob, 0.1666667, tol = 1e-6) + expect_equivalent(test_1$backwards_prob, 0.1666667, tol = 1e-6) + + # if rho != rho_prime, then it should have a ulam distance of 1 + # if rho == rho_prime, then it should have ulam distance of 0 + dist_1 <- BayesMallows:::get_rank_distance(rho, test_1$rho_prime, metric = "ulam") + expect_equal(dist_1, 1) + + test_2 <- leap_and_shift_probs(rho = rho, n_items = n_items, leap_size = 2) + expect_equal(test_2$rho_prime, as.matrix(c(1, 2, 3, 4, 5, 6))) + expect_equivalent(test_2$forwards_prob, 0.0556, tol = 1e-4) + expect_equivalent(test_2$backwards_prob, 0.0556, tol = 1e-4) + + dist_2 <- get_rank_distance( + rho, test_2$rho_prime, metric = "ulam" + ) + expect_equal(dist_2, 0) + + test_3 <- leap_and_shift_probs(rho = rho, n_items = n_items, leap_size = 3) + expect_equal(test_3$rho_prime, as.matrix(c(1, 2, 3, 4, 5, 6))) + expect_equivalent(test_3$forwards_prob, 0.0417, tol = 1e-3) + expect_equivalent(test_3$backwards_prob, 0.0417, tol = 1e-3) + + dist_3 <- get_rank_distance( + rho, test_3$rho_prime, metric = "ulam" + ) + expect_equal(dist_3, 0) }) # ======================================================== # @@ -117,51 +117,51 @@ test_that("smc_leap_and_shift_probs() works as expected", { # ======================================================== # metropolis_hastings_alpha_old <- function( - alpha, n_items, rankings, metric, rho, logz_estimate + alpha, n_items, rankings, metric, rho, logz_estimate ) { - exp_alpha_prime <- rlnorm(1, mean = alpha, sd = 0.15) # 1 - alpha_prime <- log(exp_alpha_prime) - - # evaluate the log-likelihood with current rankings - mallows_loglik_prop <- get_mallows_loglik( - alpha = (alpha_prime - alpha), rho = rho, n = n_items, - rankings = rankings, metric = metric - ) - - # evaluate the log estimate of the partition function - # for a particular value of alpha - logz_alpha <- get_partition_function( - n_items = n_items, alpha = alpha, logz_estimate = logz_estimate, - metric = metric - ) - logz_alpha_prime <- get_partition_function( - n_items = n_items, alpha = alpha_prime, logz_estimate = logz_estimate, - metric = metric - ) - - n_users <- length(rankings) / n_items - - loga <- n_users * (logz_alpha - logz_alpha_prime) + - dexp(alpha_prime, log=TRUE) - dexp(alpha, log=TRUE) + - alpha_prime - alpha + mallows_loglik_prop - - # determine whether to accept or reject proposed rho and - # return now consensus ranking - p <- runif(1, min = 0, max = 1) - if (log(p) <= loga) { - return(alpha_prime) - } else { - return(alpha) - } + exp_alpha_prime <- rlnorm(1, mean = alpha, sd = 0.15) # 1 + alpha_prime <- log(exp_alpha_prime) + + # evaluate the log-likelihood with current rankings + mallows_loglik_prop <- get_mallows_loglik( + alpha = (alpha_prime - alpha), rho = rho, n = n_items, + rankings = rankings, metric = metric + ) + + # evaluate the log estimate of the partition function + # for a particular value of alpha + logz_alpha <- get_partition_function( + n_items = n_items, alpha = alpha, logz_estimate = logz_estimate, + metric = metric + ) + logz_alpha_prime <- get_partition_function( + n_items = n_items, alpha = alpha_prime, logz_estimate = logz_estimate, + metric = metric + ) + + n_users <- length(rankings) / n_items + + loga <- n_users * (logz_alpha - logz_alpha_prime) + + dexp(alpha_prime, log = TRUE) - dexp(alpha, log = TRUE) + + alpha_prime - alpha + mallows_loglik_prop + + # determine whether to accept or reject proposed rho and + # return now consensus ranking + p <- runif(1, min = 0, max = 1) + if (log(p) <= loga) { + return(alpha_prime) + } else { + return(alpha) + } } set.seed(101) -rho <- c(1,2,3,4,5,6) +rho <- c(1, 2, 3, 4, 5, 6) alpha <- 2 metric <- "footrule" n_items <- 6 rankings <- sample_mallows( - rho0 = rho, alpha0 = alpha, n_samples = 10, burnin = 1000, thinning = 500 + rho0 = rho, alpha0 = alpha, n_samples = 10, burnin = 1000, thinning = 500 ) alpha_vector <- seq(from = 0, to = 20, by = 1) iter <- 1e4 @@ -170,39 +170,39 @@ degree <- 10 # Estimate the logarithm of the partition function of the Mallows rank model # using the estimate partition function logz_estimate <- estimate_partition_function( - method = "importance_sampling", alpha_vector = alpha_vector, - n_items = n_items, metric = "footrule", nmc = iter, degree = degree + method = "importance_sampling", alpha_vector = alpha_vector, + n_items = n_items, metric = "footrule", nmc = iter, degree = degree ) set.seed(101) test_1_a <- metropolis_hastings_alpha_old(alpha, n_items, rankings, metric, rho, logz_estimate) test_1_b <- metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd = 0.5, - lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, alpha_prop_sd = 0.5, + lambda = 0.1, alpha_max = 20 ) set.seed(101) test_2_a <- metropolis_hastings_alpha_old( - alpha, n_items, rankings, metric, rho, logz_estimate + alpha, n_items, rankings, metric, rho, logz_estimate ) test_2_b <- metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, + alpha_prop_sd = 0.15, lambda = 0.1, alpha_max = 20 ) set.seed(101) test_3_b <- metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, + alpha_prop_sd = 0.5, lambda = 0.15, alpha_max = 20 ) set.seed(101) test_4_b <- metropolis_hastings_alpha( - alpha, n_items, rankings, metric, rho, logz_estimate, - alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20 + alpha, n_items, rankings, metric, rho, logz_estimate, + alpha_prop_sd = 0.15, lambda = 0.15, alpha_max = 20 ) test_that("metropolis_hastings_alpha() works as expected", { - expect_equivalent(test_1_a, 1.951095, tol=1e-5) - expect_equivalent(test_1_b, 2.450351, tol=1e-5) - expect_equivalent(test_2_a, 1.951095, tol=1e-5) - expect_equivalent(test_2_b, 2.125639, tol=1e-5) - expect_equivalent(test_3_b, 2) - expect_equivalent(test_4_b, 1.904542, tol=1e-5) + expect_equivalent(test_1_a, 1.951095, tol = 1e-5) + expect_equivalent(test_1_b, 2.450351, tol = 1e-5) + expect_equivalent(test_2_a, 1.951095, tol = 1e-5) + expect_equivalent(test_2_b, 2.125639, tol = 1e-5) + expect_equivalent(test_3_b, 2) + expect_equivalent(test_4_b, 1.904542, tol = 1e-5) }) diff --git a/tests/testthat/test-smc_mallows_complete_rankings.R b/tests/testthat/test-smc_mallows_complete_rankings.R index e784fc9d..4cfb7127 100755 --- a/tests/testthat/test-smc_mallows_complete_rankings.R +++ b/tests/testthat/test-smc_mallows_complete_rankings.R @@ -5,16 +5,16 @@ context("SMC complete rankings: sequence") ######################### set.seed(994) -data = sushi_rankings[1:100,] +data <- sushi_rankings[1:100, ] # General n_items <- dim(sushi_rankings)[2] # Number of items -leap_size = floor(n_items/5) -metric = "footrule" +leap_size <- floor(n_items / 5) +metric <- "footrule" # Generate estimate of Z_n(alpha) alpha_vector <- seq(from = 0, to = 15, by = 1) -iter = 1e2 +iter <- 1e2 degree <- 10 # Estimate the logarithm of the partition function of the Mallows rank model using the estimate partition function @@ -27,45 +27,45 @@ logz_estimate <- estimate_partition_function(method = "importance_sampling", ###################################### # BayesMallows Analysis (MCMC) ###################################### -nmc = 20 -burnin=5 -model_fit <- compute_mallows(rankings = data, nmc = nmc, metric = metric, leap_size =leap_size, +nmc <- 20 +burnin <- 5 +model_fit <- compute_mallows(rankings = data, nmc = nmc, metric = metric, leap_size = leap_size, alpha_prop_sd = 0.15, logz_estimate = logz_estimate) -model_fit$burnin = burnin +model_fit$burnin <- burnin -alpha_samples_table = data.frame(iteration = 1:nmc , value = model_fit$alpha$value) -alpha_samples_table = alpha_samples_table[(burnin+1):nmc,] +alpha_samples_table <- data.frame(iteration = 1:nmc, value = model_fit$alpha$value) +alpha_samples_table <- alpha_samples_table[(burnin + 1):nmc, ] # from observing the plots, this looks like the estimated parameters of the Mallows Model -rho_0 = c(4,5,2,6,8,3,9,1,7,10) -alpha_0 = 1.7 +rho_0 <- c(4, 5, 2, 6, 8, 3, 9, 1, 7, 10) +alpha_0 <- 1.7 # heatplot - there is no burnin! -mcmc_rho_matrix = matrix(model_fit$rho$value, ncol = n_items, nrow = nmc, byrow=TRUE) +mcmc_rho_matrix <- matrix(model_fit$rho$value, ncol = n_items, nrow = nmc, byrow = TRUE) # ################################################################### # # SMC # ################################################################### -mcmc_times = 5 -num_new_obs = 10 -Time = dim(data)[1]/num_new_obs -N = 100 +mcmc_times <- 5 +num_new_obs <- 10 +Time <- dim(data)[1] / num_new_obs +N <- 100 test <- smc_mallows_new_users_complete( - R_obs = data, n_items = n_items, metric = metric, - leap_size = leap_size, N = N, Time = Time, - logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_times, - alpha_prop_sd = 0.1, lambda = 0.001, alpha_max = 1e6, - num_new_obs = num_new_obs, verbose = FALSE + R_obs = data, n_items = n_items, metric = metric, + leap_size = leap_size, N = N, Time = Time, + logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_times, + alpha_prop_sd = 0.1, lambda = 0.001, alpha_max = 1e6, + num_new_obs = num_new_obs, verbose = FALSE ) test_that("Output of smc_mallows_new_users_complete is OK", { - expect_s3_class(test, "SMCMallows") - expect_length(test, 2) - expect_named(test, c("rho_samples", "alpha_samples")) - expect_equal(dim(test$rho_samples), c(100, 10, 111)) - expect_equal(dim(test$alpha_samples), c(100, 111)) + expect_s3_class(test, "SMCMallows") + expect_length(test, 2) + expect_named(test, c("rho_samples", "alpha_samples")) + expect_equal(dim(test$rho_samples), c(100, 10, 111)) + expect_equal(dim(test$alpha_samples), c(100, 111)) }) # ############################### @@ -74,107 +74,107 @@ test_that("Output of smc_mallows_new_users_complete is OK", { # posterior confidence intervals for rho rho_temp <- compute_posterior_intervals_rho( - output = test$rho_samples[,,Time+1], nmc = N, burnin = 0 + output = test$rho_samples[, , Time + 1], nmc = N, burnin = 0 ) # MAP AND CP consensus ranking estimates rho_cp <- compute_rho_consensus( - output = test$rho_samples[,,Time+1], nmc = N, burnin = 0, C = 1, type = "CP" + output = test$rho_samples[, , Time + 1], nmc = N, burnin = 0, C = 1, type = "CP" ) -rho_map <- compute_rho_consensus(output = test$rho_samples[,,Time+1], nmc = N, burnin = 0, C = 1, type = "MAP") +rho_map <- compute_rho_consensus(output = test$rho_samples[, , Time + 1], nmc = N, burnin = 0, C = 1, type = "MAP") test_that("Output of compute_posterior_intervals_rho is OK", { - expect_is(rho_temp, "tbl_df") - expect_length(rho_temp, 7) - expect_named( - rho_temp, - c( - "item", "parameter", "mean", "median", "conf_level", "hpdi", - "central_interval" - ) - ) - expect_equivalent(sapply(rho_temp, length), rep(10, 7)) + expect_is(rho_temp, "tbl_df") + expect_length(rho_temp, 7) + expect_named( + rho_temp, + c( + "item", "parameter", "mean", "median", "conf_level", "hpdi", + "central_interval" + ) + ) + expect_equivalent(sapply(rho_temp, length), rep(10, 7)) }) # posterior for alpha -alpha_samples_table = data.frame( - iteration = 1:N , value = test$alpha_samples[,Time+1] +alpha_samples_table <- data.frame( + iteration = 1:N, value = test$alpha_samples[, Time + 1] ) # posterior confidence intervals -alpha_posterior_intervals = compute_posterior_intervals_alpha( - output = test$alpha_samples[,Time+1], nmc = N, burnin = 0 +alpha_posterior_intervals <- compute_posterior_intervals_alpha( + output = test$alpha_samples[, Time + 1], nmc = N, burnin = 0 ) test_that("Output of compute_posterior_intervals_alpha is OK", { - expect_is(alpha_posterior_intervals, "tbl_df") - expect_length(alpha_posterior_intervals, 6) - expect_named( - alpha_posterior_intervals, - c( - "parameter", "mean", "median", "conf_level", "hpdi", - "central_interval" - ) - ) - expect_equivalent(sapply(alpha_posterior_intervals, length), rep(1, 6)) + expect_is(alpha_posterior_intervals, "tbl_df") + expect_length(alpha_posterior_intervals, 6) + expect_named( + alpha_posterior_intervals, + c( + "parameter", "mean", "median", "conf_level", "hpdi", + "central_interval" + ) + ) + expect_equivalent(sapply(alpha_posterior_intervals, length), rep(1, 6)) }) context("SMC complete rankings: breakdown") test_that("get_mallows_loglik() in smc_mallows_new_users_complete() works", { - # ======================================================== # - # Setup # - # ======================================================== # - - # Basic elements ----------------------------------------- # - data <- sushi_rankings[1:100, ] - n_users <- nrow(data) - n_items <- ncol(sushi_rankings) - Time <- nrow(data) / num_new_obs - num_new_obs <- 10 - N <- 100 - - # rho_samples and alpha_samples -------------------------- # - rho_samples <- array(data=0, dim=c(N, n_items, (n_users + Time + 1))) - for (ii in seq_len(N)){ - rho_samples[ii, , 1] <- sample(seq_len(n_items), n_items, replace=FALSE) - } - alpha_samples <- matrix(nrow=N, ncol=(n_items + Time + 1)) - alpha_samples[, 1] <- rexp(N, rate=1) - - # logz_estimate ------------------------------------------ # - alpha_vector <- seq(from = 0, to = 15, by = 1) - iter <- 3e2 - degree <- 10 - logz_estimate <- estimate_partition_function( - method="importance_sampling", alpha_vector=alpha_vector, - n_items=n_items, metric=metric, nmc=iter, degree=degree - ) - - num_obs <- 0 - out_loglik <- vector(mode="numeric", length=Time) - for (tt in seq_len(Time)) { - num_obs <- num_obs + num_new_obs - new_observed_rankings <- data[(num_obs - num_new_obs + 1):num_obs, ] - rho_samples[, , tt + 1] <- rho_samples[, , tt] - alpha_samples[, tt + 1] <- alpha_samples[, tt] - alpha_samples_ii <- alpha_samples[ii, tt + 1] - rho_samples_ii <- rho_samples[ii, , tt + 1] - for (ii in seq_len(N)) { - log_z_alpha <- BayesMallows:::get_partition_function( - n_items, alpha_samples_ii, NULL, logz_estimate, metric - ) - log_likelihood <- get_mallows_loglik( - alpha_samples_ii, t(rho_samples_ii), n_items, - new_observed_rankings, metric - ) - } - out_loglik[tt] <- log_likelihood - } - - # ======================================================== # - # Test # - # ======================================================== # - tolerance <- 0.1 - expect_gt(max(out_loglik), mean(out_loglik) * (1 + tolerance)) - expect_lt(min(out_loglik), mean(out_loglik) * (1 - tolerance)) + # ======================================================== # + # Setup # + # ======================================================== # + + # Basic elements ----------------------------------------- # + data <- sushi_rankings[1:100, ] + n_users <- nrow(data) + n_items <- ncol(sushi_rankings) + Time <- nrow(data) / num_new_obs + num_new_obs <- 10 + N <- 100 + + # rho_samples and alpha_samples -------------------------- # + rho_samples <- array(data = 0, dim = c(N, n_items, (n_users + Time + 1))) + for (ii in seq_len(N)) { + rho_samples[ii, , 1] <- sample(seq_len(n_items), n_items, replace = FALSE) + } + alpha_samples <- matrix(nrow = N, ncol = (n_items + Time + 1)) + alpha_samples[, 1] <- rexp(N, rate = 1) + + # logz_estimate ------------------------------------------ # + alpha_vector <- seq(from = 0, to = 15, by = 1) + iter <- 3e2 + degree <- 10 + logz_estimate <- estimate_partition_function( + method = "importance_sampling", alpha_vector = alpha_vector, + n_items = n_items, metric = metric, nmc = iter, degree = degree + ) + + num_obs <- 0 + out_loglik <- vector(mode = "numeric", length = Time) + for (tt in seq_len(Time)) { + num_obs <- num_obs + num_new_obs + new_observed_rankings <- data[(num_obs - num_new_obs + 1):num_obs, ] + rho_samples[, , tt + 1] <- rho_samples[, , tt] + alpha_samples[, tt + 1] <- alpha_samples[, tt] + alpha_samples_ii <- alpha_samples[ii, tt + 1] + rho_samples_ii <- rho_samples[ii, , tt + 1] + for (ii in seq_len(N)) { + log_z_alpha <- BayesMallows:::get_partition_function( + n_items, alpha_samples_ii, NULL, logz_estimate, metric + ) + log_likelihood <- get_mallows_loglik( + alpha_samples_ii, t(rho_samples_ii), n_items, + new_observed_rankings, metric + ) + } + out_loglik[tt] <- log_likelihood + } + + # ======================================================== # + # Test # + # ======================================================== # + tolerance <- 0.1 + expect_gt(max(out_loglik), mean(out_loglik) * (1 + tolerance)) + expect_lt(min(out_loglik), mean(out_loglik) * (1 - tolerance)) }) diff --git a/tests/testthat/test-smc_mallows_new_item_rank.R b/tests/testthat/test-smc_mallows_new_item_rank.R index c880b6e3..400375ba 100644 --- a/tests/testthat/test-smc_mallows_new_item_rank.R +++ b/tests/testthat/test-smc_mallows_new_item_rank.R @@ -1,4 +1,4 @@ -context('SMC new user and item rank combined') +context("SMC new user and item rank combined") # a simpler example to test ==================================================== set.seed(101) @@ -32,78 +32,78 @@ lambda <- 0.15 alpha_max <- 1e6 test_that("Produces the wrong metric and aug_method error", { - expect_error( - smc_mallows_new_item_rank_alpha_fixed( - alpha = alpha_0, n_items = n_items, R_obs = sample_dataset, - metric = "cayley", leap_size = leap_size, N = N, Time = Time, - logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, - alpha_prop_sd = alpha_prop_sd, lambda = lambda, - alpha_max = alpha_max, aug_method = "pseudolikelihood" - ) - ) - expect_error( - smc_mallows_new_item_rank( - n_items = n_items, R_obs = sample_dataset, - metric = "cayley", leap_size = leap_size, N = N, Time = Time, - logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, - alpha_prop_sd = alpha_prop_sd, lambda = lambda, - alpha_max = alpha_max, aug_method = "pseudolikelihood" - ) - ) + expect_error( + smc_mallows_new_item_rank_alpha_fixed( + alpha = alpha_0, n_items = n_items, R_obs = sample_dataset, + metric = "cayley", leap_size = leap_size, N = N, Time = Time, + logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, + alpha_prop_sd = alpha_prop_sd, lambda = lambda, + alpha_max = alpha_max, aug_method = "pseudolikelihood" + ) + ) + expect_error( + smc_mallows_new_item_rank( + n_items = n_items, R_obs = sample_dataset, + metric = "cayley", leap_size = leap_size, N = N, Time = Time, + logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, + alpha_prop_sd = alpha_prop_sd, lambda = lambda, + alpha_max = alpha_max, aug_method = "pseudolikelihood" + ) + ) }) test_that("Runs with unif kernel", { - smc_unif_alpha_fixed_unif <- suppressMessages( - smc_mallows_new_item_rank_alpha_fixed( - alpha = alpha_0, n_items = n_items, R_obs = sample_dataset, - metric = "footrule", leap_size = leap_size, N = N, Time = Time, - logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, - alpha_prop_sd = alpha_prop_sd, lambda = lambda, - alpha_max = alpha_max, aug_method = "random" - ) - ) - expect_is(smc_unif_alpha_fixed_unif, "list") - expect_length(smc_unif_alpha_fixed_unif, 1) - expect_equal(dim(smc_unif_alpha_fixed_unif$rho_samples), c(N, 6, 31)) - smc_unif <- suppressMessages( - smc_mallows_new_item_rank( - n_items = n_items, R_obs = sample_dataset, - metric = "footrule", leap_size = leap_size, N = N, Time = Time, - logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, - alpha_prop_sd = alpha_prop_sd, lambda = lambda, - alpha_max = alpha_max, aug_method = "random" - ) - ) - expect_is(smc_unif, "list") - expect_length(smc_unif, 2) - expect_equal(dim(smc_unif$rho_samples), c(N, 6, 31)) - expect_equal(dim(smc_unif$alpha_samples), c(N, 31)) + smc_unif_alpha_fixed_unif <- suppressMessages( + smc_mallows_new_item_rank_alpha_fixed( + alpha = alpha_0, n_items = n_items, R_obs = sample_dataset, + metric = "footrule", leap_size = leap_size, N = N, Time = Time, + logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, + alpha_prop_sd = alpha_prop_sd, lambda = lambda, + alpha_max = alpha_max, aug_method = "random" + ) + ) + expect_is(smc_unif_alpha_fixed_unif, "list") + expect_length(smc_unif_alpha_fixed_unif, 1) + expect_equal(dim(smc_unif_alpha_fixed_unif$rho_samples), c(N, 6, 31)) + smc_unif <- suppressMessages( + smc_mallows_new_item_rank( + n_items = n_items, R_obs = sample_dataset, + metric = "footrule", leap_size = leap_size, N = N, Time = Time, + logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, + alpha_prop_sd = alpha_prop_sd, lambda = lambda, + alpha_max = alpha_max, aug_method = "random" + ) + ) + expect_is(smc_unif, "list") + expect_length(smc_unif, 2) + expect_equal(dim(smc_unif$rho_samples), c(N, 6, 31)) + expect_equal(dim(smc_unif$alpha_samples), c(N, 31)) }) test_that("Runs with pseudo kernel", { - smc_unif_alpha_fixed_unif <- suppressMessages( - smc_mallows_new_item_rank_alpha_fixed( - alpha = alpha_0, n_items = n_items, R_obs = sample_dataset, - metric = "footrule", leap_size = leap_size, N = N, Time = Time, - logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, - alpha_prop_sd = alpha_prop_sd, lambda = lambda, - alpha_max = alpha_max, aug_method = "pseudolikelihood" - ) - ) - expect_is(smc_unif_alpha_fixed_unif, "list") - expect_length(smc_unif_alpha_fixed_unif, 1) - expect_equal(dim(smc_unif_alpha_fixed_unif$rho_samples), c(N, 6, 31)) - smc_unif <- suppressMessages( - smc_mallows_new_item_rank( - n_items = n_items, R_obs = sample_dataset, - metric = "footrule", leap_size = leap_size, N = N, Time = Time, - logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, - alpha_prop_sd = alpha_prop_sd, lambda = lambda, - alpha_max = alpha_max, aug_method = "pseudolikelihood" - ) - ) - expect_is(smc_unif, "list") - expect_length(smc_unif, 2) - expect_equal(dim(smc_unif$rho_samples), c(N, 6, 31)) - expect_equal(dim(smc_unif$alpha_samples), c(N, 31)) + smc_unif_alpha_fixed_unif <- suppressMessages( + smc_mallows_new_item_rank_alpha_fixed( + alpha = alpha_0, n_items = n_items, R_obs = sample_dataset, + metric = "footrule", leap_size = leap_size, N = N, Time = Time, + logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, + alpha_prop_sd = alpha_prop_sd, lambda = lambda, + alpha_max = alpha_max, aug_method = "pseudolikelihood" + ) + ) + expect_is(smc_unif_alpha_fixed_unif, "list") + expect_length(smc_unif_alpha_fixed_unif, 1) + expect_equal(dim(smc_unif_alpha_fixed_unif$rho_samples), c(N, 6, 31)) + smc_unif <- suppressMessages( + smc_mallows_new_item_rank( + n_items = n_items, R_obs = sample_dataset, + metric = "footrule", leap_size = leap_size, N = N, Time = Time, + logz_estimate = logz_estimate, mcmc_kernel_app = mcmc_kernel_app, + alpha_prop_sd = alpha_prop_sd, lambda = lambda, + alpha_max = alpha_max, aug_method = "pseudolikelihood" + ) + ) + expect_is(smc_unif, "list") + expect_length(smc_unif, 2) + expect_equal(dim(smc_unif$rho_samples), c(N, 6, 31)) + expect_equal(dim(smc_unif$alpha_samples), c(N, 31)) }) diff --git a/tests/testthat/test-smc_mallows_partial_rankings.R b/tests/testthat/test-smc_mallows_partial_rankings.R index 18dec112..ce1f8dc4 100644 --- a/tests/testthat/test-smc_mallows_partial_rankings.R +++ b/tests/testthat/test-smc_mallows_partial_rankings.R @@ -58,10 +58,10 @@ test_that("BayesMallows MCMC Results are OK", { ) post_rho <- compute_posterior_intervals(bm_mcmc, parameter = "rho") post_alpha <- compute_posterior_intervals(bm_mcmc, parameter = "alpha") - expect_equal(dim(post_rho) , c(10, 7)) - expect_equal(dim(rho_cp) , c(10, 3)) - expect_equal(dim(rho_map) , c(10, 3)) - expect_equal(dim(post_alpha), c(1 , 6)) + expect_equal(dim(post_rho), c(10, 7)) + expect_equal(dim(rho_cp), c(10, 3)) + expect_equal(dim(rho_map), c(10, 3)) + expect_equal(dim(post_alpha), c(1, 6)) }) # SMC Analysis (alpha unknown) =========================== @@ -147,11 +147,11 @@ test_that("Runs with unif kernel", { expect_equal(dim(smc_unif$alpha_samples), c(N, 21)) expect_s3_class( - plot_alpha_posterior(smc_unif$alpha_samples[, Time+ 1], nmc = N, burnin = 2), + plot_alpha_posterior(smc_unif$alpha_samples[, Time + 1], nmc = N, burnin = 2), "ggplot") expect_s3_class( - plot_rho_posterior(smc_unif$rho_samples[, ,Time+ 1], nmc = N, burnin = 2, C = 1), + plot_rho_posterior(smc_unif$rho_samples[, , Time + 1], nmc = N, burnin = 2, C = 1), "ggplot") }) diff --git a/tests/testthat/test-smc_pseudolikelihood.R b/tests/testthat/test-smc_pseudolikelihood.R index 3790f744..3fd78ef8 100644 --- a/tests/testthat/test-smc_pseudolikelihood.R +++ b/tests/testthat/test-smc_pseudolikelihood.R @@ -41,7 +41,7 @@ test_1_forward <- calculate_forward_probability( item_ordering = item_ordering, partial_ranking = partial_ranking, remaining_set = remaining_set, rho = rho, alpha = alpha, n_items = n_items, metric = metric -) # TODO #116: get this to output aug_ranking == c(1, 2, 3, 6, 5, 4). +) # Tried all combinations of item_ordering. No dice. current_ranking <- c(1, 2, 6, 5, 4, 3) @@ -52,12 +52,8 @@ test_1_backward_a <- calculate_backward_probability( alpha = alpha, n_items = n_items, metric = metric ) -new_current_ranking <- test_1_forward$aug_ranking # c(1, 2, 3, 4, 6, 5) +new_current_ranking <- test_1_forward$aug_ranking -# new_current_ranking needs to be one of the following so that test_1_backward_b -# equals test_1_forward$forward_prob: -# - c(1, 2, 3, 6, 5, 4) -# - c(1, 2, 6, 4, 3, 5) test_1_backward_b <- calculate_backward_probability( item_ordering = item_ordering, partial_ranking = partial_ranking, current_ranking = new_current_ranking, remaining_set = remaining_set, rho = rho, diff --git a/tests/testthat/test-smc_uniform.R b/tests/testthat/test-smc_uniform.R index e4075bc3..9680f854 100644 --- a/tests/testthat/test-smc_uniform.R +++ b/tests/testthat/test-smc_uniform.R @@ -5,10 +5,10 @@ require("BayesMallows") # tests for M-H_aug_ranking function =========================================== -rho = c(1,2,3,4,5,6) -alpha = 2 -metric = "footrule" -n_items= 6 +rho <- c(1, 2, 3, 4, 5, 6) +alpha <- 2 +metric <- "footrule" +n_items <- 6 test_that("MH-aug ranking works", { @@ -18,11 +18,11 @@ test_that("MH-aug ranking works", { set.seed(584) test_1 <- metropolis_hastings_aug_ranking( current_ranking = R_curr, - partial_ranking = R_obs, - alpha = alpha, + partial_ranking = R_obs, + alpha = alpha, rho = rho, - n_items = n_items, - metric = metric + n_items = n_items, + metric = metric ) expect_equal(test_1, as.matrix(c(1, 2, 3, 6, 5, 4))) expect_equal(get_rank_distance(rho, test_1, metric = "ulam"), 2) @@ -42,7 +42,7 @@ test_that("MH-aug ranking works", { # One missing rank --------------------------------------- # R_curr <- c(1, 2, 3, 6, 5, 4) R_obs <- c(1, 2, 3, 6, 5, NA) - set.seed(545) + set.seed(545) test_3 <- metropolis_hastings_aug_ranking( current_ranking = R_curr, partial_ranking = R_obs, alpha = alpha, rho = rho, n_items = n_items, metric = metric @@ -59,7 +59,7 @@ test_that("correction_kernel works", { # Three missing ranks ------------------------------------ # R_curr <- c(1, 2, 3, 4, 5, 6) R_obs <- c(1, 2, 3, NA, NA, NA) - set.seed(879) + set.seed(879) test_4 <- correction_kernel(R_obs, R_curr, n_items) expect_equal(test_4$ranking, as.matrix(c(1, 2, 3, 6, 4, 5))) expect_equal(test_4$correction_prob, 1 / 6) @@ -68,7 +68,7 @@ test_that("correction_kernel works", { # Two missing ranks -------------------------------------- # R_curr <- c(1, 2, 3, 4, 5, 6) R_obs <- c(1, 2, 3, 5, NA, NA) - set.seed(706) + set.seed(706) test_5 <- correction_kernel(R_obs, R_curr, n_items) expect_equal(test_5$ranking, as.matrix(c(1, 2, 3, 5, 4, 6))) expect_equal(test_5$correction_prob, 0.5) @@ -77,7 +77,7 @@ test_that("correction_kernel works", { # No missing ranks --------------------------------------- # R_curr <- c(1, 2, 3, 4, 5, 6) R_obs <- c(1, 2, 3, 4, 5, 6) - set.seed(731) + set.seed(731) test_6 <- correction_kernel(R_obs, R_curr, n_items) expect_equal(test_6$ranking, as.matrix(c(1, 2, 3, 4, 5, 6))) expect_equal(test_6$correction_prob, 1) diff --git a/tests/testthat/test-transitive_closure.R b/tests/testthat/test-transitive_closure.R index 37590901..a07ee05f 100644 --- a/tests/testthat/test-transitive_closure.R +++ b/tests/testthat/test-transitive_closure.R @@ -31,7 +31,7 @@ pair_comp_tc <- tribble( class(pair_comp_tc) <- c("BayesMallowsTC", class(pair_comp_tc)) -test_that("transitive closure generation works",{ +test_that("transitive closure generation works", { pair_comp_returned <- generate_transitive_closure(pair_comp) %>% arrange(assessor, bottom_item, top_item) @@ -41,10 +41,8 @@ test_that("transitive closure generation works",{ } ) -test_that("transitive closure generation discovers inconsistencies",{ +test_that("transitive closure generation discovers inconsistencies", { pair_comp_inc <- bind_rows(pair_comp, tibble(assessor = 1, bottom_item = 5L, top_item = 2L)) expect_error(invisible(capture.output(generate_transitive_closure(pair_comp_inc)))) }) - -