From ea6cf3135997e9600d6548dc99ff202d469c3a87 Mon Sep 17 00:00:00 2001 From: Thibault Vatter Date: Thu, 3 May 2018 18:17:26 -0400 Subject: [PATCH] consistent behavior for ordered variables (#19) * consistent behavior for ordered variables * add unit tests --- R/kde1d_methods.R | 27 ++++++++++++--------------- tests/testthat/test_kde1d.R | 12 ++++++++++++ 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/R/kde1d_methods.R b/R/kde1d_methods.R index ef3cd5f..e4b8ce2 100644 --- a/R/kde1d_methods.R +++ b/R/kde1d_methods.R @@ -37,14 +37,14 @@ dkde1d <- function(x, obj) { x <- x[[1]] if (!is.ordered(x)) stopifnot(!is.factor(x)) + if (length(obj$jitter_info$i_disc) == 1 & !is.ordered(x)) + x <- ordered(x, obj$jitter_info$levels$x) - x <- expand_as_numeric(x) fhat <- dkde1d_cpp(x, obj) if (length(obj$jitter_info$i_disc) == 1) { # for discrete variables we can normalize - x_all_num <- expand_as_numeric(as.ordered(obj$jitter_info$levels$x)) - f_all <- dkde1d_cpp(x_all_num, obj) + f_all <- dkde1d_cpp(as.ordered(obj$jitter_info$levels$x), obj) fhat <- fhat / sum(f_all) } @@ -60,15 +60,14 @@ pkde1d <- function(q, obj) { if (!is.ordered(q)) stopifnot(!is.factor(q)) - q <- expand_as_numeric(q) - if (length(obj$jitter_info$i_disc) != 1) { p <- pkde1d_cpp(q, obj) } else { - # for discrete variables we have to add the missing probability mass - x_all_num <- expand_as_numeric(as.ordered(obj$jitter_info$levels$x)) - f_all <- dkde1d(x_all_num, obj) - p <- sapply(q, function(y) sum(f_all[x_all_num <= y])) + if (!is.ordered(q)) + q <- ordered(q, obj$jitter_info$levels$x) + x_all <- as.ordered(obj$jitter_info$levels$x) + p_all <- dkde1d(x_all, obj) + p <- sapply(q, function(y) sum(p_all[x_all <= y])) p <- pmin(pmax(p, 0), 1) } @@ -81,19 +80,17 @@ pkde1d <- function(q, obj) { qkde1d <- function(p, obj) { stopifnot(all(na.omit(p) > 0.0) & all(na.omit(p) < 1.0)) if (length(obj$jitter_info$i_disc) != 1) { + ## qkde1d_cpp for continuous variables q <- qkde1d_cpp(p, obj) } else { ## for discrete variables compute quantile from the density - x_all_num <- expand_as_numeric(as.ordered(obj$jitter_info$levels$x)) + x_all <- as.ordered(obj$jitter_info$levels$x) # pdf at all possible values of x - dd <- dkde1d(x_all_num, obj) - pp <- c(cumsum(dd)) / sum(dd) + pp <- pkde1d(x_all, obj) # generalized inverse - q <- x_all_num[vapply(p, function(y) which(y <= pp)[1], integer(1))] - q <- ordered(obj$jitter_info$levels$x[q + 1], - levels = obj$jitter_info$levels$x) + q <- x_all[vapply(p, function(y) which(y <= pp)[1], integer(1))] } q diff --git a/tests/testthat/test_kde1d.R b/tests/testthat/test_kde1d.R index ae1a635..4f26eeb 100644 --- a/tests/testthat/test_kde1d.R +++ b/tests/testthat/test_kde1d.R @@ -108,4 +108,16 @@ test_that("other generics work", { } lapply(fits, test_other_generics) +}) + +test_that("behavior for discrete data is consistent", { + n <- 1e3 + x <- ordered(sample(5, n, TRUE), 1:5) + fit <- kde1d(x) + xx <- ordered(1:5, 1:5) + expect_equal(dkde1d(1:5, fit), dkde1d(xx, fit)) + expect_equal(pkde1d(1:5, fit), pkde1d(xx, fit)) + expect_true(all(is.na(dkde1d(c(0, 6), fit)))) + expect_true(all(is.na(pkde1d(c(0, 6), fit)))) + expect_true(all(rkde1d(n, fit) %in% x)) }) \ No newline at end of file