Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

prepare standalone c++ version with interface #41

Merged
merged 15 commits into from
Nov 13, 2019
38 changes: 14 additions & 24 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

#' fits a kernel density estimate and calculates the effective degrees of
#' freedom.
#' @param x vector of observations.
#' @param x vector of observations; catergorical data must be converted to
#' non-negative integers.
#' @param nlevels the number of factor levels; 0 for continuous data.
#' @param bw the bandwidth parameter.
#' @param xmin lower bound for the support of the density, `NaN` means no
#' boundary.
Expand All @@ -13,47 +15,35 @@
#' @return `An Rcpp::List` containing the fitted density values on a grid and
#' additional information.
#' @noRd
fit_kde1d_cpp <- function(x, bw, xmin, xmax, deg, weights) {
.Call('_kde1d_fit_kde1d_cpp', PACKAGE = 'kde1d', x, bw, xmin, xmax, deg, weights)
fit_kde1d_cpp <- function(x, nlevels, bw, mult, xmin, xmax, deg, weights) {
.Call('_kde1d_fit_kde1d_cpp', PACKAGE = 'kde1d', x, nlevels, bw, mult, xmin, xmax, deg, weights)
}

#' computes the pdf of a kernel density estimate by interpolation.
#' @param x vector of evaluation points.
#' @param R_object the fitted object passed from R.
#' @param kde1d_r the fitted object passed from R.
#' @return a vector of pdf values.
#' @noRd
dkde1d_cpp <- function(x, R_object) {
.Call('_kde1d_dkde1d_cpp', PACKAGE = 'kde1d', x, R_object)
dkde1d_cpp <- function(x, kde1d_r) {
.Call('_kde1d_dkde1d_cpp', PACKAGE = 'kde1d', x, kde1d_r)
}

#' computes the cdf of a kernel density estimate by numerical integration.
#' @param x vector of evaluation points.
#' @param R_object the fitted object passed from R.
#' @param kde1d_r the fitted object passed from R.
#' @return a vector of cdf values.
#' @noRd
pkde1d_cpp <- function(x, R_object) {
.Call('_kde1d_pkde1d_cpp', PACKAGE = 'kde1d', x, R_object)
pkde1d_cpp <- function(q, kde1d_r) {
.Call('_kde1d_pkde1d_cpp', PACKAGE = 'kde1d', q, kde1d_r)
}

#' computes the quantile of a kernel density estimate by numerical inversion
#' (bisection method).
#' @param x vector of evaluation points.
#' @param R_object the fitted object passed from R.
#' @param kde1d_r the fitted object passed from R.
#' @return a vector of quantiles.
#' @noRd
qkde1d_cpp <- function(x, R_object) {
.Call('_kde1d_qkde1d_cpp', PACKAGE = 'kde1d', x, R_object)
}

#' @param x vector of observations
#' @param bw bandwidth parameter, NA for automatic selection.
#' @param mult bandwidth multiplier.
#' @param discrete whether a jittered estimate is computed.
#' @param weights vector of weights for each observation (can be empty).
#' @param deg polynomial degree.
#' @return the selected bandwidth
#' @noRd
select_bw_cpp <- function(x, bw, mult, discrete, weights, deg) {
.Call('_kde1d_select_bw_cpp', PACKAGE = 'kde1d', x, bw, mult, discrete, weights, deg)
qkde1d_cpp <- function(p, kde1d_r) {
.Call('_kde1d_qkde1d_cpp', PACKAGE = 'kde1d', p, kde1d_r)
}

48 changes: 6 additions & 42 deletions R/kde1d_methods.R → R/kde1d-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,62 +32,26 @@
#' @export
dkde1d <- function(x, obj) {
x <- prep_eval_arg(x, obj)

# adjust grid to stabilize estimates
rng <- diff(range(obj$grid_points))
if (!is.nan(obj$xmin))
obj$grid_points[1] <- obj$xmin - 0.1 * rng
if (!is.nan(obj$xmax))
obj$grid_points[length(obj$grid_points)] <- obj$xmax + 0.1 * rng

fhat <- dkde1d_cpp(as.numeric(x), obj)
if (is.ordered(obj$x)) {
# for discrete variables we can normalize
f_all <- dkde1d_cpp(seq_along(levels(obj$x)), obj)
fhat <- fhat / sum(f_all)
}

as.vector(fhat)
dkde1d_cpp(as.numeric(x), obj)
}

#' @param q vector of quantiles.
#' @rdname dkde1d
#' @export
pkde1d <- function(q, obj) {
q <- prep_eval_arg(q, obj)

if (is.numeric(obj$x)) {
p <- pkde1d_cpp(q, obj)
} else {
if (!is.ordered(q)) {
q <- ordered(q, levels(obj$x))
}
x_all <- ordered(levels(obj$x), levels(obj$x))
p_all <- dkde1d(x_all, obj)
p_total <- sum(p_all)
p <- sapply(q, function(y) sum(p_all[x_all <= y] / p_total))
p <- pmin(pmax(p, 0), 1)
}

p
pkde1d_cpp(q, obj)
}

#' @param p vector of probabilities.
#' @rdname dkde1d
#' @export
qkde1d <- function(p, obj) {
stopifnot(all(na.omit(p) > 0.0) & all(na.omit(p) < 1.0))
if (is.numeric(obj$x)) {
q <- qkde1d_cpp(p, obj)
} else {
## for discrete variables compute quantile from the density
x_all <- ordered(levels(obj$x), levels(obj$x))
# pdf at all possible values of x
pp <- pkde1d(x_all, obj)
# generalized inverse
q <- x_all[vapply(p, function(y) which(y <= pp)[1], integer(1))]
q <- qkde1d_cpp(p, obj)
if (is.ordered(obj$x)) {
## for discrete variables, add factor levels
q <- ordered(levels(obj$x)[q + 1], levels(obj$x))
}

q
}

Expand Down
31 changes: 11 additions & 20 deletions R/kde1d.R
Original file line number Diff line number Diff line change
Expand Up @@ -98,36 +98,27 @@
#' lines(kde1d(x), col = 2)
#' @importFrom stats na.omit
#' @export
kde1d <- function(x, xmin = NaN, xmax = NaN, mult = 1, bw = NA,
deg = 2, weights = numeric(0)) {
kde1d <- function(x, xmin = NaN, xmax = NaN, mult = 1, bw = NA, deg = 2,
weights = numeric(0)) {
x <- na.omit(x)
# sanity checks
check_arguments(x, mult, xmin, xmax, bw, deg, weights)
w_norm <- weights / mean(weights)

# equidistant jittering for discrete variables
x_jtr <- equi_jitter(x)

# bandwidth selection
bw <- select_bw_cpp(
boundary_transform(na.omit(x_jtr), xmin, xmax),
bw,
mult,
is.ordered(x),
w_norm,
deg
)

# fit model
fit <- fit_kde1d_cpp(na.omit(x_jtr), bw, xmin, xmax, deg, w_norm)
fit <- fit_kde1d_cpp(x = if (is.numeric(x)) x else (as.numeric(x) - 1),
nlevels = length(levels(x)),
bw = bw,
mult = mult,
xmin = xmin,
xmax = xmax,
deg = deg,
weights = weights)

# add info
fit$var_name <- as.character(match.call()[2])
fit$nobs <- length(x)
fit$nobs <- sum(!is.na(x))
fit$weights <- weights
fit$x <- x

# return as kde1d object
class(fit) <- "kde1d"
fit
}
6 changes: 3 additions & 3 deletions R/tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,15 @@ prep_eval_arg <- function(x, obj) {
if (is.data.frame(x))
x <- x[[1]]
if (!is.ordered(x) & is.ordered(obj$x))
x <- ordered(x, levels(obj$x))
x <- as.ordered(x)
if (is.numeric(x))
return(x)

stopifnot(is.ordered(x))
if (!all(levels(x) %in% levels(obj$x)))
stop("'x' contains levels that were not observed in the data.")
stop("'x' contains levels that weren't present when fitting.")
levels(x) <- levels(obj$x)
if (!is.ordered(x) & is.ordered(obj$x))
x <- ordered(x, levels(obj$x))
x
as.numeric(x) - 1
}
33 changes: 33 additions & 0 deletions inst/include/kde1d-wrappers.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once

#include "kde1d/kde1d.hpp"

namespace kde1d {

inline Rcpp::List kde1d_wrap(const Kde1d& kde1d_cpp)
{
auto kde1d_r = Rcpp::List::create(
Rcpp::Named("grid_points") = kde1d_cpp.get_grid_points(),
Rcpp::Named("values") = kde1d_cpp.get_values(),
Rcpp::Named("nlevels") = kde1d_cpp.get_nlevels(),
Rcpp::Named("bw") = kde1d_cpp.get_bw(),
Rcpp::Named("xmin") = kde1d_cpp.get_xmin(),
Rcpp::Named("xmax") = kde1d_cpp.get_xmax(),
Rcpp::Named("deg") = kde1d_cpp.get_deg(),
Rcpp::Named("edf") = kde1d_cpp.get_edf(),
Rcpp::Named("loglik") = kde1d_cpp.get_loglik()
);
kde1d_r.attr("class") = "kde1d";

return kde1d_r;
}

inline Kde1d kde1d_wrap(const Rcpp::List& kde1d_r)
{
auto grid = interp::InterpolationGrid1d(
kde1d_r["grid_points"], kde1d_r["values"], 0);
return Kde1d(grid, kde1d_r["nlevels"], kde1d_r["xmin"], kde1d_r["xmax"]);
}


}
8 changes: 8 additions & 0 deletions inst/include/dpik.hpp → inst/include/kde1d/dpik.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
#define _USE_MATH_DEFINES
#include <cmath>

namespace kde1d {

namespace bw {

//! Bandwidth selection for local-likelihood density estimation.
//! Methodology is similar to Sheather and Jones(1991), but asymptotic
//! bias/variance expressions are adapted for higher-order polynomials and
Expand Down Expand Up @@ -232,3 +236,7 @@ inline double PluginBandwidthSelector::select_bw(size_t deg)
return bw;
}

} // end kde1d::bw

} // end kde1d

Loading