Skip to content

Commit

Permalink
First version of the VLMC for collections of time series (issue #30).
Browse files Browse the repository at this point in the history
  • Loading branch information
fabrice-rossi committed Feb 16, 2024
1 parent cc00941 commit 055148f
Show file tree
Hide file tree
Showing 9 changed files with 499 additions and 1 deletion.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ S3method(logLik,covlmc)
S3method(logLik,vlmc)
S3method(logLik,vlmc_cpp)
S3method(loglikelihood,covlmc)
S3method(loglikelihood,multi_vlmc)
S3method(loglikelihood,vlmc)
S3method(loglikelihood,vlmc_cpp)
S3method(metrics,covlmc)
Expand Down Expand Up @@ -111,6 +112,7 @@ S3method(print,vlmc_cpp)
S3method(probs,ctx_node)
S3method(probs,ctx_node_cpp)
S3method(prune,covlmc)
S3method(prune,multi_vlmc)
S3method(prune,vlmc)
S3method(prune,vlmc_cpp)
S3method(restore_model,ctx_tree_cpp)
Expand Down Expand Up @@ -158,6 +160,7 @@ export(merged_with)
export(metrics)
export(model)
export(multi_ctx_tree)
export(multi_vlmc)
export(parent)
export(positions)
export(prune)
Expand Down
38 changes: 38 additions & 0 deletions R/vlmc_likelihood_multi.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#' @inherit loglikelihood
#' @param newdata either a discrete time series or a list of discrete time
#' series
#' @description This function evaluates the log-likelihood of a VLMC fitted on a
#' collection of discrete time series.
#'
#' @section Limitation:
#'
#' VLMC fitted via [multi_vlmc()] on a collection discrete time series do not
#' support likelihood calculation with `newdata`.
#'
#' @seealso [multi_vlmc()]
#' @export
loglikelihood.multi_vlmc <- function(vlmc, newdata, initial = c("truncated", "specific", "extended"),
ignore, ...) {
assertthat::assert_that(!missing(newdata))
if (!is.list(newdata)) {
NextMethod()
} else {
initial <- match.arg(initial)
ll <- 0
nb_obs <- 0L
for (k in seq_along(newdata)) {
if (missing(ignore)) {
the_ll <- loglikelihood(vlmc, newdata[[k]], initial, ...)
} else {
the_ll <- loglikelihood(vlmc, newdata[[k]], initial, ignore, ...)
}
ll <- ll + as.numeric(the_ll)
nb_obs <- nb_obs + attr(the_ll, "nobs")
}
df <- attr(the_ll, "df")
attr(ll, "df") <- df
attr(ll, "nobs") <- nb_obs
attr(ll, "initial") <- initial
structure(ll, class = c("logLikMixVLMC", "logLik"))
}
}
106 changes: 106 additions & 0 deletions R/vlmc_multi.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
## VLMC for multiple series

#' Fit a Variable Length Markov Chain (VLMC) to a collection of time series
#'
#' This function fits a Variable Length Markov Chain (VLMC) to a collection of
#' discrete time series.
#'
#' Owing to the iterative nature of the construction, this function may use a large
#' quantity of memory as pruning infrequent contexts is only done after
#' computing all of them. It is therefore recommend to avoid large depths and
#' the default value of `max_depth` is smaller than in the single time series
#' function [vlmc()].
#'
#' @param xs list of discrete times series
#' @param alpha number in (0,1] (default: 0.05) cut off value in quantile scale
#' in the pruning phase.
#' @param cutoff non negative number: cut off value in native (likelihood ratio)
#' scale in the pruning phase. Defaults to the value obtained from `alpha`.
#' Takes precedence over `alpha` is specified.
#' @param min_size integer >= 1 (default: 2). Minimum number of observations for
#' a context in the growing phase of the context tree.
#' @param max_depth integer >= 1 (default: 25). Longest context considered in
#' growing phase of the context tree.
#' @param prune logical: specify whether the context tree should be pruned
#' (default behaviour).
#' @param keep_match logical: specify whether to keep the context matches
#' (default to FALSE)
#' @returns a fitted vlmc model (of class `multi_vlmc`)
#' @examples
#' pc <- powerconsumption[powerconsumption$week %in% 5:8, ]
#' powerlevels <- quantile(pc$active_power, probs = c(0.25, 0.5, 0.75, 1))
#' dts <- lapply(
#' 5:8,
#' function(x) {
#' cut(pc$active_power[pc$week == x],
#' breaks = c(0, powerlevels)
#' )
#' }
#' )
#' model <- multi_vlmc(dts, max_depth = 3)
#' draw(model)
#' depth(model)
#' @export
#' @seealso [multi_ctx_tree()], [vlmc()]
multi_vlmc <- function(xs, alpha = 0.05, cutoff = NULL, min_size = 2L, max_depth = 25L,
prune = TRUE, keep_match = FALSE) {
## keep_match=TRUE is currently not supported
assertthat::assert_that(!keep_match)
assertthat::assert_that(is.list(xs))
ctx_tree <- multi_ctx_tree(xs,
min_size = min_size, max_depth = max_depth,
keep_position = keep_match
)
if (is.null(cutoff)) {
if (is.null(alpha) || !is.numeric(alpha) || alpha <= 0 || alpha > 1) {
stop("the alpha parameter must be in (0, 1]")
}
cutoff <- to_native(alpha, length(ctx_tree$vals))
} else {
## cutoff takes precedence
if (!is.numeric(cutoff) || cutoff < 0) {
stop("the cutoff parameter must be a non negative number")
}
alpha <- to_quantile(cutoff, length(ctx_tree$vals))
}
if (prune) {
result <- prune_ctx_tree(ctx_tree, alpha = alpha, cutoff = cutoff)
class(result) <- c("multi_vlmc", class(result))
} else {
result <- new_ctx_tree(ctx_tree$vals, ctx_tree, class = c("multi_vlmc", "vlmc"))
}
result$alpha <- alpha
result$cutoff <- cutoff
result$keep_match <- keep_match
result$data_size <- sum(lengths(xs, use.names = FALSE))
result$pruned <- prune
result
}

#' @rdname prune
#' @export
prune.multi_vlmc <- function(vlmc, alpha = 0.05, cutoff = NULL, ...) {
if (is.null(cutoff)) {
if (is.null(alpha) || !is.numeric(alpha) || alpha <= 0 || alpha > 1) {
stop("the alpha parameter must be in (0, 1]")
}
}
result <- prune_ctx_tree(vlmc,
alpha = alpha, cutoff = cutoff,
class = c("multi_vlmc", "vlmc")
)
if (is.null(cutoff)) {
cutoff <- to_native(alpha, length(vlmc$vals))
} else {
## cutoff takes precedence
alpha <- to_quantile(cutoff, length(vlmc$vals))
}
result$alpha <- alpha
result$cutoff <- cutoff
result$data_size <- vlmc$data_size
result$keep_match <- vlmc$keep_match
## preserve the construction information
result$max_depth <- vlmc$max_depth
result$pruned <- TRUE
result
}
137 changes: 137 additions & 0 deletions R/vlmc_tune_multi.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
tune_multi_vlmc <- function(xs, criterion = c("BIC", "AIC"),
initial = c("truncated", "specific", "extended"),
alpha_init = NULL, cutoff_init = NULL,
min_size = 2L, max_depth = 10L,
verbose = 0,
save = c("best", "initial", "all")) {
criterion <- match.arg(criterion)
initial <- match.arg(initial)
save <- match.arg(save)
data_sizes <- lengths(xs)
if (is.null(alpha_init) && is.null(cutoff_init)) {
if (criterion == "BIC") {
cutoff <- 0.25 * log(sum(data_sizes))
f_criterion <- stats::BIC
} else {
cutoff <- 1
f_criterion <- stats::AIC
}
} else {
if (is.null(cutoff_init)) {
if (is.null(alpha_init) || !is.numeric(alpha_init) || alpha_init <= 0 || alpha_init > 1) {
stop("the alpha_init parameter must be in (0, 1]")
}
## we need to compute the state model
nx <- to_dts(xs[[1]])
cutoff <- to_native(alpha_init, length(nx$vals))
} else {
## cutoff takes precedence
if (!is.numeric(cutoff_init) || cutoff_init < 0) {
stop("the cutoff_init parameter must be a non negative number")
}
cutoff <- cutoff_init
}
}
if (criterion == "BIC") {
f_criterion <- stats::BIC
} else {
f_criterion <- stats::AIC
}
if (verbose > 0) {
cat("Fitting a vlmc with max_depth=", max_depth, "and cutoff=", cutoff, "\n")
}
saved_models <- list()
base_model <- multi_vlmc(xs,
cutoff = cutoff, min_size = min_size,
max_depth = max_depth
)
while (base_model$max_depth) {
n_max_depth <- min(2 * max_depth, min(data_sizes) - 1)
if (n_max_depth > max_depth) {
if (verbose > 0) {
cat("Max depth reached, increasing it to", n_max_depth, "\n")
}
max_depth <- n_max_depth
base_model <- multi_vlmc(xs, cutoff = cutoff, min_size = min_size, max_depth = max_depth)
} else {
warning("cannot find a suitable value for max_depth")
break
}
}
if (verbose > 0) {
cat("Pruning phase\n")
}
cutoffs <- cutoff(base_model, scale = "native")
results <- data.frame(
cutoff = c(cutoff, cutoffs),
alpha = to_quantile(c(cutoff, cutoffs), length(states(base_model))),
depth = rep(NA, length(cutoffs) + 1),
nb_contexts = rep(NA, length(cutoffs) + 1),
loglikelihood = rep(NA, length(cutoffs) + 1),
AIC = rep(NA, length(cutoffs) + 1),
BIC = rep(NA, length(cutoffs) + 1)
)
k <- 1
model <- base_model
best_crit <- Inf
if (verbose > 0) {
cat("Initial criterion =", best_crit, "\n")
}
if (save == "all") {
all_models <- vector(mode = "list", length = length(cutoffs))
}
max_order <- depth(model)
repeat {
if (verbose > 0) {
cat("Computing loglikelihood\n")
}
if (initial == "truncated") {
ll <- loglikelihood(model, initial = "truncated", newdata = xs, ignore = max_order)
} else {
ll <- loglikelihood(model, initial = initial, newdata = xs)
}
crit <- f_criterion(ll)
if (crit <= best_crit) {
best_crit <- crit
best_model <- model
if (verbose > 0) {
cat(
"Improving criterion =", best_crit, "likelihood =", ll,
"df =", attr(ll, "df"),
"nobs = ", attr(ll, "nobs"), "\n"
)
}
}
results$depth[k] <- depth(model)
results$nb_contexts[k] <- context_number(model)
results$loglikelihood[k] <- ll
results$AIC[k] <- stats::AIC(ll)
results$BIC[k] <- stats::BIC(ll)
if (k <= length(cutoffs)) {
if (verbose > 0) {
cat("Pruning vlmc with cutoff =", cutoffs[k], "\n")
}
model <- prune(model, cutoff = cutoffs[k])
if (save == "all") {
all_models[[k]] <- model
}
k <- k + 1
} else {
break
}
}
pre_result <- list(
best_model = best_model,
best_ll = loglikelihood(best_model, newdata = xs),
criterion = criterion,
initial = initial,
results = results,
cutoffs = c(cutoff, cutoffs)
)
if (save == "all") {
pre_result[["saved_models"]] <- list(initial = base_model, all = all_models)
} else if (save == "initial") {
pre_result[["saved_models"]] <- list(initial = base_model)
}
structure(pre_result, class = "tune_vlmc")
}
2 changes: 2 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ reference:
desc: Functions used to estimate a (CO)VLMC model on a collection of time series rather than on a single one
- contents:
- multi_ctx_tree
- multi_vlmc
- loglikelihood.multi_vlmc
- title: Global options
- contents:
- mixvlmc-package
Expand Down
Loading

0 comments on commit 055148f

Please sign in to comment.