Skip to content

Commit

Permalink
Report contexts as lists of ctx_node for context trees (issue #50).
Browse files Browse the repository at this point in the history
  • Loading branch information
fabrice-rossi committed Oct 5, 2023
1 parent 267c4e3 commit fc76b58
Show file tree
Hide file tree
Showing 22 changed files with 297 additions and 131 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ S3method(predict,covlmc)
S3method(predict,vlmc)
S3method(prepare_covariate,data.frame)
S3method(prepare_covariate,matrix)
S3method(print,contexts)
S3method(print,covlmc)
S3method(print,ctx_node)
S3method(print,ctx_tree)
Expand Down
70 changes: 54 additions & 16 deletions R/contexts.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
path_list_extractor <- function(path, ct, vals, control, is_leaf, p_summary) {
path_list_extractor <- function(tree, path, ct, vals, control, is_leaf, p_summary) {
if (is_leaf) {
if (is.null(ct[["f_by"]])) {
NULL
Expand All @@ -18,7 +18,7 @@ no_summary <- function(ct) {
NULL
}

path_df_extractor <- function(path, ct, vals, control, is_leaf, p_summary) {
path_df_extractor <- function(tree, path, ct, vals, control, is_leaf, p_summary) {
if (is_leaf) {
if (is.null(ct[["f_by"]])) {
NULL
Expand Down Expand Up @@ -50,16 +50,16 @@ path_df_extractor <- function(path, ct, vals, control, is_leaf, p_summary) {
#' 1. it applies the summarize function to itself (summarize(ct)).
#' 2. it calls itself recursively on each child and gather the results.
#' A `NULL` result is discarded.
#' 2. it calls `extractor` on the current node `extractor(path, ct, vals, control, TRUE/FALSE, p_summary)`
#' 2. it calls `extractor` on the current node `extractor(tree, path, ct, vals, control, TRUE/FALSE, p_summary)`
#' the fourth parameter is TRUE for a leaf node (no children) and FALSE for another node.
#' 3. the result is aggregated with sub results if available
#'
#' `extractor` should only return a non NULL result if valid contexts can be extracted from the ct.
#' @noRd
rec_contexts_extractor <- function(path, ct, vals, extractor, control, summarize, p_summary) {
rec_contexts_extractor <- function(tree, path, ct, vals, extractor, control, summarize, p_summary) {
if (is.null(ct$children)) {
## this is a leaf
extractor(path, ct, vals, control, TRUE, p_summary)
extractor(tree, path, ct, vals, control, TRUE, p_summary)
} else {
all_ctx <- NULL
l_summary <- summarize(ct)
Expand All @@ -70,19 +70,21 @@ rec_contexts_extractor <- function(path, ct, vals, extractor, control, summarize
sub_path <- c(path, vals[v])
}
sub_ctx <- rec_contexts_extractor(
tree,
sub_path, ct$children[[v]], vals,
extractor, control, summarize, l_summary
)
all_ctx <- flex_append(all_ctx, sub_ctx)
}
local_ctx <- extractor(path, ct, vals, control, FALSE, p_summary)
local_ctx <- extractor(tree, path, ct, vals, control, FALSE, p_summary)
all_ctx <- flex_append(all_ctx, local_ctx)
all_ctx
}
}

contexts_extractor <- function(ct, reverse, extractor, control, summarize = no_summary) {
preres <- rec_contexts_extractor(NULL, ct, ct$vals, extractor, control, summarize, summarize(ct))
## ct is the tree (i.e. the root of the tree)
preres <- rec_contexts_extractor(ct, NULL, ct, ct$vals, extractor, control, summarize, summarize(ct))
if (is.data.frame(preres)) {
if (!reverse) {
new_res <- data.frame(context = I(lapply(preres$context, rev)))
Expand All @@ -97,11 +99,15 @@ contexts_extractor <- function(ct, reverse, extractor, control, summarize = no_s
}
preres
} else {
## we have a list of ctx_node
if (!reverse) {
preres <- lapply(preres, rev)
## reverse each ctx node
preres <- lapply(preres, \(x) rev(x))
}
if (is.null(preres[[length(preres)]])) {
preres[[length(preres)]] <- ct$vals[0]
## should never happen
stop("internal error in contexts_extractor")
## preres[[length(preres)]] <- ct$vals[0]
}
preres
}
Expand All @@ -113,11 +119,14 @@ contexts_extractor <- function(ct, reverse, extractor, control, summarize = no_s
#' contexts.
#'
#' The default behaviour consists in returning a list of all the contexts
#' contained in the tree (with `type="auto"` or `type="list"`). When
#' contained in the tree using `ctx_node` objects (as returned by e.g.
#' [find_sequence()]) (with `type="list"`). The properties of the contexts can
#' then be explored using adapted functions such as [counts()] and
#' [positions()]. The result list is of class `contexts`. When
#' `type="data.frame"`, the method returns a data.frame whose first column,
#' named `context`, contains the contexts. Other columns contain context
#' specific values which depend on the actual class of the tree and on
#' additional parameters. An adapted return type is chosen when type="auto"`.
#' named `context`, contains the contexts as vectors. Other columns contain
#' context specific values which depend on the actual class of the tree and on
#' additional parameters.
#'
#' @section State order in a context: Notice that contexts are given by default
#' in the "reverse" order used by the VLMC papers: older values are on the
Expand All @@ -130,15 +139,44 @@ contexts_extractor <- function(ct, reverse, extractor, control, summarize = no_s
#' @param reverse logical (defaults to TRUE). See details.
#' @param ... additional arguments for the contexts function.
#'
#' @returns The list of the contexts represented in this tree or a data.frame
#' with more content.
#' @returns The list of class `contexts` containing the contexts represented in
#' this tree (as `ctx_node`) or a data.frame.
#' @examples
#' dts <- sample(as.factor(c("A", "B", "C")), 100, replace = TRUE)
#' dts_tree <- ctx_tree(dts, max_depth = 3, min_size = 5)
#' contexts(dts_tree)
#' contexts(dts_tree, "data.frame", TRUE)
#' @seealso [contexts.ctx_tree()], [contexts.vlmc()], [contexts.covlmc()].
#' @export
contexts <- function(ct, type = c("auto", "list", "data.frame"), reverse = TRUE, ...) {
contexts <- function(ct, type = c("list", "data.frame"), reverse = TRUE, ...) {
UseMethod("contexts")
}

new_context_list <- function(ctx_list, ..., class = character()) {
structure(ctx_list, ..., class = c(class, "contexts", class(ctx_list)))
}

#' Print a context list
#'
#' This function prints a list of contexts i.e. a `contexts` object listing
#' `ctx_node` objects.
#'
#' @param x the `contexts` object to print
#' @param reverse specifies whether the contexts should be reported in reverse
#' temporal order (`TRUE`, default value) or in the temporal order (`FALSE`).
#' @param ... additional arguments for the print function.
#' @returns the `x` object, invisibly
#' @seealso [contexts()]
#' @export
#' @examples
#' dts <- c("A", "B", "C", "A", "A", "B", "B", "C", "C", "A")
#' dts_tree <- ctx_tree(dts, max_depth = 3)
#' print(contexts(dts_tree))
print.contexts <- function(x, reverse = TRUE, ...) {
cat("Contexts:\n")
for (i in seq_along(x)) {
the_seq <- as_sequence(x[[i]], reverse = reverse)
cat(" ", paste(the_seq, collapse = ", "), "\n", sep = "")
}
invisible(x)
}
6 changes: 3 additions & 3 deletions R/covlmc_contexts.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ covlmc_model_extractor <- function(res, model, control) {
flex_cbind(res, cores)
}

covlmc_context_extractor <- function(path, ct, vals, control, is_leaf, p_summary) {
covlmc_context_extractor <- function(tree, path, ct, vals, control, is_leaf, p_summary) {
if (is.null(ct[["model"]])) {
if (!is.null(ct[["merged_model"]])) {
res <- NULL
Expand All @@ -59,7 +59,7 @@ covlmc_context_extractor <- function(path, ct, vals, control, is_leaf, p_summary
} else {
sub_path <- c(path, vals[v])
}
l_res <- frequency_context_extractor(sub_path, ct$children[[v]], vals, control, is_leaf, p_summary)
l_res <- frequency_context_extractor(tree, sub_path, ct$children[[v]], vals, control, is_leaf, p_summary)
l_res <- covlmc_model_extractor(l_res, ct$merged_model, control)
if (isTRUE(control[["merging"]])) {
l_res$merged <- TRUE
Expand All @@ -71,7 +71,7 @@ covlmc_context_extractor <- function(path, ct, vals, control, is_leaf, p_summary
NULL
}
} else {
res <- frequency_context_extractor(path, ct, vals, control, is_leaf, p_summary)
res <- frequency_context_extractor(tree, path, ct, vals, control, is_leaf, p_summary)
if (!is.null(control[["model"]]) || isTRUE(control[["hsize"]]) ||
isTRUE(control[["metrics"]]) || isTRUE(control[["merging"]])) {
res <- covlmc_model_extractor(res, ct$model, control)
Expand Down
2 changes: 1 addition & 1 deletion R/covlmc_metrics.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
covlmc_predictive_extractor <- function(path, ct, vals, control, is_leaf, p_summary) {
covlmc_predictive_extractor <- function(tree, path, ct, vals, control, is_leaf, p_summary) {
if (is.null(ct[["model"]])) {
if (!is.null(ct[["merged_model"]])) {
data.frame(
Expand Down
61 changes: 38 additions & 23 deletions R/ctx_tree_contexts.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
frequency_context_extractor <-
function(path, ct, vals, control, is_leaf, p_summary) {
function(tree, path, ct, vals, control, is_leaf, p_summary) {
if ((is_leaf && !is.null(ct[["f_by"]])) ||
(!is_leaf && nb_sub_tree(ct) < length(vals))) {
if (is.null(control[["frequency"]])) {
Expand Down Expand Up @@ -62,17 +62,17 @@ frequency_context_extractor <-
#' be reported in a `positions` column of the result data frame. The
#' availability of the positions depends on the way the context
#' tree was built. See details for the definition of a position.
#' @details The default result for `type="auto"` (or `type="list"`) and
#' `frequency=NULL` is the list of all contexts.
#' @details The `frequency` and `positions` parameters influence only the
#' results when `type="data.frame"`. In this case the resulting `data.frame`
#' has a `context` column storing the contexts. If `frequency="total"`, an
#' additional column named `freq` gives the number of occurrences of each
#' context in the series used to build the tree. If `frequency="detailed"`,
#' one additional column is added per state in the context space. Each column
#' records the number of times a given context is followed by the
#' corresponding value in the original series.
#'
#' Other results are obtained only with `type="data.frame"` (or
#' `type="auto"`). In this case the resulting `data.frame` has a `context`
#' column storing the contexts. If `frequency="total"`, an additional column
#' named `freq` gives the number of occurrences of each context in the series
#' used to build the tree. If `frequency="detailed"`, one additional column is
#' added per state in the context space. Each column records the number of
#' times a given context is followed by the corresponding value in the
#' original series.
#' When `type="list"`, similar information can be obtained from the `ctx_node`
#' objects returned by the function as a `contexts` list.
#'
#' @section Positions: A position of a context `ctx` in the time series `x` is an
#' index value `t` such that the context ends with `x[t]`. Thus `x[t+1]` is after
Expand All @@ -83,34 +83,49 @@ frequency_context_extractor <-
#' dts <- sample(as.factor(c("A", "B", "C")), 100, replace = TRUE)
#' dts_tree <- ctx_tree(dts, max_depth = 3, min_size = 5)
#' contexts(dts_tree)
#' contexts(dts_tree, frequency = "total")
#' contexts(dts_tree, frequency = "detailed")
#' contexts(dts_tree, type = "data.frame", frequency = "total")
#' contexts(dts_tree, type = "data.frame", frequency = "detailed")
#' @export
contexts.ctx_tree <- function(ct, type = c("auto", "list", "data.frame"),
contexts.ctx_tree <- function(ct, type = c("list", "data.frame"),
reverse = TRUE, frequency = NULL,
positions = FALSE, ...) {
type <- match.arg(type)
if (is.null(frequency) && isFALSE(positions)) {
basic_extractor <- switch(type,
"auto" = path_list_extractor,
"list" = path_list_extractor,
"data.frame" = path_df_extractor
)
contexts_extractor(ct, reverse, basic_extractor, NULL)
if (type == "list") {
new_context_list(contexts_extractor(ct, reverse, node_content_extractor, NULL))
} else {
assertthat::assert_that(type %in% c("auto", "data.frame"))
if (!is.null(frequency)) {
assertthat::assert_that(frequency %in% c("total", "detailed"))
extractor <- frequency_context_extractor
} else if (positions) {
extractor <- frequency_context_extractor
} else {
extractor <- path_df_extractor
}
control <- list(frequency = frequency, positions = positions)
pre_res <- contexts_extractor(ct, reverse, frequency_context_extractor, control)
pre_res <- contexts_extractor(ct, reverse, extractor, control)
if (positions) {
pre_res$positions <- I(pre_res$positions)
}
pre_res
}
}

node_content_extractor <- function(tree, path, ct, vals, control, is_leaf, p_summary) {
if (is_leaf) {
if (is.null(ct[["f_by"]])) {
NULL
} else {
list(new_ctx_node(path, tree, ct, TRUE))
}
} else {
if (nb_sub_tree(ct) < length(vals)) {
list(new_ctx_node(path, tree, ct, TRUE))
} else {
NULL
}
}
}

rec_match_context <- function(tree, d, ctx) {
if (length(ctx) == 0L) {
list(tree = tree, depth = d)
Expand Down
6 changes: 3 additions & 3 deletions R/summary_ctx_tree.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#' @exportS3Method
summary.ctx_tree <- function(object, ...) {
ctx <- contexts(object, frequency = "detailed")
all_length <- sapply(ctx$context, length)
ctx <- contexts(object)
all_length <- sapply(ctx, length)
res <- list(
state_space = states(object),
nb = nrow(ctx),
nb = length(ctx),
depth = max(all_length),
avg_depth = mean(all_length)
)
Expand Down
2 changes: 1 addition & 1 deletion R/summary_vlmc.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' @exportS3Method
summary.vlmc <- function(object, ...) {
ctx <- contexts(object, frequency = "detailed")
ctx <- contexts(object, type = "data.frame", frequency = "detailed")
all_length <- sapply(ctx$context, length)
res <- list(
state_space = states(object),
Expand Down
6 changes: 3 additions & 3 deletions R/vlmc_contexts.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ vlmc_parent_summary <- function(ctx) {
}

vlmc_context_extractor <-
function(path, ct, vals, control, is_leaf, p_summary) {
res <- frequency_context_extractor(path, ct, vals, control, is_leaf, p_summary)
function(tree, path, ct, vals, control, is_leaf, p_summary) {
res <- frequency_context_extractor(tree, path, ct, vals, control, is_leaf, p_summary)
if (is.null(res)) {
NULL
} else {
Expand All @@ -18,7 +18,7 @@ vlmc_context_extractor <-
l_cont <- control
l_cont$frequency <- "detailed"
l_cont$counts <- "local"
lres <- frequency_context_extractor(path, ct, vals, l_cont, is_leaf, p_summary)
lres <- frequency_context_extractor(tree, path, ct, vals, l_cont, is_leaf, p_summary)
} else {
lres <- res
}
Expand Down
4 changes: 2 additions & 2 deletions R/vlmc_metrics.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ generate_fake_data <- function(freq, counts, probs, vals) {
#'
#' @exportS3Method
metrics.vlmc <- function(model, ...) {
all_ctx <- contexts(model, frequency = "detailed", counts = "local")
all_ctx_global <- contexts(model, frequency = "detailed")
all_ctx <- contexts(model, type = "data.frame", frequency = "detailed", counts = "local")
all_ctx_global <- contexts(model, type = "data.frame", frequency = "detailed")
probs <- sweep(as.matrix(all_ctx_global[, -(1:2)]), 1, all_ctx_global$freq, "/")
counts <- as.matrix(all_ctx[, -(1:2)])
accounted_for <- sum(all_ctx$freq)
Expand Down
17 changes: 10 additions & 7 deletions man/contexts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit fc76b58

Please sign in to comment.