Skip to content

Commit

Permalink
Add context tree node manipulation (issue #50).
Browse files Browse the repository at this point in the history
  • Loading branch information
fabrice-rossi committed Oct 25, 2023
1 parent 698d130 commit 273d5f8
Show file tree
Hide file tree
Showing 16 changed files with 783 additions and 4 deletions.
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ S3method(draw,ctx_tree)
S3method(draw,ctx_tree_cpp)
S3method(draw,vlmc)
S3method(draw,vlmc_cpp)
S3method(find_sequence,ctx_tree)
S3method(glm_coef,constant_model)
S3method(glm_coef,default)
S3method(glm_coef,glm)
Expand Down Expand Up @@ -73,6 +74,7 @@ S3method(predict,vlmc_cpp)
S3method(prepare_covariate,data.frame)
S3method(prepare_covariate,matrix)
S3method(print,covlmc)
S3method(print,ctx_node)
S3method(print,ctx_tree)
S3method(print,ctx_tree_cpp)
S3method(print,dts)
Expand Down Expand Up @@ -108,9 +110,12 @@ S3method(trim,ctx_tree)
S3method(trim,vlmc)
S3method(trim,vlmc_cpp)
export(as_covlmc)
export(as_sequence)
export(as_vlmc)
export(children)
export(context_number)
export(contexts)
export(counts)
export(covariate_depth)
export(covlmc)
export(covlmc_control)
Expand All @@ -119,11 +124,15 @@ export(cutoff)
export(depth)
export(draw)
export(draw_control)
export(find_sequence)
export(is_context)
export(is_covlmc)
export(is_ctx_tree)
export(is_vlmc)
export(loglikelihood)
export(metrics)
export(parent)
export(positions)
export(prune)
export(states)
export(trim)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
Those function are documented in a new vignette (`vignette("prediction")`)
* `contexts()` can now report the positions of each context in the original time
series
* nodes of context trees can be extracted individually as `ctx_node` objects
using the `find_sequence()` function. A collection of new functions can be
used to manipulate the nodes and gain fine grain information on the
corresponding sequences (issue #50).
* the log likelihood calculations performed by `logLik.vlmc()`, `logLik.covlmc()`
`loglikelihood()` and `loglikelihood.covlmc()` have been revised, expanded to
include three possible definitions of the likelihood function, and documented
Expand Down
210 changes: 210 additions & 0 deletions R/ctx_node.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
new_ctx_node <- function(ctx, tree, node) {
structure(
list(
sequence = ctx, node = node, tree = tree,
is_context = count_local_context(node) > 0
),
class = "ctx_node"
)
}

#' @export
print.ctx_node <- function(x, ...) {
if (x$is_context) {
cat("Context: ")
} else {
cat("Sequence: ")
}
cat(paste(x$sequence, collapse = ", "), "\n")
cat(" followed by ", paste(paste(x$tree$vals, x$node$f_by, sep = " ("), collapse = "), "), ")\n", sep = "")
}

is_ctx_node <- function(node) {
methods::is(node, "ctx_node")
}

assertthat::on_failure(is_ctx_node) <- function(call, env) {
paste0(deparse(call$node), " is not a ctx_node object")
}

#' Extract the sequence encoded by a node
#'
#' This function returns the sequence represented by the `node` object.
#'
#' @param node a `ctx_node` object as returned by [find_sequence()]
#' @param reverse specifies whether the sequence should be reported in reverse
#' temporal order (`TRUE`, default value) or in the temporal order (`FALSE`).
#'
#' @return the sequence represented by the `node` object, a vector
#' @export
#'
#' @examples
#' dts <- c("A", "B", "C", "A", "A", "B", "B", "C", "C", "A")
#' dts_tree <- ctx_tree(dts, max_depth = 3)
#' res <- find_sequence(dts_tree, "A")
#' as_sequence(res)
as_sequence <- function(node, reverse = TRUE) {
assertthat::assert_that(is_ctx_node(node))
if (reverse) {
node$sequence
} else {
rev(node$sequence)
}
}

#' Find the node of a sequence in a context tree
#'
#' This function checks whether the sequence `ctx` is represented in the context
#' tree `ct`. If this is the case, it returns a description of matching node, an
#' object of class `ctx_node`. If the sequence is not represented in the tree,
#' the function return `NULL`.
#'
#' The function looks for sequences in general. The [is_context()] function can
#' be used on the resulting object to test if the sequence is in addition a
#' proper context.
#'
#' @param ct a context tree.
#' @param ctx a sequence to search in the context tree
#' @param reverse specifies whether the sequence `ctx` is given the reverse
#' temporal order (`TRUE`, default value) or in the temporal order (`FALSE`).
#' @param ... additional parameters for the find_sequence function
#' @returns an object of class `ctx_node` if the sequence `ctx` is represented
#' in the context tree, `NULL` when this is not the case.
#' @examples
#' dts <- c("A", "B", "C", "A", "A", "B", "B", "C", "C", "A")
#' dts_tree <- ctx_tree(dts, max_depth = 3)
#' find_sequence(dts_tree, "A")
#' ## returns NULL as "A" "C" does not appear in dts
#' find_sequence(dts_tree, c("A", "C"), reverse = FALSE)
#' @export
find_sequence <- function(ct, ctx, reverse = TRUE, ...) {
UseMethod("find_sequence")
}

#' @export
#' @rdname find_sequence
find_sequence.ctx_tree <- function(ct, ctx, reverse = TRUE, ...) {
if (length(ctx) == 0) {
if (isTRUE(ct$keep_match) && is.null(ct$match)) {
ct$match <- 1:ct$data_size
}
new_ctx_node(ctx, ct, ct)
} else {
assertthat::assert_that((typeof(ctx) == typeof(ct$vals)) && methods::is(ctx, class(ct$vals)),
msg = "ctx is not compatible with the model state space"
)
if (!reverse) {
ctx <- rev(ctx)
}
nx <- to_dts(ctx, ct$vals)
current <- ct
for (k in seq_along(ctx)) {
if (is.null(current$children)) {
return(NULL)
}
candidate <- current$children[[1 + nx$ix[k]]]
if (is.null(candidate) || length(candidate) == 0) {
return(NULL)
}
current <- candidate
}
new_ctx_node(ctx, ct, current)
}
}

#' Report the nature of a node in a context tree
#'
#' This function returns `TRUE` if the node is a proper context, `FALSE`
#' in the other case.
#'
#' @param node a `ctx_node` object as returned by [find_sequence()]
#' @returns `TRUE` if the node `node` is a proper context,
#' `FALSE` when this is not the case
#' @examples
#' dts <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 0)
#' dts_ctree <- ctx_tree(dts, min_size = 1, max_depth = 3)
#' draw(dts_ctree)
#' ## 0, 0 is a context but 0, 1 is not
#' is_context(find_sequence(dts_ctree, c(0, 0)))
#' is_context(find_sequence(dts_ctree, c(0, 1)))
#' @export
is_context <- function(node) {
assertthat::assert_that(is_ctx_node(node))
node$is_context
}

#' Report the positions of a sequence associated to a node
#'
#' This function returns the positions of the sequence represented by `node`
#' in the time series used to build the context tree in which the sequence is
#' represented. This is only possible is those positions were saved during the
#' construction of the context tree. In positions were not saved, a call to this
#' function produces an error.
#'
#' A position of a sequence `ctx` in the time series `x` is an index value `t`
#' such that the sequence ends with `x[t]`. Thus `x[t+1]` is after the context.
#' For instance if `x=c(0, 0, 1, 1)` and `ctx=c(0, 1)` (in standard state
#' order), then the position of `ctx` in `x` is 3.
#'
#' @param node a `ctx_node` object as returned by [find_sequence()]
#'
#' @returns positions of the sequence represented by `node` is the original
#' time series as a integer vector
#' @export
#'
#' @examples
#' dts <- sample(as.factor(c("A", "B", "C")), 100, replace = TRUE)
#' dts_tree <- ctx_tree(dts, max_depth = 3, min_size = 5)
#' subseq <- find_sequence(dts_tree, factor(c("A", "B"), levels = c("A", "B", "C")))
#' if (!is.null(subseq)) {
#' positions(subseq)
#' }
positions <- function(node) {
assertthat::assert_that(is_ctx_node(node))
if (is.null(node$node[["match"]])) {
stop("Cannot report positions if they were not saved")
}
node$node[["match"]] + length(node$sequence)
}

#' Report the distribution of values that follow occurrences of a sequence
#'
#' This function reports the number of occurrences of the sequence represented
#' by `node` in the original time series used to build the associated
#' context tree (not including a possible final occurrence not followed by any
#' value at the end of the original time series). In addition if
#' `frequency=="detailed"`, the function reports the frequencies of each of the
#' possible value of the time series when they appear just after the sequence.
#'
#' @param node a `ctx_node` object as returned by [find_sequence()]
#' @param frequency specifies the counts to be included in the result. `"total"`
#' gives the number of occurrences of the sequence in the original sequence.
#' `"detailed"` includes in addition the break down of these occurrences into
#' all the possible states.
#'
#' @returns either an integer when `frequency="total"` which gives the total
#' number of occurrences of the sequence represented by `node` or a
#' `data.frame` with a `total` column with the same value and a column for
#' each of the possible value of the original time series, reporting counts in
#' each column (see the description above).
#' @export
#' @seealso [contexts()] and [contexts.ctx_tree()]
#' @examples
#' dts <- sample(as.factor(c("A", "B", "C")), 100, replace = TRUE)
#' dts_tree <- ctx_tree(dts, max_depth = 3, min_size = 5)
#' subseq <- find_sequence(dts_tree, factor(c("A", "A"), levels = c("A", "B", "C")))
#' if (!is.null(subseq)) {
#' counts(subseq)
#' }
counts <- function(node, frequency = c("detailed", "total")) {
assertthat::assert_that(is_ctx_node(node))
frequency <- match.arg(frequency)
if (frequency == "total") {
sum(node$node[["f_by"]])
} else {
freq_by_val <- as.list(node$node[["f_by"]])
names(freq_by_val) <- as.character(node$tree$vals)
freq_by_val <- c(list(total = sum(node$node[["f_by"]])), freq_by_val)
data.frame(freq_by_val, check.names = FALSE)
}
}
78 changes: 78 additions & 0 deletions R/ctx_node_navigation.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' Find the parent of a node in a context tree
#'
#' This function returns the parent node of the node represented by the
#' `node` parameter. The result is `NULL` if `node` is the root node of
#' its context tree (representing the empty sequence).
#'
#' Each node of a context tree represents a sequence. When [find_sequence()] is
#' called with success, the returned object represents the corresponding node in
#' the context tree. Unless the original sequence is empty, this node has a
#' parent node which is returned as a `ctx_node` object by the present function.
#' Another interpretation is that the function returns the `node` object
#' associated to the sequence obtained by removing the oldest value from the
#' original sequence.
#'
#' @param node a `ctx_node` object as returned by [find_sequence()]
#' @returns a `ctx_node` object if `node` does correspond to the empty
#' sequence or `NULL` when this is not the case
#' @examples
#' dts <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 0)
#' dts_ctree <- ctx_tree(dts, min_size = 1, max_depth = 3)
#' ctx_00 <- find_sequence(dts_ctree, c(0, 0))
#' ## the parent sequence/node corresponds to the 0 context
#' parent(ctx_00)
#' identical(parent(ctx_00), find_sequence(dts_ctree, c(0)))
#' @export
parent <- function(node) {
assertthat::assert_that(is_ctx_node(node))
if (length(node$sequence) >= 1) {
find_sequence(node$tree, node$sequence[-length(node$sequence)])
} else {
NULL
}
}

#' Find the children nodes of a node in a context tree
#'
#' This function returns a list (possibly empty) of `ctx_node` objects. Each
#' object represents one of the children of the node represented by the
#' `node` parameter.
#'
#' Each node of a context tree represents a sequence. When [find_sequence()] is
#' called with success, the returned object represents the corresponding
#' node in the context tree. If this node has no child, the present
#' function returns an empty list. When the node has at least one child, the
#' function returns a list with one value for each element in the state space
#' (see [states()]). The value is `NULL` if the corresponding child is empty,
#' while it is a `ctx_node` object when the child is present. Each `ctx_node`
#' object is associated to the sequence obtained by adding to the past of
#' the sequence represented by `node` an observation of the associated state.
#'
#' @param node a `ctx_node` object as returned by [find_sequence()]
#' @returns a list of `ctx_node` objects, see details.
#' @examples
#' dts <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 0)
#' dts_ctree <- ctx_tree(dts, min_size = 1, max_depth = 3)
#' ctx_00 <- find_sequence(dts_ctree, c(0, 0))
#' ## this context can only be extended in the past by 1:
#' children(ctx_00)
#' ctx_01 <- find_sequence(dts_ctree, c(0, 1))
#' ## this context can be extended by both states
#' children(ctx_01)
#' @export
children <- function(node) {
assertthat::assert_that(is_ctx_node(node))
if (is.null(node$node[["children"]])) {
list()
} else {
res <- vector(mode = "list", length = length(node$node[["children"]]))
for (k in seq_along(res)) {
if (length(node$node$children[[k]]) > 0) {
the_ctx <- c(node$sequence, node$tree$vals[k])
res[[k]] <- new_ctx_node(the_ctx, node$tree, node$node$children[[k]])
}
}
names(res) <- as.character(node$tree$vals)
res
}
}
10 changes: 10 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ reference:
- draw.ctx_tree
- draw_control
- states
- title: Context tree nodes
desc: Extract nodes from context trees and manipulate them.
- contents:
- find_sequence
- as_sequence
- children
- counts
- is_context
- parent
- positions
- title: Variable Length Markov Chain (VLMC)
desc: Estimate VLMC from a time series and extract information from them.
- contents:
Expand Down
26 changes: 26 additions & 0 deletions man/as_sequence.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 273d5f8

Please sign in to comment.