-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add context tree node manipulation (issue #50).
- Loading branch information
1 parent
698d130
commit 273d5f8
Showing
16 changed files
with
783 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
new_ctx_node <- function(ctx, tree, node) { | ||
structure( | ||
list( | ||
sequence = ctx, node = node, tree = tree, | ||
is_context = count_local_context(node) > 0 | ||
), | ||
class = "ctx_node" | ||
) | ||
} | ||
|
||
#' @export | ||
print.ctx_node <- function(x, ...) { | ||
if (x$is_context) { | ||
cat("Context: ") | ||
} else { | ||
cat("Sequence: ") | ||
} | ||
cat(paste(x$sequence, collapse = ", "), "\n") | ||
cat(" followed by ", paste(paste(x$tree$vals, x$node$f_by, sep = " ("), collapse = "), "), ")\n", sep = "") | ||
} | ||
|
||
is_ctx_node <- function(node) { | ||
methods::is(node, "ctx_node") | ||
} | ||
|
||
assertthat::on_failure(is_ctx_node) <- function(call, env) { | ||
paste0(deparse(call$node), " is not a ctx_node object") | ||
} | ||
|
||
#' Extract the sequence encoded by a node | ||
#' | ||
#' This function returns the sequence represented by the `node` object. | ||
#' | ||
#' @param node a `ctx_node` object as returned by [find_sequence()] | ||
#' @param reverse specifies whether the sequence should be reported in reverse | ||
#' temporal order (`TRUE`, default value) or in the temporal order (`FALSE`). | ||
#' | ||
#' @return the sequence represented by the `node` object, a vector | ||
#' @export | ||
#' | ||
#' @examples | ||
#' dts <- c("A", "B", "C", "A", "A", "B", "B", "C", "C", "A") | ||
#' dts_tree <- ctx_tree(dts, max_depth = 3) | ||
#' res <- find_sequence(dts_tree, "A") | ||
#' as_sequence(res) | ||
as_sequence <- function(node, reverse = TRUE) { | ||
assertthat::assert_that(is_ctx_node(node)) | ||
if (reverse) { | ||
node$sequence | ||
} else { | ||
rev(node$sequence) | ||
} | ||
} | ||
|
||
#' Find the node of a sequence in a context tree | ||
#' | ||
#' This function checks whether the sequence `ctx` is represented in the context | ||
#' tree `ct`. If this is the case, it returns a description of matching node, an | ||
#' object of class `ctx_node`. If the sequence is not represented in the tree, | ||
#' the function return `NULL`. | ||
#' | ||
#' The function looks for sequences in general. The [is_context()] function can | ||
#' be used on the resulting object to test if the sequence is in addition a | ||
#' proper context. | ||
#' | ||
#' @param ct a context tree. | ||
#' @param ctx a sequence to search in the context tree | ||
#' @param reverse specifies whether the sequence `ctx` is given the reverse | ||
#' temporal order (`TRUE`, default value) or in the temporal order (`FALSE`). | ||
#' @param ... additional parameters for the find_sequence function | ||
#' @returns an object of class `ctx_node` if the sequence `ctx` is represented | ||
#' in the context tree, `NULL` when this is not the case. | ||
#' @examples | ||
#' dts <- c("A", "B", "C", "A", "A", "B", "B", "C", "C", "A") | ||
#' dts_tree <- ctx_tree(dts, max_depth = 3) | ||
#' find_sequence(dts_tree, "A") | ||
#' ## returns NULL as "A" "C" does not appear in dts | ||
#' find_sequence(dts_tree, c("A", "C"), reverse = FALSE) | ||
#' @export | ||
find_sequence <- function(ct, ctx, reverse = TRUE, ...) { | ||
UseMethod("find_sequence") | ||
} | ||
|
||
#' @export | ||
#' @rdname find_sequence | ||
find_sequence.ctx_tree <- function(ct, ctx, reverse = TRUE, ...) { | ||
if (length(ctx) == 0) { | ||
if (isTRUE(ct$keep_match) && is.null(ct$match)) { | ||
ct$match <- 1:ct$data_size | ||
} | ||
new_ctx_node(ctx, ct, ct) | ||
} else { | ||
assertthat::assert_that((typeof(ctx) == typeof(ct$vals)) && methods::is(ctx, class(ct$vals)), | ||
msg = "ctx is not compatible with the model state space" | ||
) | ||
if (!reverse) { | ||
ctx <- rev(ctx) | ||
} | ||
nx <- to_dts(ctx, ct$vals) | ||
current <- ct | ||
for (k in seq_along(ctx)) { | ||
if (is.null(current$children)) { | ||
return(NULL) | ||
} | ||
candidate <- current$children[[1 + nx$ix[k]]] | ||
if (is.null(candidate) || length(candidate) == 0) { | ||
return(NULL) | ||
} | ||
current <- candidate | ||
} | ||
new_ctx_node(ctx, ct, current) | ||
} | ||
} | ||
|
||
#' Report the nature of a node in a context tree | ||
#' | ||
#' This function returns `TRUE` if the node is a proper context, `FALSE` | ||
#' in the other case. | ||
#' | ||
#' @param node a `ctx_node` object as returned by [find_sequence()] | ||
#' @returns `TRUE` if the node `node` is a proper context, | ||
#' `FALSE` when this is not the case | ||
#' @examples | ||
#' dts <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 0) | ||
#' dts_ctree <- ctx_tree(dts, min_size = 1, max_depth = 3) | ||
#' draw(dts_ctree) | ||
#' ## 0, 0 is a context but 0, 1 is not | ||
#' is_context(find_sequence(dts_ctree, c(0, 0))) | ||
#' is_context(find_sequence(dts_ctree, c(0, 1))) | ||
#' @export | ||
is_context <- function(node) { | ||
assertthat::assert_that(is_ctx_node(node)) | ||
node$is_context | ||
} | ||
|
||
#' Report the positions of a sequence associated to a node | ||
#' | ||
#' This function returns the positions of the sequence represented by `node` | ||
#' in the time series used to build the context tree in which the sequence is | ||
#' represented. This is only possible is those positions were saved during the | ||
#' construction of the context tree. In positions were not saved, a call to this | ||
#' function produces an error. | ||
#' | ||
#' A position of a sequence `ctx` in the time series `x` is an index value `t` | ||
#' such that the sequence ends with `x[t]`. Thus `x[t+1]` is after the context. | ||
#' For instance if `x=c(0, 0, 1, 1)` and `ctx=c(0, 1)` (in standard state | ||
#' order), then the position of `ctx` in `x` is 3. | ||
#' | ||
#' @param node a `ctx_node` object as returned by [find_sequence()] | ||
#' | ||
#' @returns positions of the sequence represented by `node` is the original | ||
#' time series as a integer vector | ||
#' @export | ||
#' | ||
#' @examples | ||
#' dts <- sample(as.factor(c("A", "B", "C")), 100, replace = TRUE) | ||
#' dts_tree <- ctx_tree(dts, max_depth = 3, min_size = 5) | ||
#' subseq <- find_sequence(dts_tree, factor(c("A", "B"), levels = c("A", "B", "C"))) | ||
#' if (!is.null(subseq)) { | ||
#' positions(subseq) | ||
#' } | ||
positions <- function(node) { | ||
assertthat::assert_that(is_ctx_node(node)) | ||
if (is.null(node$node[["match"]])) { | ||
stop("Cannot report positions if they were not saved") | ||
} | ||
node$node[["match"]] + length(node$sequence) | ||
} | ||
|
||
#' Report the distribution of values that follow occurrences of a sequence | ||
#' | ||
#' This function reports the number of occurrences of the sequence represented | ||
#' by `node` in the original time series used to build the associated | ||
#' context tree (not including a possible final occurrence not followed by any | ||
#' value at the end of the original time series). In addition if | ||
#' `frequency=="detailed"`, the function reports the frequencies of each of the | ||
#' possible value of the time series when they appear just after the sequence. | ||
#' | ||
#' @param node a `ctx_node` object as returned by [find_sequence()] | ||
#' @param frequency specifies the counts to be included in the result. `"total"` | ||
#' gives the number of occurrences of the sequence in the original sequence. | ||
#' `"detailed"` includes in addition the break down of these occurrences into | ||
#' all the possible states. | ||
#' | ||
#' @returns either an integer when `frequency="total"` which gives the total | ||
#' number of occurrences of the sequence represented by `node` or a | ||
#' `data.frame` with a `total` column with the same value and a column for | ||
#' each of the possible value of the original time series, reporting counts in | ||
#' each column (see the description above). | ||
#' @export | ||
#' @seealso [contexts()] and [contexts.ctx_tree()] | ||
#' @examples | ||
#' dts <- sample(as.factor(c("A", "B", "C")), 100, replace = TRUE) | ||
#' dts_tree <- ctx_tree(dts, max_depth = 3, min_size = 5) | ||
#' subseq <- find_sequence(dts_tree, factor(c("A", "A"), levels = c("A", "B", "C"))) | ||
#' if (!is.null(subseq)) { | ||
#' counts(subseq) | ||
#' } | ||
counts <- function(node, frequency = c("detailed", "total")) { | ||
assertthat::assert_that(is_ctx_node(node)) | ||
frequency <- match.arg(frequency) | ||
if (frequency == "total") { | ||
sum(node$node[["f_by"]]) | ||
} else { | ||
freq_by_val <- as.list(node$node[["f_by"]]) | ||
names(freq_by_val) <- as.character(node$tree$vals) | ||
freq_by_val <- c(list(total = sum(node$node[["f_by"]])), freq_by_val) | ||
data.frame(freq_by_val, check.names = FALSE) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
#' Find the parent of a node in a context tree | ||
#' | ||
#' This function returns the parent node of the node represented by the | ||
#' `node` parameter. The result is `NULL` if `node` is the root node of | ||
#' its context tree (representing the empty sequence). | ||
#' | ||
#' Each node of a context tree represents a sequence. When [find_sequence()] is | ||
#' called with success, the returned object represents the corresponding node in | ||
#' the context tree. Unless the original sequence is empty, this node has a | ||
#' parent node which is returned as a `ctx_node` object by the present function. | ||
#' Another interpretation is that the function returns the `node` object | ||
#' associated to the sequence obtained by removing the oldest value from the | ||
#' original sequence. | ||
#' | ||
#' @param node a `ctx_node` object as returned by [find_sequence()] | ||
#' @returns a `ctx_node` object if `node` does correspond to the empty | ||
#' sequence or `NULL` when this is not the case | ||
#' @examples | ||
#' dts <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 0) | ||
#' dts_ctree <- ctx_tree(dts, min_size = 1, max_depth = 3) | ||
#' ctx_00 <- find_sequence(dts_ctree, c(0, 0)) | ||
#' ## the parent sequence/node corresponds to the 0 context | ||
#' parent(ctx_00) | ||
#' identical(parent(ctx_00), find_sequence(dts_ctree, c(0))) | ||
#' @export | ||
parent <- function(node) { | ||
assertthat::assert_that(is_ctx_node(node)) | ||
if (length(node$sequence) >= 1) { | ||
find_sequence(node$tree, node$sequence[-length(node$sequence)]) | ||
} else { | ||
NULL | ||
} | ||
} | ||
|
||
#' Find the children nodes of a node in a context tree | ||
#' | ||
#' This function returns a list (possibly empty) of `ctx_node` objects. Each | ||
#' object represents one of the children of the node represented by the | ||
#' `node` parameter. | ||
#' | ||
#' Each node of a context tree represents a sequence. When [find_sequence()] is | ||
#' called with success, the returned object represents the corresponding | ||
#' node in the context tree. If this node has no child, the present | ||
#' function returns an empty list. When the node has at least one child, the | ||
#' function returns a list with one value for each element in the state space | ||
#' (see [states()]). The value is `NULL` if the corresponding child is empty, | ||
#' while it is a `ctx_node` object when the child is present. Each `ctx_node` | ||
#' object is associated to the sequence obtained by adding to the past of | ||
#' the sequence represented by `node` an observation of the associated state. | ||
#' | ||
#' @param node a `ctx_node` object as returned by [find_sequence()] | ||
#' @returns a list of `ctx_node` objects, see details. | ||
#' @examples | ||
#' dts <- c(0, 1, 1, 1, 0, 0, 1, 0, 1, 0) | ||
#' dts_ctree <- ctx_tree(dts, min_size = 1, max_depth = 3) | ||
#' ctx_00 <- find_sequence(dts_ctree, c(0, 0)) | ||
#' ## this context can only be extended in the past by 1: | ||
#' children(ctx_00) | ||
#' ctx_01 <- find_sequence(dts_ctree, c(0, 1)) | ||
#' ## this context can be extended by both states | ||
#' children(ctx_01) | ||
#' @export | ||
children <- function(node) { | ||
assertthat::assert_that(is_ctx_node(node)) | ||
if (is.null(node$node[["children"]])) { | ||
list() | ||
} else { | ||
res <- vector(mode = "list", length = length(node$node[["children"]])) | ||
for (k in seq_along(res)) { | ||
if (length(node$node$children[[k]]) > 0) { | ||
the_ctx <- c(node$sequence, node$tree$vals[k]) | ||
res[[k]] <- new_ctx_node(the_ctx, node$tree, node$node$children[[k]]) | ||
} | ||
} | ||
names(res) <- as.character(node$tree$vals) | ||
res | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.