From 5c6f72e90435f8f766016e3cec0b7cb82a3bbf0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20Bl=C3=A4tte?= Date: Thu, 29 Feb 2024 21:57:08 +0100 Subject: [PATCH] s_attributes() for subcorpus_bundle for siblings, ancestors, descendents #283 --- NEWS.md | 2 ++ R/corpus.R | 5 +++- R/s_attributes.R | 61 ++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2b24c91c..d1618bd5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,8 @@ available for s_attribute defined by `mw`. * `decode()` to `AnnotatedPlainTextDocument` failed if a document contains only one token. Fixed #285. * `decode()` failed if nested s-attribute does not occur. Fixed #284. +* `s_attributes()` for bundle works if s-attribute is sibling, ancestor and +descendent #283. # polmineR v0.8.9 diff --git a/R/corpus.R b/R/corpus.R index f550c1ad..139b4085 100644 --- a/R/corpus.R +++ b/R/corpus.R @@ -162,7 +162,10 @@ setMethod("get_corpus", "kwic", function(x) x@corpus) #' @exportMethod get_corpus #' @rdname bundle -setMethod("get_corpus", "bundle", function(x) unique(sapply(x@objects, get_corpus))) +setMethod( + "get_corpus", "bundle", + function(x) unique(sapply(x@objects, get_corpus)) +) #' @rdname corpus-class diff --git a/R/s_attributes.R b/R/s_attributes.R index 790d9241..1d6bb9bd 100644 --- a/R/s_attributes.R +++ b/R/s_attributes.R @@ -215,7 +215,12 @@ setMethod( } else { FALSE } - s_attr_strucs <- if (length(.Object@s_attribute_strucs) > 0L) if (.Object@s_attribute_strucs == s_attribute) TRUE else FALSE else FALSE + s_attr_strucs <- if (length(.Object@s_attribute_strucs) > 0L){ + if (.Object@s_attribute_strucs == s_attribute) TRUE else FALSE + } else { + FALSE + } + if (xml_is_flat && s_attr_strucs){ len1 <- cl_attribute_size( corpus = .Object@corpus, registry = .Object@registry_dir, @@ -336,7 +341,9 @@ setMethod("s_attributes", "context", function (.Object, s_attribute = NULL){ #' @docType methods #' @rdname s_attributes-method -setMethod("s_attributes", "partition_bundle", function(.Object, s_attribute, unique = TRUE, ...){ +setMethod( + "s_attributes", "partition_bundle", + function(.Object, s_attribute, unique = TRUE, ...){ if ("sAttribute" %in% names(list(...))){ lifecycle::deprecate_warn( @@ -356,12 +363,56 @@ setMethod("s_attributes", "partition_bundle", function(.Object, s_attribute, uni ), recursive = FALSE ) - values <- cl_struc2str( + s_attr_strucs <- unique(unlist(lapply(.Object, slot, "s_attribute_strucs"))) + relationship <- s_attr_relationship( + x = s_attr_strucs, + y = s_attribute, corpus = .Object@corpus, - s_attribute = s_attribute, - struc = unlist(strucs, recursive = TRUE), registry = .Object@registry_dir ) + + if (relationship == 0L){ + values <- cl_struc2str( + corpus = .Object@corpus, + s_attribute = s_attribute, + struc = unlist(strucs, recursive = TRUE), + registry = .Object@registry_dir + ) + } else if (relationship == -1L){ + strucs <- cl_cpos2struc( + corpus = .Object@corpus, + cpos = do.call( + c, + unname(lapply(.Object@objects, function(x) x@cpos[,1])) + ), + s_attribute = s_attribute, + registry = .Object@registry_dir + ) + values <- cl_struc2str( + corpus = .Object@corpus, + s_attribute = s_attribute, + struc = unlist(strucs, recursive = TRUE), + registry = .Object@registry_dir + ) + } else if (relationship == 1L){ + region_matrix <- do.call(rbind, lapply(.Object@objects, slot, "cpos")) + struc_matrix <- RcppCWB::region_matrix_to_struc_matrix( + corpus = .Object@corpus, + s_attribute = s_attribute, + region_matrix = region_matrix, + registry = .Object@registry_dir + ) + strucs <- ranges_to_cpos(struc_matrix) + values <- cl_struc2str( + corpus = .Object@corpus, + s_attribute = s_attribute, + struc = strucs, + registry = .Object@registry_dir + ) + .size <- function(x) sum(x[,2] - x[,1] + 1L) + n <- lapply(lapply(split(x = struc_matrix, f = f), matrix, ncol = 2), .size) + f <- unlist(mapply(rep, x = seq_along(n), times = n), recursive = FALSE) + } Encoding(values) <- .Object@encoding values <- as.nativeEnc(values, from = .Object@encoding) retval <- split(x = values, f = f)