From b4b8e3ae0566a218853b55ceb0a61c7f411a7a90 Mon Sep 17 00:00:00 2001 From: David Zwerdling Date: Sun, 14 May 2023 16:40:12 -0700 Subject: [PATCH] initial main engine nneg array impl --- crates/rsonpath-lib/src/engine.rs | 4 ++ crates/rsonpath-lib/src/engine/main.rs | 66 +++++++++++++++++-- crates/rsonpath-lib/src/engine/recursive.rs | 28 ++++---- crates/rsonpath-lib/src/query/automaton.rs | 57 +++++++++++++++- .../src/query/nonnegative_array_index.rs | 9 ++- .../data/basic/array_root_populated.json | 2 +- .../tests/engine_correctness_tests.rs | 2 +- 7 files changed, 141 insertions(+), 27 deletions(-) diff --git a/crates/rsonpath-lib/src/engine.rs b/crates/rsonpath-lib/src/engine.rs index cd891cfc..95d58b89 100644 --- a/crates/rsonpath-lib/src/engine.rs +++ b/crates/rsonpath-lib/src/engine.rs @@ -17,9 +17,13 @@ pub use main::MainEngine as RsonpathEngine; use self::error::EngineError; use crate::input::Input; use crate::query::automaton::Automaton; +use crate::query::NonNegativeArrayIndex; use crate::query::{error::CompilerError, JsonPathQuery}; use crate::result::QueryResult; +/// A constant index for the common and starting case of the first item. +pub const FIRST_ITEM_INDEX: NonNegativeArrayIndex = NonNegativeArrayIndex::new(0); + /// Trait for an engine that can run its query on a given input. pub trait Engine { /// Compute the [`QueryResult`] on given [`Input`]. diff --git a/crates/rsonpath-lib/src/engine/main.rs b/crates/rsonpath-lib/src/engine/main.rs index 1a9c1101..9c271a31 100644 --- a/crates/rsonpath-lib/src/engine/main.rs +++ b/crates/rsonpath-lib/src/engine/main.rs @@ -9,7 +9,7 @@ //! even on targets that do not support AVX2 SIMD operations. 
#[cfg(feature = "head-skip")] use super::head_skipping::{CanHeadSkip, HeadSkip}; -use super::Compiler; +use super::{Compiler, FIRST_ITEM_INDEX}; #[cfg(feature = "head-skip")] use crate::classification::ResumeClassifierState; use crate::debug; @@ -20,7 +20,7 @@ use crate::engine::tail_skipping::TailSkip; use crate::engine::{Engine, Input}; use crate::query::automaton::{Automaton, State}; use crate::query::error::CompilerError; -use crate::query::{JsonPathQuery, Label}; +use crate::query::{JsonPathQuery, Label, NonNegativeArrayIndex}; use crate::result::QueryResult; use crate::BLOCK_SIZE; use crate::{ @@ -105,6 +105,7 @@ struct Executor<'q, 'b, I: Input> { bytes: &'b I, next_event: Option, is_list: bool, + array_count: NonNegativeArrayIndex, } fn query_executor<'q, 'b, I: Input>( @@ -119,6 +120,7 @@ fn query_executor<'q, 'b, I: Input>( bytes, next_event: None, is_list: false, + array_count: FIRST_ITEM_INDEX, } } @@ -247,14 +249,35 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { S: StructuralIterator<'b, I, Q, BLOCK_SIZE>, R: QueryResult, { + debug!("array_count = {}", self.array_count); self.next_event = classifier.next(); let is_next_opening = self.next_event.map_or(false, |s| s.is_opening()); if !is_next_opening { - let fallback_state = self.automaton[self.state].fallback_state(); - if self.is_list && self.automaton.is_accepting(fallback_state) { + let fallback_accepting = self + .automaton + .is_accepting(self.automaton[self.state].fallback_state()); + + if self.is_list && fallback_accepting { result.report(idx); } + + self.array_count = self.array_count.increment(); + + if let Ok(array_id) = self.array_count.try_into() { + let match_index = self + .automaton + .has_array_index_transition_to_accepting(self.state, &array_id); + + let accepting_list = self.automaton.is_accepting_list_item(self.state); + + let is_accepting_list_item = self.is_list && accepting_list; + + if is_accepting_list_item && match_index { + debug!("Accepting on list item."); + 
result.report(idx); + } + } } Ok(()) @@ -278,7 +301,24 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { if let Some(colon_idx) = self.find_preceding_colon(idx) { for &(label, target) in self.automaton[self.state].transitions() { match label { - TransitionLabel::ArrayIndex(_) => {} + TransitionLabel::ArrayIndex(_i) => { + // TODO: should this really be a no-op? + // if let Ok(array_id) = self.array_count.try_into() { + // if self.is_list + // && self + // .automaton + // .has_accepting_list_item_at_index(self.state, &array_id) + // { + // any_matched = true; + // if self.automaton.is_accepting(target) { + // debug!("Accept Array Index {i}"); + // debug!("Accept {idx}"); + // result.report(idx); + // } + // break; + // } + // } + } TransitionLabel::ObjectMember(label) => { if self.is_match(colon_idx, label)? { any_matched = true; @@ -316,9 +356,17 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { self.is_list = true; let fallback = self.automaton[self.state].fallback_state(); - if self.automaton.is_accepting(fallback) { - classifier.turn_commas_on(idx); + let is_fallback_accepting = self.automaton.is_accepting(fallback); + let wants_first_item = is_fallback_accepting + || self + .automaton + .has_first_array_index_transition_to_accepting(self.state); + + classifier.turn_commas_on(idx); + + if wants_first_item { self.next_event = classifier.next(); + match self.next_event { Some(Structural::Closing(_, close_idx)) => { if let Some((next_idx, _)) = self.bytes.seek_non_whitespace_forward(idx + 1) @@ -372,6 +420,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) { self.state = stack_frame.state; self.is_list = stack_frame.is_list; + self.array_count = stack_frame.array_count; if self.automaton.is_unitary(self.state) { let bracket_type = self.current_node_bracket_type(); @@ -420,10 +469,12 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> { "push {}, goto {target}, is_list = {target_is_list}", self.state ); + 
self.stack.push(StackFrame { depth: *self.depth, state: self.state, is_list: self.is_list, + array_count: self.array_count, }); self.state = target; } @@ -480,6 +531,7 @@ struct StackFrame { depth: u8, state: State, is_list: bool, + array_count: NonNegativeArrayIndex, } #[derive(Debug)] diff --git a/crates/rsonpath-lib/src/engine/recursive.rs b/crates/rsonpath-lib/src/engine/recursive.rs index 96fcf840..4ebeaeba 100644 --- a/crates/rsonpath-lib/src/engine/recursive.rs +++ b/crates/rsonpath-lib/src/engine/recursive.rs @@ -13,19 +13,17 @@ use crate::debug; use crate::engine::error::EngineError; #[cfg(feature = "tail-skip")] use crate::engine::tail_skipping::TailSkip; +use crate::engine::FIRST_ITEM_INDEX; use crate::engine::{Compiler, Engine}; #[cfg(feature = "head-skip")] use crate::error::InternalRsonpathError; use crate::input::Input; use crate::query::automaton::{Automaton, State, TransitionLabel}; use crate::query::error::CompilerError; -use crate::query::NonNegativeArrayIndex; use crate::query::{JsonPathQuery, Label}; use crate::result::QueryResult; use crate::BLOCK_SIZE; -pub(crate) const FIRST_ITEM_INDEX: NonNegativeArrayIndex = NonNegativeArrayIndex::new(0); - /// Recursive implementation of the JSONPath query engine. pub struct RecursiveEngine<'q> { automaton: Automaton<'q>, @@ -188,7 +186,7 @@ impl<'q, 'b, I: Input> ExecutionContext<'q, 'b, I> { let needs_commas = is_list && (is_fallback_accepting || searching_list); let needs_colons = !is_list && self.automaton.has_transition_to_accepting(state); - let mut array_count = 0; + let mut array_count = FIRST_ITEM_INDEX; let config_characters = |classifier: &mut Classifier!(), idx: usize| { if needs_commas { @@ -249,23 +247,21 @@ impl<'q, 'b, I: Input> ExecutionContext<'q, 'b, I> { } // Once we are in comma search, we have already considered the option that the first item in the list is a match. Iterate on the remaining items. 
- array_count += 1; - - // let is_next_closing = next_event.map_or(false, |s| s.is_closing()); + array_count = array_count.increment(); - let match_index = self.automaton[state].transitions().iter().any(|t| match t { - (TransitionLabel::ArrayIndex(i), _) => array_count == i.get_index(), - _ => false, - }); + if let Ok(array_id) = array_count.try_into() { + let match_index = self + .automaton + .has_array_index_transition_to_accepting(state, &array_id); - if is_accepting_list_item && !is_next_opening && match_index { - debug!("Accepting on list item."); - result.report(idx); + if is_accepting_list_item && !is_next_opening && match_index { + debug!("Accepting on list item."); + result.report(idx); + } } } Some(Structural::Colon(idx)) => { debug!("Colon"); - // debug_assert!(!is_accepting_list_item); latest_idx = idx; next_event = classifier.next(); @@ -334,7 +330,7 @@ impl<'q, 'b, I: Input> ExecutionContext<'q, 'b, I> { } } TransitionLabel::ArrayIndex(i) => { - if is_list && (i.get_index() == array_count) { + if is_list && i.eq(&array_count) { matched = Some(target); if self.automaton.is_accepting(target) { debug!("Accept Array Index {i}"); diff --git a/crates/rsonpath-lib/src/query/automaton.rs b/crates/rsonpath-lib/src/query/automaton.rs index a37949f3..892b2681 100644 --- a/crates/rsonpath-lib/src/query/automaton.rs +++ b/crates/rsonpath-lib/src/query/automaton.rs @@ -7,7 +7,7 @@ mod state; pub use state::{State, StateAttributes}; use super::{error::CompilerError, JsonPathQuery, Label, NonNegativeArrayIndex}; -use crate::debug; +use crate::{debug, engine::FIRST_ITEM_INDEX}; use nfa::NondeterministicAutomaton; use smallvec::SmallVec; use std::{fmt::Display, ops::Index}; @@ -258,6 +258,61 @@ impl<'q> Automaton<'q> { }) } + /// Returns whether the given state is accepting the first item in a list. 
+ /// + /// # Example + /// ```rust + /// # use rsonpath_lib::query::*; + /// # use rsonpath_lib::query::automaton::*; + /// let query = JsonPathQuery::parse("$[0]").unwrap(); + /// let automaton = Automaton::new(&query).unwrap(); + /// let state = automaton.initial_state(); + /// + /// assert!(automaton.has_first_array_index_transition_to_accepting(state)); + /// ``` + /// ```rust + /// # use rsonpath_lib::query::*; + /// # use rsonpath_lib::query::automaton::*; + /// let query = JsonPathQuery::parse("$[1]").unwrap(); + /// let automaton = Automaton::new(&query).unwrap(); + /// let state = automaton.initial_state(); + /// + /// assert!(!automaton.has_first_array_index_transition_to_accepting(state)); + /// ``` + #[must_use] + #[inline(always)] + pub fn has_first_array_index_transition_to_accepting(&self, state: State) -> bool { + self.has_array_index_transition_to_accepting(state, &FIRST_ITEM_INDEX) + } + + /// Returns whether the given state is accepting the item at a given index in a list. 
+ /// + /// # Example + /// ```rust + /// # use rsonpath_lib::query::*; + /// # use rsonpath_lib::query::automaton::*; + /// let query = JsonPathQuery::parse("$[1]").unwrap(); + /// let automaton = Automaton::new(&query).unwrap(); + /// let state = automaton.initial_state(); + /// let match_index_1 = NonNegativeArrayIndex::new(1); + /// let match_index_2 = NonNegativeArrayIndex::new(2); + /// + /// assert!(automaton.has_array_index_transition_to_accepting(state, &match_index_1)); + /// assert!(!automaton.has_array_index_transition_to_accepting(state, &match_index_2)); + /// ``` + #[must_use] + #[inline(always)] + pub fn has_array_index_transition_to_accepting( + &self, + state: State, + match_index: &NonNegativeArrayIndex, + ) -> bool { + self[state].transitions().iter().any(|t| match t { + (TransitionLabel::ArrayIndex(i), s) => i.eq(match_index) && self.is_accepting(*s), + _ => false, + }) + } + /// Returns whether the given state has any transitions /// (labelled or fallback) to an accepting state. /// diff --git a/crates/rsonpath-lib/src/query/nonnegative_array_index.rs b/crates/rsonpath-lib/src/query/nonnegative_array_index.rs index 913b00c9..e8b13c17 100644 --- a/crates/rsonpath-lib/src/query/nonnegative_array_index.rs +++ b/crates/rsonpath-lib/src/query/nonnegative_array_index.rs @@ -14,7 +14,7 @@ use std::fmt::{self, Display, Formatter}; /// /// assert_eq!(idx.get_index(), 2); /// ``` -#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Debug, PartialOrd, Ord)] pub struct NonNegativeArrayIndex(u64); /// The upper inclusive bound on index values. @@ -41,6 +41,13 @@ impl NonNegativeArrayIndex { Self(index) } + /// Return the index immediately following this one (the stored value plus one). + #[must_use] + #[inline] + pub fn increment(&self) -> Self { + NonNegativeArrayIndex::new(&self.0 + 1) + } + /// Return the index stored. 
#[must_use] #[inline] diff --git a/crates/rsonpath-lib/tests/data/basic/array_root_populated.json b/crates/rsonpath-lib/tests/data/basic/array_root_populated.json index 6ed63af5..3169929f 100644 --- a/crates/rsonpath-lib/tests/data/basic/array_root_populated.json +++ b/crates/rsonpath-lib/tests/data/basic/array_root_populated.json @@ -1 +1 @@ -[1,2] +[1,2] \ No newline at end of file diff --git a/crates/rsonpath-lib/tests/engine_correctness_tests.rs b/crates/rsonpath-lib/tests/engine_correctness_tests.rs index b0b54368..94c29d4f 100644 --- a/crates/rsonpath-lib/tests/engine_correctness_tests.rs +++ b/crates/rsonpath-lib/tests/engine_correctness_tests.rs @@ -23,7 +23,7 @@ macro_rules! count_test_cases { #[test_case("basic/atomic_descendant.json", "$..*" => 4; "atomic_descendant.json any descendant $..*")] #[test_case("basic/atomic_descendant.json", "$.b[0]" => 1; "atomic_descendant.json nneg array index")] #[test_case("basic/atomic_descendant.json", "$.b[1]" => 0; "atomic_descendant.json nonexistent nneg array index")] - #[test_case("basic/atomic_descendant.json", "$..[0]" => 1; "atomic_descendant.json descendant array index")] + #[test_case("basic/atomic_descendant.json", "$..[0]" => 1; "atomic_descendant.json descendant nneg array index")] #[test_case("basic/atomic_descendant.json", "$.b[0].b" => 1; "atomic_descendant.json nested nneg array index")] #[test_case("basic/atomic_after_complex.json", "$.a..b" => 1; "atomic_after_complex.json $.a..b")] #[test_case("basic/atomic_after_complex.json", "$.a[0]" => 1; "atomic_after_complex.json nneg array index")]