Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

62 introduce the index selector non negative into the recursive engine #132

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
2027e9d
cherry picked from commit ef2f977 to merge non-negative array index
zwerdlds May 12, 2023
41bc7b7
verification fixups
zwerdlds May 12, 2023
0fb8dd7
nneg array indices tests
zwerdlds May 13, 2023
66cd300
initial main engine nneg array impl
zwerdlds May 14, 2023
0dcb9d7
more unit tests, sync with mat
zwerdlds May 15, 2023
1ae7a9f
nneg array index increment check; reset array on new square bracket;…
zwerdlds May 15, 2023
6454662
anydesc nneg array positive test; minor main cleanup
zwerdlds May 16, 2023
a1fede6
nneg array index code complete
zwerdlds May 16, 2023
d6acab8
test more accurate for stack pushing
zwerdlds May 17, 2023
33c48eb
NonNegativeArrayIndex::ZERO const moved to type
zwerdlds May 18, 2023
ff35356
NonNegativeArray::MAX provided public instead of primitive
zwerdlds May 18, 2023
ef0061d
refactored nneg array index max and zero to const wrapped value in No…
zwerdlds May 18, 2023
776336d
TransitionLabel get_label and get_array_index fixed
zwerdlds May 18, 2023
32bdac5
minimizer imports cleaned up
zwerdlds May 18, 2023
b7df9f4
minimizer imports cleaned up
zwerdlds May 18, 2023
3600675
merge cleanup
zwerdlds May 18, 2023
99f5bfe
invalid borrow removed
zwerdlds May 18, 2023
f850253
Improve documentation on TransitionLabel
zwerdlds May 18, 2023
2890daf
remove erroneous borrow (for now)
zwerdlds May 18, 2023
0e23aac
Borrow via from for TransitionLabel
zwerdlds May 19, 2023
93a1496
uses/mods reordered
zwerdlds May 19, 2023
9f01207
list index overflow short circuit order corrected
zwerdlds May 19, 2023
ecb084c
precomputed non-negative array index selector stack vars
zwerdlds May 20, 2023
01e4f71
cleanup for linter and erroneous unused code
zwerdlds May 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
use_nix
4 changes: 4 additions & 0 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ test-classifier:
test-engine:
cargo test --test engine_correctness_tests

# Run the query parser tests on default features.
test-parser:
cargo test --test query_parser_tests

# Run all tests, including real dataset tests, on the feature powerset of the project.
test-full:
-cargo install cargo-hack
Expand Down
1 change: 0 additions & 1 deletion crates/rsonpath-lib/src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ pub mod main;
pub mod recursive;
#[cfg(feature = "tail-skip")]
mod tail_skipping;

pub use main::MainEngine as RsonpathEngine;

use self::error::EngineError;
Expand Down
18 changes: 11 additions & 7 deletions crates/rsonpath-lib/src/engine/head_skipping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,17 @@ impl<'b, 'q, I: Input> HeadSkip<'b, 'q, I, BLOCK_SIZE> {

if fallback_state == initial_state && transitions.len() == 1 {
let (label, target_state) = transitions[0];
debug!("Automaton starts with a descendant search, using memmem heuristic.");
return Some(Self {
bytes,
state: target_state,
is_accepting: automaton.is_accepting(target_state),
label,
});

if let Some(named_label) = label.get_label() {
debug!("Automaton starts with a descendant search, using memmem heuristic.");

return Some(Self {
bytes,
state: target_state,
is_accepting: automaton.is_accepting(target_state),
label: named_label,
});
}
}

None
Expand Down
175 changes: 137 additions & 38 deletions crates/rsonpath-lib/src/engine/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ use super::head_skipping::{CanHeadSkip, HeadSkip};
use super::Compiler;
#[cfg(feature = "head-skip")]
use crate::classification::ResumeClassifierState;
use crate::classification::{
quotes::{classify_quoted_sequences, QuoteClassifiedIterator},
structural::{classify_structural_characters, BracketType, Structural, StructuralIterator},
};
use crate::debug;
use crate::engine::depth::Depth;
use crate::engine::error::EngineError;
Expand All @@ -24,9 +20,16 @@ use crate::engine::tail_skipping::TailSkip;
use crate::engine::{Engine, Input};
use crate::query::automaton::{Automaton, State};
use crate::query::error::CompilerError;
use crate::query::{JsonPathQuery, Label};
use crate::query::{JsonPathQuery, Label, NonNegativeArrayIndex};
use crate::result::QueryResult;
use crate::BLOCK_SIZE;
use crate::{
classification::{
quotes::{classify_quoted_sequences, QuoteClassifiedIterator},
structural::{classify_structural_characters, BracketType, Structural, StructuralIterator},
},
query::automaton::TransitionLabel,
};
use smallvec::{smallvec, SmallVec};

/// Main engine for a fixed JSONPath query.
Expand Down Expand Up @@ -102,6 +105,9 @@ struct Executor<'q, 'b, I: Input> {
bytes: &'b I,
next_event: Option<Structural>,
is_list: bool,
array_count: NonNegativeArrayIndex,
has_any_array_item_transition: bool,
has_any_array_item_transition_to_accepting: bool,
}

fn query_executor<'q, 'b, I: Input>(
Expand All @@ -116,6 +122,9 @@ fn query_executor<'q, 'b, I: Input>(
bytes,
next_event: None,
is_list: false,
array_count: NonNegativeArrayIndex::ZERO,
has_any_array_item_transition: false,
has_any_array_item_transition_to_accepting: false,
}
}

Expand Down Expand Up @@ -203,10 +212,15 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
let mut any_matched = false;

for &(label, target) in self.automaton[self.state].transitions() {
if self.automaton.is_accepting(target) && self.is_match(idx, label)? {
result.report(idx);
any_matched = true;
break;
match label {
TransitionLabel::ArrayIndex(_) => {}
TransitionLabel::ObjectMember(label) => {
if self.automaton.is_accepting(target) && self.is_match(idx, label)? {
result.report(idx);
any_matched = true;
break;
}
}
}
}
let fallback_state = self.automaton[self.state].fallback_state();
Expand Down Expand Up @@ -240,13 +254,32 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
R: QueryResult,
{
self.next_event = classifier.next();

let is_next_opening = self.next_event.map_or(false, |s| s.is_opening());

if !is_next_opening {
let fallback_state = self.automaton[self.state].fallback_state();
if self.is_list && self.automaton.is_accepting(fallback_state) {
result.report(idx);
}
let is_fallback_accepting = self
.automaton
.is_accepting(self.automaton[self.state].fallback_state());

if !is_next_opening && self.is_list && is_fallback_accepting {
debug!("Accepting on comma.");
result.report(idx);
}

// When inside a list, advance the array item counter, then check whether the
// new index has a transition to an accepting state.
// If the counter cannot be incremented any further, give up on this event.
if self.is_list && self.array_count.try_increment().is_err() {
return Ok(());
}
debug!("Incremented array count to {}", self.array_count);

let match_index = self
.automaton
.has_array_index_transition_to_accepting(self.state, &self.array_count);

if !is_next_opening && match_index {
debug!("Accepting on list item.");
result.report(idx);
}

Ok(())
Expand All @@ -267,15 +300,32 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
debug!("Opening {bracket_type:?}, increasing depth and pushing stack.",);
let mut any_matched = false;

if let Some(colon_idx) = self.find_preceding_colon(idx) {
for &(label, target) in self.automaton[self.state].transitions() {
if self.is_match(colon_idx, label)? {
any_matched = true;
self.transition_to(target, bracket_type);
if self.automaton.is_accepting(target) {
result.report(colon_idx);
let colon_idx = self.find_preceding_colon(idx);

for &(label, target) in self.automaton[self.state].transitions() {
match label {
TransitionLabel::ArrayIndex(i) => {
if self.is_list && i.eq(&self.array_count) {
any_matched = true;
self.transition_to(target, bracket_type);
if self.automaton.is_accepting(target) {
debug!("Accept {idx}");
result.report(idx);
}
break;
}
}
TransitionLabel::ObjectMember(label) => {
if let Some(colon_idx) = colon_idx {
if self.is_match(colon_idx, label)? {
any_matched = true;
self.transition_to(target, bracket_type);
if self.automaton.is_accepting(target) {
result.report(colon_idx);
}
break;
}
}
break;
}
}
}
Expand All @@ -301,29 +351,51 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {

if bracket_type == BracketType::Square {
self.is_list = true;
self.has_any_array_item_transition =
self.automaton.has_any_array_item_transition(self.state);
self.has_any_array_item_transition_to_accepting = self
.automaton
.has_any_array_item_transition_to_accepting(self.state);

let fallback = self.automaton[self.state].fallback_state();
if self.automaton.is_accepting(fallback) {
let is_fallback_accepting = self.automaton.is_accepting(fallback);

let searching_list = is_fallback_accepting || self.has_any_array_item_transition;

if searching_list {
classifier.turn_commas_on(idx);
self.next_event = classifier.next();
match self.next_event {
Some(Structural::Closing(_, close_idx)) => {
if let Some((next_idx, _)) = self.bytes.seek_non_whitespace_forward(idx + 1)
{
if next_idx < close_idx {
result.report(next_idx);
self.array_count = NonNegativeArrayIndex::ZERO;
debug!("Initialized array count to {}", self.array_count);

let wants_first_item = is_fallback_accepting
|| self
.automaton
.has_first_array_index_transition_to_accepting(self.state);

if wants_first_item {
self.next_event = classifier.next();

match self.next_event {
Some(Structural::Closing(_, close_idx)) => {
if let Some((next_idx, _)) =
self.bytes.seek_non_whitespace_forward(idx + 1)
{
if next_idx < close_idx {
result.report(next_idx);
}
}
}
Some(Structural::Comma(_)) => {
result.report(idx + 1);
}
_ => (),
}
Some(Structural::Comma(_)) => {
result.report(idx + 1);
}
_ => (),
}
} else {
classifier.turn_commas_off();
}
} else {
classifier.turn_commas_off();
self.is_list = false;
}

Expand Down Expand Up @@ -359,6 +431,12 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) {
self.state = stack_frame.state;
self.is_list = stack_frame.is_list;
self.array_count = stack_frame.array_count;
self.has_any_array_item_transition = stack_frame.has_any_array_item_transition;
self.has_any_array_item_transition_to_accepting =
stack_frame.has_any_array_item_transition_to_accepting;

debug!("Restored array count to {}", self.array_count);

if self.automaton.is_unitary(self.state) {
let bracket_type = self.current_node_bracket_type();
Expand All @@ -369,6 +447,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
}
}
}

#[cfg(not(feature = "unique-labels"))]
{
self.depth
Expand All @@ -378,13 +457,20 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
if let Some(stack_frame) = self.stack.pop_if_at_or_below(*self.depth) {
self.state = stack_frame.state;
self.is_list = stack_frame.is_list;
self.array_count = stack_frame.array_count;
self.has_any_array_item_transition = stack_frame.has_any_array_item_transition;
self.has_any_array_item_transition_to_accepting =
stack_frame.has_any_array_item_transition_to_accepting;

debug!("Restored array count to {}", self.array_count);
}
}

if self.is_list
&& self
&& (self
.automaton
.is_accepting(self.automaton[self.state].fallback_state())
|| self.has_any_array_item_transition)
{
classifier.turn_commas_on(idx);
} else {
Expand All @@ -402,15 +488,25 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {

fn transition_to(&mut self, target: State, opening: BracketType) {
let target_is_list = opening == BracketType::Square;
if target != self.state || target_is_list != self.is_list {

let fallback = self.automaton[self.state].fallback_state();
let is_fallback_accepting = self.automaton.is_accepting(fallback);
let searching_list = is_fallback_accepting || self.has_any_array_item_transition;

if target != self.state || target_is_list != self.is_list || searching_list {
debug!(
"push {}, goto {target}, is_list = {target_is_list}",
self.state
"push {}, goto {target}, is_list = {target_is_list}, array_count: {}",
self.state, self.array_count
);

self.stack.push(StackFrame {
depth: *self.depth,
state: self.state,
is_list: self.is_list,
array_count: self.array_count,
has_any_array_item_transition: self.has_any_array_item_transition,
has_any_array_item_transition_to_accepting: self
.has_any_array_item_transition_to_accepting,
});
self.state = target;
}
Expand Down Expand Up @@ -467,6 +563,9 @@ struct StackFrame {
depth: u8,
state: State,
is_list: bool,
array_count: NonNegativeArrayIndex,
has_any_array_item_transition: bool,
has_any_array_item_transition_to_accepting: bool,
}

#[derive(Debug)]
Expand Down
Loading