-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: fixed empty query on atomic roots
- Previously only object and array roots were supported. Ref: #160
- Loading branch information
Showing
14 changed files
with
208 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ | |
"memmap", | ||
"memmem", | ||
"Mmap", | ||
"mmaps", | ||
"movemask", | ||
"ndash", | ||
"nondescendant", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
//! Special case handlers for the empty query $. | ||
//! | ||
//! The main engine is built with a path automaton in mind, and for simplicity | ||
//! we assume the root opening was already read. This makes it incompatible with | ||
//! an empty query. Instead of rewriting the engine we provide fast-path implementations | ||
//! here. | ||
use crate::{ | ||
engine::{error::EngineError, Input}, | ||
is_json_whitespace, | ||
result::{empty::EmptyRecorder, Match, MatchCount, MatchIndex, MatchSpan, Sink}, | ||
FallibleIterator, BLOCK_SIZE, | ||
}; | ||
|
||
/// Count for an empty query – determine if the root exists. | ||
pub(super) fn count<I>(input: &I) -> Result<MatchCount, EngineError> | ||
where | ||
I: Input, | ||
{ | ||
// Assuming a correct JSON, there is either one root if any non-whitespace character | ||
// occurs in the document, or the document is empty. | ||
if input.seek_non_whitespace_forward(0)?.is_some() { | ||
Ok(1) | ||
} else { | ||
Ok(0) | ||
} | ||
} | ||
|
||
/// Index for an empty query – determine the first index of the root. | ||
pub(super) fn index<I, S>(input: &I, sink: &mut S) -> Result<(), EngineError> | ||
where | ||
I: Input, | ||
S: Sink<MatchIndex>, | ||
{ | ||
// Assuming a correct JSON, the root starts at the first non-whitespace character, if any. | ||
if let Some((first_idx, _)) = input.seek_non_whitespace_forward(0)? { | ||
sink.add_match(first_idx) | ||
.map_err(|err| EngineError::SinkError(Box::new(err)))?; | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
/// Approximate span for an empty query – determine the first index and the length of the root. | ||
pub(super) fn approx_span<I, S>(input: &I, sink: &mut S) -> Result<(), EngineError> | ||
where | ||
I: Input, | ||
S: Sink<MatchSpan>, | ||
{ | ||
// The root spans the entire document, by definition, with the exception of whitespace. | ||
// We need to find the start index exactly, and then can return the length of the rest as the approximate | ||
// length of the root. | ||
// | ||
// Some input know their lengths: bytes already in memory, file mmaps, etc. | ||
// A BufferedInput over an arbitrary Read stream cannot know its length, so we actually | ||
// need to iterate until the end and count the bytes. | ||
if let Some((first_idx, _)) = input.seek_non_whitespace_forward(0)? { | ||
let end_idx = match input.len_hint() { | ||
Some(end_idx) => end_idx, // Known length, just take it. | ||
None => { | ||
// Unknown length, iterate and count. | ||
let mut iter = input.iter_blocks::<_, BLOCK_SIZE>(&EmptyRecorder); | ||
let mut end_idx = 0; | ||
|
||
while (iter.next()?).is_some() { | ||
end_idx += BLOCK_SIZE; | ||
} | ||
|
||
end_idx | ||
} | ||
}; | ||
|
||
sink.add_match(MatchSpan::from_indices(first_idx, end_idx)) | ||
.map_err(|err| EngineError::SinkError(Box::new(err)))?; | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
/// Match for an empty query – copy the entire document, trimming whitespace. | ||
pub(super) fn match_<I, S>(input: &I, sink: &mut S) -> Result<(), EngineError> | ||
where | ||
I: Input, | ||
S: Sink<Match>, | ||
{ | ||
// For a full match we need to copy the entire input starting from first non-whitespace, | ||
// and then trim the whitespace from the end. This might be slow if the document is excessively | ||
// padded with whitespace at start and/or end, but that's a pathological case. | ||
let mut iter = input.iter_blocks::<_, BLOCK_SIZE>(&EmptyRecorder); | ||
let mut res: Vec<u8> = vec![]; | ||
let mut first_significant_idx = None; | ||
|
||
while let Some(block) = iter.next()? { | ||
if first_significant_idx.is_none() { | ||
// Start of the root not found yet, look for it. | ||
first_significant_idx = block.iter().position(|&x| !is_json_whitespace(x)); | ||
|
||
if let Some(first_idx) = first_significant_idx { | ||
// Start of the root found in this block, copy the relevant part. | ||
res.extend(&block[first_idx..]); | ||
} | ||
} else { | ||
// Start of the root was already found, now we are copying everything. | ||
res.extend(&*block); | ||
} | ||
} | ||
|
||
if let Some(start) = first_significant_idx { | ||
// Trim whitespace if we have a result. | ||
while !res.is_empty() && is_json_whitespace(res[res.len() - 1]) { | ||
res.pop(); | ||
} | ||
|
||
sink.add_match(Match::from_start_and_bytes(start, res)) | ||
.map_err(|err| EngineError::SinkError(Box::new(err)))?; | ||
} | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.