Skip to content

Commit

Permalink
Fix return type of XmlSource::read_bytes_until - we should know if by…
Browse files Browse the repository at this point in the history
…te was found or not

Co-authored-by: Daniel Alley <dalley@redhat.com>
  • Loading branch information
Mingun and dralley committed Nov 15, 2023
1 parent f3e8b1f commit 75c9d5e
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 57 deletions.
2 changes: 1 addition & 1 deletion src/reader/async_tokio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

use tokio::io::{self, AsyncBufRead, AsyncBufReadExt};

use crate::errors::{Error, Result, SyntaxError};
use crate::events::Event;
use crate::name::{QName, ResolveResult};
use crate::reader::buffered_reader::impl_buffered_source;
use crate::reader::{
is_whitespace, BangType, NsReader, ParseState, ReadElementState, Reader, Span,
};
use crate::{Error, Result};

/// A struct for reading XML asynchronously from an [`AsyncBufRead`].
///
Expand Down
8 changes: 2 additions & 6 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ macro_rules! impl_buffered_source {
byte: u8,
buf: &'b mut Vec<u8>,
position: &mut usize,
) -> Result<Option<&'b [u8]>> {
) -> Result<(&'b [u8], bool)> {
// search byte must be within the ascii range
debug_assert!(byte.is_ascii());

Expand Down Expand Up @@ -90,11 +90,7 @@ macro_rules! impl_buffered_source {
}
*position += read;

if read == 0 {
Ok(None)
} else {
Ok(Some(&buf[start..]))
}
Ok((&buf[start..], done))
}

$($async)? fn read_bang_element $(<$lf>)? (
Expand Down
79 changes: 39 additions & 40 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,7 @@ macro_rules! read_until_open {
$(.$await)?
{
// Return Text event with `bytes` content
Ok(Some(bytes)) => $self.state.emit_text(bytes).map(Ok),
Ok(None) => Ok(Ok(Event::Eof)),
Ok((bytes, _found)) => $self.state.emit_text(bytes).map(Ok),
Err(e) => Err(e),
}
}};
Expand Down Expand Up @@ -334,17 +333,17 @@ macro_rules! read_until_close {
.read_bytes_until(b'>', $buf, &mut $self.state.offset)
$(.$await)?
{
Ok(None) => Ok(Event::Eof),
Ok(Some(bytes)) => $self.state.emit_end(bytes),
Ok((bytes, true)) => $self.state.emit_end(bytes),
Ok((_, false)) => Err(Error::Syntax(SyntaxError::UnclosedTag)),
Err(e) => Err(e),
},
// `<?` - processing instruction
Ok(Some(b'?')) => match $reader
.read_bytes_until(b'>', $buf, &mut $self.state.offset)
$(.$await)?
{
Ok(None) => Ok(Event::Eof),
Ok(Some(bytes)) => $self.state.emit_question_mark(bytes),
Ok((bytes, true)) => $self.state.emit_question_mark(bytes),
Ok((_, false)) => Err(Error::Syntax(SyntaxError::UnclosedPIOrXmlDecl)),
Err(e) => Err(e),
},
// `<...` - opening or self-closed tag
Expand Down Expand Up @@ -741,8 +740,8 @@ trait XmlSource<'r, B> {

/// Read input until `byte` is found or end of input is reached.
///
/// Returns a slice of data read up to `byte`, which does not include into result.
/// If input (`Self`) is exhausted, returns `None`.
/// Returns a slice of data read up to `byte` (exclusive),
/// and a flag noting whether `byte` was found in the input or not.
///
/// # Example
///
Expand All @@ -753,7 +752,7 @@ trait XmlSource<'r, B> {
///
/// assert_eq!(
/// input.read_bytes_until(b'*', (), &mut position).unwrap(),
/// Some(b"abc".as_ref())
/// (b"abc".as_ref(), true)
/// );
/// assert_eq!(position, 4); // position after the symbol matched
/// ```
Expand All @@ -770,7 +769,7 @@ trait XmlSource<'r, B> {
byte: u8,
buf: B,
position: &mut usize,
) -> Result<Option<&'r [u8]>>;
) -> Result<(&'r [u8], bool)>;

/// Read input until comment, CDATA or processing instruction is finished.
///
Expand Down Expand Up @@ -998,13 +997,13 @@ mod test {
let mut input = b"".as_ref();
// ^= 0

let (bytes, found) = $source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
$source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap()
.map(Bytes),
None
(Bytes(bytes), found),
(Bytes(b""), false)
);
assert_eq!(position, 0);
}
Expand All @@ -1018,13 +1017,13 @@ mod test {
let mut input = b"abcdef".as_ref();
// ^= 6

let (bytes, found) = $source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
$source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap()
.map(Bytes),
Some(Bytes(b"abcdef"))
(Bytes(bytes), found),
(Bytes(b"abcdef"), false)
);
assert_eq!(position, 6);
}
Expand All @@ -1039,13 +1038,13 @@ mod test {
let mut input = b"*abcdef".as_ref();
// ^= 1

let (bytes, found) = $source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
$source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap()
.map(Bytes),
Some(Bytes(b""))
(Bytes(bytes), found),
(Bytes(b""), true)
);
assert_eq!(position, 1); // position after the symbol matched
}
Expand All @@ -1060,13 +1059,13 @@ mod test {
let mut input = b"abc*def".as_ref();
// ^= 4

let (bytes, found) = $source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
$source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap()
.map(Bytes),
Some(Bytes(b"abc"))
(Bytes(bytes), found),
(Bytes(b"abc"), true)
);
assert_eq!(position, 4); // position after the symbol matched
}
Expand All @@ -1081,13 +1080,13 @@ mod test {
let mut input = b"abcdef*".as_ref();
// ^= 7

let (bytes, found) = $source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
$source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap()
.map(Bytes),
Some(Bytes(b"abcdef"))
(Bytes(bytes), found),
(Bytes(b"abcdef"), true)
);
assert_eq!(position, 7); // position after the symbol matched
}
Expand Down
13 changes: 5 additions & 8 deletions src/reader/slice_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,24 +260,21 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
byte: u8,
_buf: (),
position: &mut usize,
) -> Result<Option<&'a [u8]>> {
) -> Result<(&'a [u8], bool)> {
// search byte must be within the ascii range
debug_assert!(byte.is_ascii());
if self.is_empty() {
return Ok(None);
}

Ok(Some(if let Some(i) = memchr::memchr(byte, self) {
if let Some(i) = memchr::memchr(byte, self) {
*position += i + 1;
let bytes = &self[..i];
*self = &self[i + 1..];
bytes
Ok((bytes, true))
} else {
*position += self.len();
let bytes = &self[..];
*self = &[];
bytes
}))
Ok((bytes, false))
}
}

fn read_bang_element(
Expand Down
10 changes: 8 additions & 2 deletions src/reader/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,14 @@ pub(super) struct ReaderState {
}

impl ReaderState {
/// Trims whitespaces from `bytes`, if required, and returns a [`Text`] event.
/// Trims trailing whitespace from `bytes`, if required, and returns a [`Text`]
/// event, or an [`Eof`] event if the text is empty after trimming.
///
/// # Parameters
/// - `bytes`: data from the start of stream to the first `<` or from `>` to `<`
///
/// [`Text`]: Event::Text
/// [`Eof`]: Event::Eof
pub fn emit_text<'b>(&mut self, bytes: &'b [u8]) -> Result<Event<'b>> {
let mut content = bytes;

Expand All @@ -67,7 +69,11 @@ impl ReaderState {
content = &bytes[..len];
}

Ok(Event::Text(BytesText::wrap(content, self.decoder())))
if content.is_empty() {
Ok(Event::Eof)
} else {
Ok(Event::Text(BytesText::wrap(content, self.decoder())))
}
}

/// reads `BytesElement` starting with a `!`,
Expand Down

0 comments on commit 75c9d5e

Please sign in to comment.