Skip to content

Commit

Permalink
Auto merge of #98943 - WilliamVenner:feat/bufread_skip_until, r=dtolnay
Browse files Browse the repository at this point in the history
Add `BufRead::skip_until`

Alternative version of `BufRead::read_until` that simply discards data, rather than copying it into a buffer.

Useful for situations like skipping irrelevant data in a binary file format that is NUL-terminated.

<details>
<summary>Benchmark</summary>

```
running 2 tests
test bench_read_until ... bench:         123 ns/iter (+/- 6)
test bench_skip_until ... bench:          66 ns/iter (+/- 3)
```

```rs
#![feature(test)]
extern crate test;
use test::Bencher;

use std::io::{ErrorKind, BufRead};

/// Discards bytes from `r` up to and including the first `delim`, or up to
/// EOF when no delimiter is present.
///
/// Returns the number of bytes discarded. `ErrorKind::Interrupted` errors
/// from `fill_buf` are retried; any other error is returned to the caller.
fn skip_until<R: BufRead + ?Sized>(r: &mut R, delim: u8) -> Result<usize, std::io::Error> {
    let mut total = 0;
    loop {
        let chunk = match r.fill_buf() {
            Ok(bytes) => bytes,
            Err(ref err) if err.kind() == ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        // Either stop just past the delimiter or swallow the whole chunk.
        let (found, advance) = match memchr::memchr(delim, chunk) {
            Some(pos) => (true, pos + 1),
            None => (false, chunk.len()),
        };
        r.consume(advance);
        total += advance;
        // An empty chunk means EOF, so there is nothing further to skip.
        if found || advance == 0 {
            return Ok(total);
        }
    }
}

const STR: &[u8] = b"Ferris\0Hello, world!\0";

/// Benchmarks skipping the first NUL-terminated record with `skip_until`
/// and then reading the second record with `read_until`.
#[bench]
fn bench_skip_until(b: &mut Bencher) {
    const GREETING: &[u8] = b"Hello, world!\0";
    b.iter(|| {
        let mut cursor = std::io::Cursor::new(test::black_box(STR));
        // Drop the leading record without copying it anywhere.
        skip_until(&mut cursor, b'\0').unwrap();
        let mut greeting = Vec::with_capacity(GREETING.len());
        let copied = cursor.read_until(b'\0', &mut greeting).unwrap();
        assert_eq!(copied, GREETING.len());
        assert_eq!(greeting, GREETING);
    });
}

/// Baseline benchmark: discards the first NUL-terminated record by reading
/// it into a throwaway `Vec`, then reads the second record with `read_until`.
#[bench]
fn bench_read_until(b: &mut Bencher) {
    const GREETING: &[u8] = b"Hello, world!\0";
    b.iter(|| {
        let mut cursor = std::io::Cursor::new(test::black_box(STR));
        // The first record is copied into a scratch buffer and ignored.
        let mut discarded = Vec::new();
        cursor.read_until(b'\0', &mut discarded).unwrap();
        let mut greeting = Vec::with_capacity(GREETING.len());
        let copied = cursor.read_until(b'\0', &mut greeting).unwrap();
        assert_eq!(copied, GREETING.len());
        assert_eq!(greeting, GREETING);
    });
}
```
</details>
  • Loading branch information
bors committed Nov 23, 2023
2 parents a1a3773 + 7c1ab71 commit e68f935
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 0 deletions.
84 changes: 84 additions & 0 deletions library/std/src/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2044,6 +2044,28 @@ fn read_until<R: BufRead + ?Sized>(r: &mut R, delim: u8, buf: &mut Vec<u8>) -> R
}
}

// Shared implementation behind `BufRead::skip_until`: repeatedly inspects the
// reader's buffered bytes and consumes them until `delim` has been consumed or
// the reader reports EOF (an empty buffer). Returns the number of bytes
// consumed; `ErrorKind::Interrupted` is retried, other errors are propagated.
fn skip_until<R: BufRead + ?Sized>(r: &mut R, delim: u8) -> Result<usize> {
    let mut skipped = 0;
    loop {
        let buffered = match r.fill_buf() {
            Ok(bytes) => bytes,
            Err(ref err) if err.kind() == ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        let hit = memchr::memchr(delim, buffered);
        // Step just past the delimiter when present, otherwise over the
        // entire buffered chunk.
        let step = hit.map_or(buffered.len(), |idx| idx + 1);
        r.consume(step);
        skipped += step;
        if hit.is_some() || step == 0 {
            return Ok(skipped);
        }
    }
}

/// A `BufRead` is a type of `Read`er which has an internal buffer, allowing it
/// to perform extra ways of reading.
///
Expand Down Expand Up @@ -2247,6 +2269,68 @@ pub trait BufRead: Read {
read_until(self, byte, buf)
}

/// Skip all bytes until the delimiter `byte` or EOF is reached.
///
/// This function will read (and discard) bytes from the underlying stream until the
/// delimiter or EOF is found.
///
/// If successful, this function will return the total number of bytes read,
/// including the delimiter byte if it was found. If EOF is reached first, the
/// returned count covers only the bytes that were discarded, and a return
/// value of `0` means the reader was already at EOF.
///
/// This is useful for efficiently skipping data such as NUL-terminated strings
/// in binary file formats without buffering.
///
/// This function is blocking and should be used carefully: it is possible for
/// an attacker to continuously send bytes without ever sending the delimiter
/// or EOF.
///
/// # Errors
///
/// This function will ignore all instances of [`ErrorKind::Interrupted`] and
/// will otherwise return any errors returned by [`fill_buf`].
///
/// If an I/O error is encountered then the bytes skipped before the error
/// have already been consumed from the reader and are not recoverable; the
/// number of bytes skipped so far is not reported.
///
/// [`fill_buf`]: BufRead::fill_buf
///
/// # Examples
///
/// [`std::io::Cursor`][`Cursor`] is a type that implements `BufRead`. In
/// this example, we use [`Cursor`] to read some NUL-terminated information
/// about Ferris from a binary string, skipping the fun fact:
///
/// ```
/// #![feature(bufread_skip_until)]
///
/// use std::io::{self, BufRead};
///
/// let mut cursor = io::Cursor::new(b"Ferris\0Likes long walks on the beach\0Crustacean\0");
///
/// // read name
/// let mut name = Vec::new();
/// let num_bytes = cursor.read_until(b'\0', &mut name)
///     .expect("reading from cursor won't fail");
/// assert_eq!(num_bytes, 7);
/// assert_eq!(name, b"Ferris\0");
///
/// // skip fun fact
/// let num_bytes = cursor.skip_until(b'\0')
///     .expect("reading from cursor won't fail");
/// assert_eq!(num_bytes, 30);
///
/// // read animal type
/// let mut animal = Vec::new();
/// let num_bytes = cursor.read_until(b'\0', &mut animal)
///     .expect("reading from cursor won't fail");
/// assert_eq!(num_bytes, 11);
/// assert_eq!(animal, b"Crustacean\0");
/// ```
#[unstable(feature = "bufread_skip_until", issue = "111735")]
fn skip_until(&mut self, byte: u8) -> Result<usize> {
skip_until(self, byte)
}

/// Read all bytes until a newline (the `0xA` byte) is reached, and append
/// them to the provided `String` buffer.
///
Expand Down
30 changes: 30 additions & 0 deletions library/std/src/io/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,36 @@ fn read_until() {
assert_eq!(v, []);
}

#[test]
fn skip_until() {
    const KEPT: &[u8] = b"read\0";
    const DROPPED: &[u8] = b"ignore\0";

    let input: &[u8] = b"read\0ignore\0read\0ignore\0read\0ignore\0";
    let mut reader = BufReader::new(input);

    // Walk the input as alternating records: every `read\0` is copied out
    // with `read_until`, every `ignore\0` is discarded with `skip_until`.
    let mut record = Vec::new();
    loop {
        record.clear();
        match reader.read_until(0, &mut record).unwrap() {
            // Nothing left to read: EOF.
            0 => break,
            n => {
                assert_eq!(record, KEPT);
                assert_eq!(n, KEPT.len());
            }
        }

        assert_eq!(reader.skip_until(0).unwrap(), DROPPED.len());
    }

    // At EOF `skip_until` must report zero bytes skipped, mirroring what
    // `read_until` just returned.
    assert_eq!(reader.skip_until(0).unwrap(), 0);
}

#[test]
fn split() {
let buf = Cursor::new(&b"12"[..]);
Expand Down

0 comments on commit e68f935

Please sign in to comment.