Skip to content

Commit

Permalink
Add Read::initializer.
Browse files Browse the repository at this point in the history
This is an API that allows types to indicate that they can be passed
buffers of uninitialized memory which can improve performance.
  • Loading branch information
sfackler committed Jun 21, 2017
1 parent 4450779 commit ecbb896
Show file tree
Hide file tree
Showing 28 changed files with 222 additions and 269 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# `read_initializer`

The tracking issue for this feature is: [#42788]

[#42788]: https://github.com/rust-lang/rust/issues/42788

------------------------
14 changes: 9 additions & 5 deletions src/libstd/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

use fmt;
use ffi::OsString;
use io::{self, SeekFrom, Seek, Read, Write};
use io::{self, SeekFrom, Seek, Read, Initializer, Write};
use path::{Path, PathBuf};
use sys::fs as fs_imp;
use sys_common::{AsInnerMut, FromInner, AsInner, IntoInner};
Expand Down Expand Up @@ -446,8 +446,10 @@ impl Read for File {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.inner.read(buf)
}
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
self.inner.read_to_end(buf)

// Returns the no-op `Initializer`, so buffers handed to this reader are not
// zeroed before `read` is called.
// NOTE(review): this relies on `self.inner.read` never reading from `buf` and
// reporting the written byte count accurately — confirm per platform.
#[inline]
unsafe fn initializer(&self) -> Initializer {
Initializer::nop()
}
}
#[stable(feature = "rust1", since = "1.0.0")]
Expand All @@ -468,8 +470,10 @@ impl<'a> Read for &'a File {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.inner.read(buf)
}
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
self.inner.read_to_end(buf)

// Returns the no-op `Initializer`, so buffers handed to this reader are not
// zeroed before `read` is called.
// NOTE(review): this relies on `self.inner.read` never reading from `buf` and
// reporting the written byte count accurately — confirm per platform.
#[inline]
unsafe fn initializer(&self) -> Initializer {
Initializer::nop()
}
}
#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
22 changes: 16 additions & 6 deletions src/libstd/io/buffered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use io::prelude::*;
use cmp;
use error;
use fmt;
use io::{self, DEFAULT_BUF_SIZE, Error, ErrorKind, SeekFrom};
use io::{self, Initializer, DEFAULT_BUF_SIZE, Error, ErrorKind, SeekFrom};
use memchr;

/// The `BufReader` struct adds buffering to any reader.
Expand Down Expand Up @@ -92,11 +92,16 @@ impl<R: Read> BufReader<R> {
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
pub fn with_capacity(cap: usize, inner: R) -> BufReader<R> {
BufReader {
inner: inner,
buf: vec![0; cap].into_boxed_slice(),
pos: 0,
cap: 0,
unsafe {
let mut buffer = Vec::with_capacity(cap);
buffer.set_len(cap);
inner.initializer().initialize(&mut buffer);
BufReader {
inner: inner,
buf: buffer.into_boxed_slice(),
pos: 0,
cap: 0,
}
}
}

Expand Down Expand Up @@ -180,6 +185,11 @@ impl<R: Read> Read for BufReader<R> {
self.consume(nread);
Ok(nread)
}

// We can't return a no-op initializer unconditionally because of the large
// buffer case in `read`, so defer to whatever the inner reader requires.
unsafe fn initializer(&self) -> Initializer {
self.inner.initializer()
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
7 changes: 6 additions & 1 deletion src/libstd/io/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use io::prelude::*;

use core::convert::TryInto;
use cmp;
use io::{self, SeekFrom, Error, ErrorKind};
use io::{self, Initializer, SeekFrom, Error, ErrorKind};

/// A `Cursor` wraps another type and provides it with a
/// [`Seek`] implementation.
Expand Down Expand Up @@ -229,6 +229,11 @@ impl<T> Read for Cursor<T> where T: AsRef<[u8]> {
self.pos += n as u64;
Ok(n)
}

// Returns the no-op `Initializer`: buffers passed to this reader do not need
// to be zeroed first.
#[inline]
unsafe fn initializer(&self) -> Initializer {
Initializer::nop()
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
17 changes: 16 additions & 1 deletion src/libstd/io/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// except according to those terms.

use cmp;
use io::{self, SeekFrom, Read, Write, Seek, BufRead, Error, ErrorKind};
use io::{self, SeekFrom, Read, Initializer, Write, Seek, BufRead, Error, ErrorKind};
use fmt;
use mem;

Expand All @@ -23,6 +23,11 @@ impl<'a, R: Read + ?Sized> Read for &'a mut R {
(**self).read(buf)
}

// Forwards to the referenced reader, so the reference reports the same
// `Initializer` as the reader it points to.
#[inline]
unsafe fn initializer(&self) -> Initializer {
(**self).initializer()
}

#[inline]
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
(**self).read_to_end(buf)
Expand Down Expand Up @@ -87,6 +92,11 @@ impl<R: Read + ?Sized> Read for Box<R> {
(**self).read(buf)
}

// Forwards to the boxed reader, so the box reports the same `Initializer` as
// the reader it owns.
#[inline]
unsafe fn initializer(&self) -> Initializer {
(**self).initializer()
}

#[inline]
fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
(**self).read_to_end(buf)
Expand Down Expand Up @@ -171,6 +181,11 @@ impl<'a> Read for &'a [u8] {
Ok(amt)
}

// Returns the no-op `Initializer`: buffers passed to this reader do not need
// to be zeroed first.
#[inline]
unsafe fn initializer(&self) -> Initializer {
Initializer::nop()
}

#[inline]
fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
if buf.len() > self.len() {
Expand Down
128 changes: 109 additions & 19 deletions src/libstd/io/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ use fmt;
use result;
use str;
use memchr;
use ptr;

#[stable(feature = "rust1", since = "1.0.0")]
pub use self::buffered::{BufReader, BufWriter, LineWriter};
Expand All @@ -292,7 +293,7 @@ pub use self::stdio::{stdin, stdout, stderr, Stdin, Stdout, Stderr};
pub use self::stdio::{StdoutLock, StderrLock, StdinLock};
#[unstable(feature = "print_internals", issue = "0")]
pub use self::stdio::{_print, _eprint};
#[unstable(feature = "libstd_io_internals", issue = "0")]
#[unstable(feature = "libstd_io_internals", issue = "42788")]
#[doc(no_inline, hidden)]
pub use self::stdio::{set_panic, set_print};

Expand All @@ -307,6 +308,14 @@ mod stdio;

const DEFAULT_BUF_SIZE: usize = ::sys_common::io::DEFAULT_BUF_SIZE;

/// Length-restoring guard over a byte vector.
///
/// When the guard is dropped, the length of `buf` is set to `len`, whatever
/// the vector's length happened to be at that moment. Code holding the guard
/// updates `len` to record how much of the vector should be kept.
struct Guard<'a> {
    buf: &'a mut Vec<u8>,
    len: usize,
}

impl<'a> Drop for Guard<'a> {
    /// Resets the vector's length to the recorded `len`.
    fn drop(&mut self) {
        let kept = self.len;
        // The holder of the guard is responsible for keeping `len` within the
        // vector's capacity and covering only initialized bytes.
        unsafe {
            self.buf.set_len(kept);
        }
    }
}

// A few methods below (read_to_string, read_line) will append data into a
// `String` buffer, but we need to be pretty careful when doing this. The
// implementation will just call `.as_mut_vec()` and then delegate to a
Expand All @@ -328,23 +337,16 @@ const DEFAULT_BUF_SIZE: usize = ::sys_common::io::DEFAULT_BUF_SIZE;
fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize>
where F: FnOnce(&mut Vec<u8>) -> Result<usize>
{
struct Guard<'a> { s: &'a mut Vec<u8>, len: usize }
impl<'a> Drop for Guard<'a> {
fn drop(&mut self) {
unsafe { self.s.set_len(self.len); }
}
}

unsafe {
let mut g = Guard { len: buf.len(), s: buf.as_mut_vec() };
let ret = f(g.s);
if str::from_utf8(&g.s[g.len..]).is_err() {
let mut g = Guard { len: buf.len(), buf: buf.as_mut_vec() };
let ret = f(g.buf);
if str::from_utf8(&g.buf[g.len..]).is_err() {
ret.and_then(|_| {
Err(Error::new(ErrorKind::InvalidData,
"stream did not contain valid UTF-8"))
})
} else {
g.len = g.s.len();
g.len = g.buf.len();
ret
}
}
Expand All @@ -356,25 +358,32 @@ fn append_to_string<F>(buf: &mut String, f: F) -> Result<usize>
// of data to return. Simply tacking on an extra DEFAULT_BUF_SIZE space every
// time is 4,500 times (!) slower than this if the reader has a very small
// amount of data to return.
//
// Because we're extending the buffer with uninitialized data for trusted
// readers, we need to make sure to truncate that if any of this panics.
fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize> {
let start_len = buf.len();
let mut len = start_len;
let mut g = Guard { len: buf.len(), buf: buf };
let mut new_write_size = 16;
let ret;
loop {
if len == buf.len() {
if g.len == g.buf.len() {
if new_write_size < DEFAULT_BUF_SIZE {
new_write_size *= 2;
}
buf.resize(len + new_write_size, 0);
unsafe {
g.buf.reserve(new_write_size);
g.buf.set_len(g.len + new_write_size);
r.initializer().initialize(&mut g.buf[g.len..]);
}
}

match r.read(&mut buf[len..]) {
match r.read(&mut g.buf[g.len..]) {
Ok(0) => {
ret = Ok(len - start_len);
ret = Ok(g.len - start_len);
break;
}
Ok(n) => len += n,
Ok(n) => g.len += n,
Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
Err(e) => {
ret = Err(e);
Expand All @@ -383,7 +392,6 @@ fn read_to_end<R: Read + ?Sized>(r: &mut R, buf: &mut Vec<u8>) -> Result<usize>
}
}

buf.truncate(len);
ret
}

Expand Down Expand Up @@ -494,6 +502,31 @@ pub trait Read {
#[stable(feature = "rust1", since = "1.0.0")]
fn read(&mut self, buf: &mut [u8]) -> Result<usize>;

/// Determines if this `Read`er can work with buffers of uninitialized
/// memory.
///
/// The default implementation returns an initializer which will zero
/// buffers.
///
/// If a `Read`er guarantees that it can work properly with uninitialized
/// memory, it should override this method to return `Initializer::nop()`.
/// See the documentation for `Initializer` for details.
///
/// The behavior of this method must be independent of the state of the
/// `Read`er - the method only takes `&self` so that it can be used through
/// trait objects.
///
/// # Unsafety
///
/// This method is unsafe because a `Read`er could otherwise return a
/// non-zeroing `Initializer` from another `Read` type without an `unsafe`
/// block.
#[unstable(feature = "read_initializer", issue = "42788")]
#[inline]
unsafe fn initializer(&self) -> Initializer {
Initializer::zeroing()
}

/// Read all bytes until EOF in this source, placing them into `buf`.
///
/// All bytes read from this source will be appended to the specified buffer
Expand Down Expand Up @@ -829,6 +862,50 @@ pub trait Read {
}
}

/// Decides whether a buffer must be zero-filled before it is handed to a
/// `Read` method.
///
/// The wrapped flag is `true` when buffers have to be zeroed.
#[unstable(feature = "read_initializer", issue = "42788")]
#[derive(Debug)]
pub struct Initializer(bool);

impl Initializer {
    /// Creates an `Initializer` that zero-fills buffers.
    #[unstable(feature = "read_initializer", issue = "42788")]
    #[inline]
    pub fn zeroing() -> Initializer {
        Initializer(true)
    }

    /// Creates an `Initializer` that leaves buffers untouched.
    ///
    /// # Unsafety
    ///
    /// Only a `Read`er that never reads from the buffers given to its `Read`
    /// methods, and whose return values accurately report how many bytes were
    /// written to the head of the buffer, may call this.
    #[unstable(feature = "read_initializer", issue = "42788")]
    #[inline]
    pub unsafe fn nop() -> Initializer {
        Initializer(false)
    }

    /// Reports whether buffers need to be initialized.
    #[unstable(feature = "read_initializer", issue = "42788")]
    #[inline]
    pub fn should_initialize(&self) -> bool {
        let Initializer(zero_fill) = *self;
        zero_fill
    }

    /// Zero-fills `buf` when this initializer requires it; otherwise leaves
    /// `buf` untouched.
    #[unstable(feature = "read_initializer", issue = "42788")]
    #[inline]
    pub fn initialize(&self, buf: &mut [u8]) {
        if !self.should_initialize() {
            return;
        }
        // `buf` is an exclusively borrowed slice, so writing exactly
        // `buf.len()` bytes starting at its first element stays in bounds.
        unsafe { ptr::write_bytes(buf.as_mut_ptr(), 0, buf.len()) }
    }
}

/// A trait for objects which are byte-oriented sinks.
///
/// Implementors of the `Write` trait are sometimes called 'writers'.
Expand Down Expand Up @@ -1608,6 +1685,15 @@ impl<T: Read, U: Read> Read for Chain<T, U> {
}
self.second.read(buf)
}

// Picks the stricter of the two halves: if the first reader needs zeroed
// buffers its initializer is used, otherwise the second reader decides.
unsafe fn initializer(&self) -> Initializer {
    let head = self.first.initializer();
    if !head.should_initialize() {
        return self.second.initializer();
    }
    head
}
}

#[stable(feature = "chain_bufread", since = "1.9.0")]
Expand Down Expand Up @@ -1772,6 +1858,10 @@ impl<T: Read> Read for Take<T> {
self.limit -= n as u64;
Ok(n)
}

// Forwards to the wrapped reader, so this adapter reports the same
// `Initializer` as the reader it wraps.
unsafe fn initializer(&self) -> Initializer {
self.inner.initializer()
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand Down
Loading

0 comments on commit ecbb896

Please sign in to comment.