Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add EncoderStringWriter #142

Merged
merged 5 commits into from
Sep 28, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion RELEASE-NOTES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Next

- Config methods are const
- Added `EncoderStringWriter` to allow encoding directly to a String
- `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work)
- As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`

# 0.12.2

Add `BinHex` alphabet
- Add `BinHex` alphabet

# 0.12.1

Expand Down
97 changes: 60 additions & 37 deletions src/write/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,24 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
/// use std::io::Write;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut wrapped_writer = Vec::new();
/// {
/// let mut enc = base64::write::EncoderWriter::new(
/// &mut wrapped_writer, base64::STANDARD);
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD);
///
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors
/// enc.finish().unwrap();
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
///
/// }
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors or getting the delegate writer back
/// let delegate = enc.finish().unwrap();
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &wrapped_writer[..]);
/// assert_eq!(b"YXNkZg==", &delegate[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` after `finish()` is invalid and will panic.
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Errors
///
Expand All @@ -56,10 +53,12 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3;
///
/// It has some minor performance loss compared to encoding slices (a couple percent).
/// It does not do any heap allocation.
pub struct EncoderWriter<'a, W: 'a + Write> {
pub struct EncoderWriter<W: Write> {
config: Config,
/// Where encoded data is written to
w: &'a mut W,
/// Where encoded data is written to. It's an Option as it's None immediately before Drop is
/// called so that finish() can return the underlying writer. None implies that finish() has
/// been called successfully.
delegate: Option<W>,
/// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
/// with the next `write()`, encode it, then proceed with the rest of the input normally.
extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
Expand All @@ -70,13 +69,11 @@ pub struct EncoderWriter<'a, W: 'a + Write> {
output: [u8; BUF_SIZE],
/// How much of `output` is occupied with encoded data that couldn't be written last time
output_occupied_len: usize,
/// True iff padding / partial last chunk has been written.
finished: bool,
/// panic safety: don't write again in destructor if writer panicked while we were writing to it
panicked: bool,
}

impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
impl<W: Write> fmt::Debug for EncoderWriter<W> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
Expand All @@ -89,38 +86,58 @@ impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> {
}
}

impl<'a, W: Write> EncoderWriter<'a, W> {
impl<W: Write> EncoderWriter<W> {
/// Create a new encoder that will write to the provided delegate writer `w`.
pub fn new(w: &'a mut W, config: Config) -> EncoderWriter<'a, W> {
pub fn new(w: W, config: Config) -> EncoderWriter<W> {
EncoderWriter {
config,
w,
delegate: Some(w),
extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
extra_input_occupied_len: 0,
output: [0u8; BUF_SIZE],
output_occupied_len: 0,
finished: false,
panicked: false,
}
}

/// Encode all remaining buffered data and write it, including any trailing incomplete input
/// triples and associated padding.
///
/// Once this succeeds, no further writes can be performed, as that would produce invalid
/// base64.
/// Once this succeeds, no further writes or calls to this method are allowed.
///
/// This may write to the delegate writer multiple times if the delegate writer does not accept all input provided
/// to its `write` each invocation.
/// This may write to the delegate writer multiple times if the delegate writer does not accept
/// all input provided to its `write` each invocation.
///
/// If you don't care about error handling, it is not necessary to call this function, as the
/// equivalent finalization is done by the Drop impl.
///
/// Returns the writer that this was constructed around.
///
/// # Errors
///
/// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
pub fn finish(&mut self) -> Result<()> {
if self.finished {
return Ok(());
/// The first error that is not of `ErrorKind::Interrupted` will be returned.
pub fn finish(&mut self) -> Result<W> {
marshallpierce marked this conversation as resolved.
Show resolved Hide resolved
// If we could consume self in finish(), we wouldn't have to worry about this case, but
// finish() is retryable in the face of I/O errors, so we can't consume here.
if self.delegate.is_none() {
panic!("Encoder has already had finish() called")
};

self.write_final_leftovers()?;

let writer = self.delegate.take().expect("Writer must be present");

Ok(writer)
}

/// Write any remaining buffered data to the delegate writer.
fn write_final_leftovers(&mut self) -> Result<()> {
if self.delegate.is_none() {
// finish() has already successfully called this, and we are now in drop() with a None
// writer, so just no-op
return Ok(());
}

self.write_all_encoded_output()?;

if self.extra_input_occupied_len > 0 {
Expand All @@ -138,7 +155,6 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
self.extra_input_occupied_len = 0;
}

self.finished = true;
Ok(())
}

Expand All @@ -152,7 +168,11 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
/// that no write took place.
fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
self.panicked = true;
let res = self.w.write(&self.output[..current_output_len]);
let res = self
.delegate
.as_mut()
.expect("Writer must be present")
.write(&self.output[..current_output_len]);
self.panicked = false;

res.map(|consumed| {
Expand Down Expand Up @@ -197,7 +217,7 @@ impl<'a, W: Write> EncoderWriter<'a, W> {
}
}

impl<'a, W: Write> Write for EncoderWriter<'a, W> {
impl<W: Write> Write for EncoderWriter<W> {
/// Encode input and then write to the delegate writer.
///
/// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
Expand All @@ -215,7 +235,7 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> {
///
/// Any errors emitted by the delegate writer are returned.
fn write(&mut self, input: &[u8]) -> Result<usize> {
if self.finished {
if self.delegate.is_none() {
panic!("Cannot write more after calling finish()");
}

Expand Down Expand Up @@ -341,15 +361,18 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> {
/// incomplete chunks of input or write padding.
fn flush(&mut self) -> Result<()> {
self.write_all_encoded_output()?;
self.w.flush()
self.delegate
.as_mut()
.expect("Writer must be present")
.flush()
}
}

impl<'a, W: Write> Drop for EncoderWriter<'a, W> {
impl<W: Write> Drop for EncoderWriter<W> {
fn drop(&mut self) {
if !self.panicked {
// like `BufWriter`, ignore errors during drop
let _ = self.finish();
let _ = self.write_final_leftovers();
}
}
}
107 changes: 107 additions & 0 deletions src/write/encoder_string_writer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
use crate::Config;
use std::io;
use std::io::Write;
use super::encoder::EncoderWriter;

/// A `Write` implementation that base64-encodes data using the provided config and accumulates the
/// resulting base64 in memory, which is then exposed as a String via `finish()`.
///
/// # Examples
///
/// ```
/// use std::io::Write;
///
/// let mut enc = base64::write::EncoderStringWriter::new(base64::STANDARD);
///
/// enc.write_all(b"asdf").unwrap();
///
/// // get the resulting String
/// let b64_string = enc.finish().unwrap();
///
/// assert_eq!("YXNkZg==", &b64_string);
/// ```
///
/// # Panics
///
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Performance
///
/// B64-encoded data is buffered in the heap since the point is to collect it in a String.
pub struct EncoderStringWriter {
/// Streaming encoder whose delegate writer is an in-memory `Vec<u8>`; `finish()` takes that
/// buffer back out of the encoder and converts it into the returned `String`.
encoder: EncoderWriter<Vec<u8>>,
}
marshallpierce marked this conversation as resolved.
Show resolved Hide resolved

impl EncoderStringWriter {
    /// Create a new EncoderStringWriter that will encode with the provided config.
    ///
    /// The encoded output is accumulated in a freshly created, empty in-memory buffer.
    pub fn new(config: Config) -> EncoderStringWriter {
        EncoderStringWriter {
            encoder: EncoderWriter::new(Vec::new(), config),
        }
    }

    /// Encode all remaining buffered data, including any trailing incomplete input triples and
    /// associated padding.
    ///
    /// Once this succeeds, no further writes or calls to this method are allowed.
    ///
    /// Returns the base64-encoded form of the accumulated written data.
    ///
    /// # Errors
    ///
    /// The first error that is not of `ErrorKind::Interrupted` will be returned.
    ///
    /// # Panics
    ///
    /// Panics if called again after a previous call has already succeeded, or if the
    /// accumulated output is somehow not valid UTF-8 (which would indicate an encoder bug).
    pub fn finish(&mut self) -> io::Result<String> {
        // On success the wrapped encoder hands back its delegate, i.e. the byte buffer that
        // holds everything encoded so far.
        let encoded_bytes = self.encoder.finish()?;

        // Avoid naming the local `str`, which would shadow the primitive type name.
        let encoded =
            String::from_utf8(encoded_bytes).expect("Base64 should always be valid UTF-8");

        Ok(encoded)
    }
}

impl<'a> Write for EncoderStringWriter {
marshallpierce marked this conversation as resolved.
Show resolved Hide resolved
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.encoder.write(buf)
}

fn flush(&mut self) -> io::Result<()> {
self.encoder.flush()
}
}

#[cfg(test)]
mod tests {
    use crate::encode_config_buf;
    use crate::tests::random_config;
    use crate::write::encoder_string_writer::EncoderStringWriter;
    use rand::Rng;
    use std::io::Write;

    /// For every split offset in `0..size`, streaming the two pieces of a random input through
    /// an `EncoderStringWriter` must produce the same base64 as encoding the input in one shot.
    #[test]
    fn every_possible_split_of_input() {
        let mut rng = rand::thread_rng();
        let mut orig_data = Vec::<u8>::new();
        let mut normal_encoded = String::new();

        let size = 5_000;

        for split_point in 0..size {
            // Reuse the buffers across iterations, refilling them with fresh random input.
            orig_data.clear();
            normal_encoded.clear();
            orig_data.extend((0..size).map(|_| rng.gen::<u8>()));

            // Reference result: encode the whole input at once.
            let config = random_config(&mut rng);
            encode_config_buf(&orig_data, config, &mut normal_encoded);

            // Streamed result: write everything before the split point, then everything after.
            let (head, tail) = orig_data.split_at(split_point);
            let mut stream_encoder = EncoderStringWriter::new(config);
            stream_encoder.write_all(head).unwrap();
            stream_encoder.write_all(tail).unwrap();

            let stream_encoded = stream_encoder.finish().unwrap();

            assert_eq!(normal_encoded, stream_encoded);
        }
    }
}
4 changes: 2 additions & 2 deletions src/write/encoder_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ fn writes_that_only_write_part_of_input_and_sometimes_interrupt_produce_correct_
}
}

stream_encoder.finish().unwrap();
let _ = stream_encoder.finish().unwrap();

assert_eq!(orig_len, bytes_consumed);
}
Expand Down Expand Up @@ -500,7 +500,7 @@ fn do_encode_random_config_matches_normal_encode(max_input_len: usize) {
bytes_consumed += input_len;
}

stream_encoder.finish().unwrap();
let _ = stream_encoder.finish().unwrap();

assert_eq!(orig_len, bytes_consumed);
}
Expand Down
2 changes: 2 additions & 0 deletions src/write/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
//! Implementations of `io::Write` to transparently handle base64.
mod encoder;
mod encoder_string_writer;
pub use self::encoder::EncoderWriter;
pub use self::encoder_string_writer::EncoderStringWriter;

#[cfg(test)]
mod encoder_tests;