From 0186035f27766716a5f013415c067a43ec4db748 Mon Sep 17 00:00:00 2001 From: Marshall Pierce Date: Thu, 24 Sep 2020 07:37:56 -0600 Subject: [PATCH 1/5] Add EncoderStringWriter --- RELEASE-NOTES.md | 9 ++- src/write/encoder.rs | 97 ++++++++++++++++---------- src/write/encoder_string_writer.rs | 107 +++++++++++++++++++++++++++++ src/write/encoder_tests.rs | 4 +- src/write/mod.rs | 2 + 5 files changed, 179 insertions(+), 40 deletions(-) create mode 100644 src/write/encoder_string_writer.rs diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 81c2c280..b551c6ef 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,6 +1,13 @@ +# Next + +- Config methods are const +- Added `EncoderStringWriter` to allow encoding directly to a String +- `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work) + - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()` + # 0.12.2 -Add `BinHex` alphabet +- Add `BinHex` alphabet # 0.12.1 diff --git a/src/write/encoder.rs b/src/write/encoder.rs index bece69b3..a7ea4162 100644 --- a/src/write/encoder.rs +++ b/src/write/encoder.rs @@ -25,27 +25,24 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3; /// use std::io::Write; /// /// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc. 
-/// let mut wrapped_writer = Vec::new(); -/// { -/// let mut enc = base64::write::EncoderWriter::new( -/// &mut wrapped_writer, base64::STANDARD); +/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), base64::STANDARD); /// -/// // handle errors as you normally would -/// enc.write_all(b"asdf").unwrap(); -/// // could leave this out to be called by Drop, if you don't care -/// // about handling errors -/// enc.finish().unwrap(); +/// // handle errors as you normally would +/// enc.write_all(b"asdf").unwrap(); /// -/// } +/// // could leave this out to be called by Drop, if you don't care +/// // about handling errors or getting the delegate writer back +/// let delegate = enc.finish().unwrap(); /// /// // base64 was written to the writer -/// assert_eq!(b"YXNkZg==", &wrapped_writer[..]); +/// assert_eq!(b"YXNkZg==", &delegate[..]); /// /// ``` /// /// # Panics /// -/// Calling `write()` after `finish()` is invalid and will panic. +/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without +/// error is invalid and will panic. /// /// # Errors /// @@ -56,10 +53,12 @@ const MIN_ENCODE_CHUNK_SIZE: usize = 3; /// /// It has some minor performance loss compared to encoding slices (a couple percent). /// It does not do any heap allocation. -pub struct EncoderWriter<'a, W: 'a + Write> { +pub struct EncoderWriter { config: Config, - /// Where encoded data is written to - w: &'a mut W, + /// Where encoded data is written to. It's an Option as it's None immediately before Drop is + /// called so that finish() can return the underlying writer. None implies that finish() has + /// been called successfully. + delegate: Option, /// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk /// with the next `write()`, encode it, then proceed with the rest of the input normally. 
extra_input: [u8; MIN_ENCODE_CHUNK_SIZE], @@ -70,13 +69,11 @@ pub struct EncoderWriter<'a, W: 'a + Write> { output: [u8; BUF_SIZE], /// How much of `output` is occupied with encoded data that couldn't be written last time output_occupied_len: usize, - /// True iff padding / partial last chunk has been written. - finished: bool, /// panic safety: don't write again in destructor if writer panicked while we were writing to it panicked: bool, } -impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> { +impl fmt::Debug for EncoderWriter { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, @@ -89,17 +86,16 @@ impl<'a, W: Write> fmt::Debug for EncoderWriter<'a, W> { } } -impl<'a, W: Write> EncoderWriter<'a, W> { +impl EncoderWriter { /// Create a new encoder that will write to the provided delegate writer `w`. - pub fn new(w: &'a mut W, config: Config) -> EncoderWriter<'a, W> { + pub fn new(w: W, config: Config) -> EncoderWriter { EncoderWriter { config, - w, + delegate: Some(w), extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE], extra_input_occupied_len: 0, output: [0u8; BUF_SIZE], output_occupied_len: 0, - finished: false, panicked: false, } } @@ -107,20 +103,41 @@ impl<'a, W: Write> EncoderWriter<'a, W> { /// Encode all remaining buffered data and write it, including any trailing incomplete input /// triples and associated padding. /// - /// Once this succeeds, no further writes can be performed, as that would produce invalid - /// base64. + /// Once this succeeds, no further writes or calls to this method are allowed. /// - /// This may write to the delegate writer multiple times if the delegate writer does not accept all input provided - /// to its `write` each invocation. + /// This may write to the delegate writer multiple times if the delegate writer does not accept + /// all input provided to its `write` each invocation. 
+ /// + /// If you don't care about error handling, it is not necessary to call this function, as the + /// equivalent finalization is done by the Drop impl. + /// + /// Returns the writer that this was constructed around. /// /// # Errors /// - /// The first error that is not of [`ErrorKind::Interrupted`] will be returned. - pub fn finish(&mut self) -> Result<()> { - if self.finished { - return Ok(()); + /// The first error that is not of `ErrorKind::Interrupted` will be returned. + pub fn finish(&mut self) -> Result<W> { + // If we could consume self in finish(), we wouldn't have to worry about this case, but + // finish() is retryable in the face of I/O errors, so we can't consume here. + if self.delegate.is_none() { + panic!("Encoder has already had finish() called") }; + self.write_final_leftovers()?; + + let writer = self.delegate.take().expect("Writer must be present"); + + Ok(writer) + } + + /// Write any remaining buffered data to the delegate writer. + fn write_final_leftovers(&mut self) -> Result<()> { + if self.delegate.is_none() { + // finish() has already successfully called this, and we are now in drop() with a None + // writer, so just no-op + return Ok(()); + } + self.write_all_encoded_output()?; if self.extra_input_occupied_len > 0 { @@ -138,7 +155,6 @@ impl<'a, W: Write> EncoderWriter<'a, W> { self.extra_input_occupied_len = 0; } - self.finished = true; Ok(()) } @@ -152,7 +168,11 @@ impl<'a, W: Write> EncoderWriter<'a, W> { /// that no write took place. 
fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> { self.panicked = true; - let res = self.w.write(&self.output[..current_output_len]); + let res = self + .delegate + .as_mut() + .expect("Writer must be present") + .write(&self.output[..current_output_len]); self.panicked = false; res.map(|consumed| { @@ -197,7 +217,7 @@ impl<'a, W: Write> EncoderWriter<'a, W> { } } -impl<'a, W: Write> Write for EncoderWriter<'a, W> { +impl Write for EncoderWriter { /// Encode input and then write to the delegate writer. /// /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes @@ -215,7 +235,7 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> { /// /// Any errors emitted by the delegate writer are returned. fn write(&mut self, input: &[u8]) -> Result { - if self.finished { + if self.delegate.is_none() { panic!("Cannot write more after calling finish()"); } @@ -341,15 +361,18 @@ impl<'a, W: Write> Write for EncoderWriter<'a, W> { /// incomplete chunks of input or write padding. fn flush(&mut self) -> Result<()> { self.write_all_encoded_output()?; - self.w.flush() + self.delegate + .as_mut() + .expect("Writer must be present") + .flush() } } -impl<'a, W: Write> Drop for EncoderWriter<'a, W> { +impl Drop for EncoderWriter { fn drop(&mut self) { if !self.panicked { // like `BufWriter`, ignore errors during drop - let _ = self.finish(); + let _ = self.write_final_leftovers(); } } } diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs new file mode 100644 index 00000000..cca22b76 --- /dev/null +++ b/src/write/encoder_string_writer.rs @@ -0,0 +1,107 @@ +use crate::Config; +use std::io; +use std::io::Write; +use super::encoder::EncoderWriter; + +/// A `Write` implementation that base64-encodes data using the provided config and accumulates the +/// resulting base64 in memory, which is then exposed as a String via `finish()`. 
+/// +/// # Examples +/// +/// ``` +/// use std::io::Write; +/// +/// let mut enc = base64::write::EncoderStringWriter::new(base64::STANDARD); +/// +/// enc.write_all(b"asdf").unwrap(); +/// +/// // get the resulting String +/// let b64_string = enc.finish().unwrap(); +/// +/// assert_eq!("YXNkZg==", &b64_string); +/// ``` +/// +/// # Panics +/// +/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without +/// error is invalid and will panic. +/// +/// # Performance +/// +/// B64-encoded data is buffered in the heap since the point is to collect it in a String. +pub struct EncoderStringWriter { + encoder: EncoderWriter>, +} + +impl EncoderStringWriter { + /// Create a new EncoderStringWriter that will encode with the provided config. + pub fn new(config: Config) -> EncoderStringWriter { + EncoderStringWriter { encoder: EncoderWriter::new(Vec::new(), config) } + } + + /// Encode all remaining buffered data, including any trailing incomplete input triples and + /// associated padding. + /// + /// Once this succeeds, no further writes or calls to this method are allowed. + /// + /// Returns the base64-encoded form of the accumulated written data. + /// + /// # Errors + /// + /// The first error that is not of `ErrorKind::Interrupted` will be returned. 
+ pub fn finish(&mut self) -> io::Result { + let buf = self.encoder.finish()?; + + let str = String::from_utf8(buf).expect("Base64 should always be valid UTF-8"); + Ok(str) + } +} + +impl<'a> Write for EncoderStringWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.encoder.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.encoder.flush() + } +} + +#[cfg(test)] +mod tests { + use crate::encode_config_buf; + use crate::tests::random_config; + use rand::Rng; + use std::io::Write; + use crate::write::encoder_string_writer::EncoderStringWriter; + + #[test] + fn every_possible_split_of_input() { + let mut rng = rand::thread_rng(); + let mut orig_data = Vec::::new(); + let mut normal_encoded = String::new(); + + let size = 5_000; + + for i in 0..size { + orig_data.clear(); + normal_encoded.clear(); + + for _ in 0..size { + orig_data.push(rng.gen()); + } + + let config = random_config(&mut rng); + encode_config_buf(&orig_data, config, &mut normal_encoded); + + let mut stream_encoder = EncoderStringWriter::new(config); + // Write the first i bytes, then the rest + stream_encoder.write_all(&orig_data[0..i]).unwrap(); + stream_encoder.write_all(&orig_data[i..]).unwrap(); + + let stream_encoded = stream_encoder.finish().unwrap(); + + assert_eq!(normal_encoded, stream_encoded); + } + } +} diff --git a/src/write/encoder_tests.rs b/src/write/encoder_tests.rs index 59a6127a..09b4d3a2 100644 --- a/src/write/encoder_tests.rs +++ b/src/write/encoder_tests.rs @@ -436,7 +436,7 @@ fn writes_that_only_write_part_of_input_and_sometimes_interrupt_produce_correct_ } } - stream_encoder.finish().unwrap(); + let _ = stream_encoder.finish().unwrap(); assert_eq!(orig_len, bytes_consumed); } @@ -500,7 +500,7 @@ fn do_encode_random_config_matches_normal_encode(max_input_len: usize) { bytes_consumed += input_len; } - stream_encoder.finish().unwrap(); + let _ = stream_encoder.finish().unwrap(); assert_eq!(orig_len, bytes_consumed); } diff --git a/src/write/mod.rs 
b/src/write/mod.rs index f8ed7076..98cb48c4 100644 --- a/src/write/mod.rs +++ b/src/write/mod.rs @@ -1,6 +1,8 @@ //! Implementations of `io::Write` to transparently handle base64. mod encoder; +mod encoder_string_writer; pub use self::encoder::EncoderWriter; +pub use self::encoder_string_writer::EncoderStringWriter; #[cfg(test)] mod encoder_tests; From acf75180c9df4ab6540c64f9eb85ab99db0b5931 Mon Sep 17 00:00:00 2001 From: Marshall Pierce Date: Thu, 24 Sep 2020 15:59:09 -0600 Subject: [PATCH 2/5] Add benchmark --- benches/benchmarks.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index 07f88721..325e4db6 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -123,6 +123,18 @@ fn do_encode_bench_stream(b: &mut Bencher, &size: &usize) { }); } +fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) { + let mut v: Vec = Vec::with_capacity(size); + fill(&mut v); + + b.iter(|| { + let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG); + stream_enc.write_all(&v).unwrap(); + stream_enc.flush().unwrap(); + let _ = stream_enc.finish().unwrap(); + }); +} + fn fill(v: &mut Vec) { let cap = v.capacity(); // weak randomness is plenty; we just want to not be completely friendly to the branch predictor @@ -147,6 +159,7 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark { .with_function("encode_reuse_buf", do_encode_bench_reuse_buf) .with_function("encode_slice", do_encode_bench_slice) .with_function("encode_reuse_buf_stream", do_encode_bench_stream) + .with_function("encode_string_stream", do_encode_bench_string_stream) } fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark { From 24ca190cd0568da4fd6a1b36a041c00f2eb4d031 Mon Sep 17 00:00:00 2001 From: Marshall Pierce Date: Thu, 24 Sep 2020 16:55:07 -0600 Subject: [PATCH 3/5] ESW into_inner cannot fail, so no need for Result Other tidying as well --- RELEASE-NOTES.md | 2 +- benches/benchmarks.rs | 
2 +- src/write/encoder.rs | 3 +++ src/write/encoder_string_writer.rs | 25 +++++++++++++------------ 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index b551c6ef..92f7014e 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -3,7 +3,7 @@ - Config methods are const - Added `EncoderStringWriter` to allow encoding directly to a String - `EncoderWriter` now owns its delegate writer rather than keeping a reference to it (though refs still work) - - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()` + - As a consequence, it is now possible to extract the delegate writer from an `EncoderWriter` via `finish()`, which returns `Result<W>` instead of `Result<()>`. # 0.12.2 diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index 325e4db6..e98cb938 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -131,7 +131,7 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) { let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG); stream_enc.write_all(&v).unwrap(); stream_enc.flush().unwrap(); - let _ = stream_enc.finish().unwrap(); + let _ = stream_enc.into_inner(); }); } diff --git a/src/write/encoder.rs b/src/write/encoder.rs index a7ea4162..8a48f438 100644 --- a/src/write/encoder.rs +++ b/src/write/encoder.rs @@ -359,6 +359,9 @@ impl<W: Write> Write for EncoderWriter<W> { /// Because this is usually treated as OK to call multiple times, it will *not* flush any /// incomplete chunks of input or write padding. + /// # Errors + /// + /// The first error that is not of [`ErrorKind::Interrupted`] will be returned. 
fn flush(&mut self) -> Result<()> { self.write_all_encoded_output()?; self.delegate diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs index cca22b76..fad3499b 100644 --- a/src/write/encoder_string_writer.rs +++ b/src/write/encoder_string_writer.rs @@ -4,7 +4,7 @@ use std::io::Write; use super::encoder::EncoderWriter; /// A `Write` implementation that base64-encodes data using the provided config and accumulates the -/// resulting base64 in memory, which is then exposed as a String via `finish()`. +/// resulting base64 in memory, which is then exposed as a String via `into_inner()`. /// /// # Examples /// @@ -16,7 +16,7 @@ use super::encoder::EncoderWriter; /// enc.write_all(b"asdf").unwrap(); /// /// // get the resulting String -/// let b64_string = enc.finish().unwrap(); +/// let b64_string = enc.into_inner(); /// /// assert_eq!("YXNkZg==", &b64_string); /// ``` @@ -36,7 +36,12 @@ pub struct EncoderStringWriter { impl EncoderStringWriter { /// Create a new EncoderStringWriter that will encode with the provided config. pub fn new(config: Config) -> EncoderStringWriter { - EncoderStringWriter { encoder: EncoderWriter::new(Vec::new(), config) } + EncoderStringWriter::from(String::new(), config) + } + + /// Create a new EncoderStringWriter that will append to the provided string. + pub fn from(s: String, config: Config) -> EncoderStringWriter { + EncoderStringWriter { encoder: EncoderWriter::new(s.into_bytes(), config) } } /// Encode all remaining buffered data, including any trailing incomplete input triples and @@ -45,15 +50,11 @@ impl EncoderStringWriter { /// Once this succeeds, no further writes or calls to this method are allowed. /// /// Returns the base64-encoded form of the accumulated written data. - /// - /// # Errors - /// - /// The first error that is not of `ErrorKind::Interrupted` will be returned. 
- pub fn finish(&mut self) -> io::Result { - let buf = self.encoder.finish()?; + pub fn into_inner(mut self) -> String { + let buf = self.encoder.finish() + .expect("Writing to a Vec should never fail"); - let str = String::from_utf8(buf).expect("Base64 should always be valid UTF-8"); - Ok(str) + String::from_utf8(buf).expect("Base64 should always be valid UTF-8") } } @@ -99,7 +100,7 @@ mod tests { stream_encoder.write_all(&orig_data[0..i]).unwrap(); stream_encoder.write_all(&orig_data[i..]).unwrap(); - let stream_encoded = stream_encoder.finish().unwrap(); + let stream_encoded = stream_encoder.into_inner(); assert_eq!(normal_encoded, stream_encoded); } From 5a56885c655deb54ccb206165e8351b0476c78b8 Mon Sep 17 00:00:00 2001 From: Marshall Pierce Date: Fri, 25 Sep 2020 17:53:00 -0600 Subject: [PATCH 4/5] Introduce StrWriter to allow ESW to wrap both a String and a &mut String --- benches/benchmarks.rs | 15 +++++ examples/make_tables.rs | 6 ++ src/write/encoder_string_writer.rs | 99 +++++++++++++++++++++++++----- 3 files changed, 103 insertions(+), 17 deletions(-) diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index e98cb938..3d27bbb7 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -129,6 +129,20 @@ fn do_encode_bench_string_stream(b: &mut Bencher, &size: &usize) { b.iter(|| { let mut stream_enc = write::EncoderStringWriter::new(TEST_CONFIG); + stream_enc.write_all(&v).unwrap(); + stream_enc.flush().unwrap(); + let _ = stream_enc.into_inner(); + }); +} + +fn do_encode_bench_string_reuse_buf_stream(b: &mut Bencher, &size: &usize) { + let mut v: Vec = Vec::with_capacity(size); + fill(&mut v); + + let mut buf = String::new(); + b.iter(|| { + buf.clear(); + let mut stream_enc = write::EncoderStringWriter::from(&mut buf, TEST_CONFIG); stream_enc.write_all(&v).unwrap(); stream_enc.flush().unwrap(); let _ = stream_enc.into_inner(); @@ -160,6 +174,7 @@ fn encode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark { 
.with_function("encode_slice", do_encode_bench_slice) .with_function("encode_reuse_buf_stream", do_encode_bench_stream) .with_function("encode_string_stream", do_encode_bench_string_stream) + .with_function("encode_string_reuse_buf_stream", do_encode_bench_string_reuse_buf_stream) } fn decode_benchmarks(byte_sizes: &[usize]) -> ParameterizedBenchmark { diff --git a/examples/make_tables.rs b/examples/make_tables.rs index 5ef3075f..db6fcf2b 100644 --- a/examples/make_tables.rs +++ b/examples/make_tables.rs @@ -164,8 +164,14 @@ fn print_decode_table(alphabet: &[u8], const_name: &str, indent_depth: usize) { } fn check_alphabet(alphabet: &[u8]) { + // ensure all characters are distinct assert_eq!(64, alphabet.len()); let mut set: HashSet = HashSet::new(); set.extend(alphabet); assert_eq!(64, set.len()); + + // must be ASCII to be valid as single UTF-8 bytes + for &b in alphabet { + assert!(b <= 0x7F_u8); + } } diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs index fad3499b..2b19ccf5 100644 --- a/src/write/encoder_string_writer.rs +++ b/src/write/encoder_string_writer.rs @@ -8,6 +8,8 @@ use super::encoder::EncoderWriter; /// /// # Examples /// +/// Buffer base64 in a new String: +/// /// ``` /// use std::io::Write; /// @@ -21,6 +23,23 @@ use super::encoder::EncoderWriter; /// assert_eq!("YXNkZg==", &b64_string); /// ``` /// +/// Or, append to an existing String: +/// +/// ``` +/// use std::io::Write; +/// +/// let mut buf = String::from("base64: "); +/// +/// let mut enc = base64::write::EncoderStringWriter::from(&mut buf, base64::STANDARD); +/// +/// enc.write_all(b"asdf").unwrap(); +/// +/// // release the &mut reference on buf +/// let _ = enc.into_inner(); +/// +/// assert_eq!("base64: YXNkZg==", &buf); +/// ``` +/// /// # Panics /// /// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without @@ -28,20 +47,16 @@ use super::encoder::EncoderWriter; /// /// # Performance /// -/// B64-encoded data is 
buffered in the heap since the point is to collect it in a String. -pub struct EncoderStringWriter { - encoder: EncoderWriter<Vec<u8>>, +/// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain +/// bytes to a `io::Write`. +pub struct EncoderStringWriter<S: StrWrite> { + encoder: EncoderWriter<Utf8SingleCodeUnitWriter<S>>, } -impl EncoderStringWriter { - /// Create a new EncoderStringWriter that will encode with the provided config. - pub fn new(config: Config) -> EncoderStringWriter { - EncoderStringWriter::from(String::new(), config) - } - - /// Create a new EncoderStringWriter that will append to the provided string. - pub fn from(s: String, config: Config) -> EncoderStringWriter { - EncoderStringWriter { encoder: EncoderWriter::new(s.into_bytes(), config) } +impl<S: StrWrite> EncoderStringWriter<S> { + /// Create a EncoderStringWriter that will append to the provided `StrWrite`. + pub fn from(str_writer: S, config: Config) -> Self { + EncoderStringWriter { encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_writer }, config) } } /// Encode all remaining buffered data, including any trailing incomplete input triples and @@ -50,15 +65,21 @@ impl EncoderStringWriter { /// Once this succeeds, no further writes or calls to this method are allowed. /// /// Returns the base64-encoded form of the accumulated written data. - pub fn into_inner(mut self) -> String { - let buf = self.encoder.finish() - .expect("Writing to a Vec should never fail"); + pub fn into_inner(mut self) -> S { + self.encoder.finish() + .expect("Writing to a Vec should never fail") + .str_writer + } +} - String::from_utf8(buf).expect("Base64 should always be valid UTF-8") +impl EncoderStringWriter<String> { + /// Create an EncoderStringWriter that will encode into a new String with the provided config. 
+ pub fn new(config: Config) -> Self { + EncoderStringWriter::from(String::new(), config) } } -impl<'a> Write for EncoderStringWriter { +impl<S: StrWrite> Write for EncoderStringWriter<S> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> { self.encoder.write(buf) } @@ -68,6 +89,50 @@ impl<'a> Write for EncoderStringWriter { } } +/// An abstraction around infallible writes of `str`s. +/// +/// Typically, this will just be String. +pub trait StrWrite { + /// The write must succeed, and must write the entire `buf`. + fn write(&mut self, buf: &str); +} + +/// As for io::Write, StrWrite is implemented automatically for `&mut S`. +impl<S: StrWrite + ?Sized> StrWrite for &mut S { + fn write(&mut self, buf: &str) { + (**self).write(buf) + } +} + +impl StrWrite for String { + fn write(&mut self, buf: &str) { + self.push_str(buf) + } +} + +/// A `Write` that only can handle bytes that are valid single-byte UTF-8 code units. +/// +/// This is safe because we only use it when writing base64, which is always valid UTF-8. +struct Utf8SingleCodeUnitWriter<S: StrWrite> { + str_writer: S +} + +impl<S: StrWrite> io::Write for Utf8SingleCodeUnitWriter<S> { + fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + let s = std::str::from_utf8(buf) + .expect("Input must be valid UTF-8"); + + self.str_writer.write(s); + + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + // no op + Ok(()) + } +} + #[cfg(test)] mod tests { use crate::encode_config_buf; From 8b1ae22babb33a5c7cee84f0445434bff6880c76 Mon Sep 17 00:00:00 2001 From: Marshall Pierce Date: Mon, 28 Sep 2020 06:36:27 -0600 Subject: [PATCH 5/5] Rename StrWrite to StrConsumer It really wasn't very much like io::Write, so best to avoid the misleading name --- src/write/encoder_string_writer.rs | 47 +++++++++++++++--------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/src/write/encoder_string_writer.rs b/src/write/encoder_string_writer.rs index 2b19ccf5..a2033c4a 100644 --- a/src/write/encoder_string_writer.rs +++ b/src/write/encoder_string_writer.rs @@ -49,14 +49,14 @@ 
use super::encoder::EncoderWriter; /// /// Because it has to validate that the base64 is UTF-8, it is about 80% as fast as writing plain /// bytes to a `io::Write`. -pub struct EncoderStringWriter<S: StrWrite> { +pub struct EncoderStringWriter<S: StrConsumer> { encoder: EncoderWriter<Utf8SingleCodeUnitWriter<S>>, } -impl<S: StrWrite> EncoderStringWriter<S> { - /// Create a EncoderStringWriter that will append to the provided `StrWrite`. - pub fn from(str_writer: S, config: Config) -> Self { - EncoderStringWriter { encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_writer }, config) } +impl<S: StrConsumer> EncoderStringWriter<S> { + /// Create an EncoderStringWriter that will append to the provided `StrConsumer`. + pub fn from(str_consumer: S, config: Config) -> Self { + EncoderStringWriter { encoder: EncoderWriter::new(Utf8SingleCodeUnitWriter { str_consumer }, config) } } /// Encode all remaining buffered data, including any trailing incomplete input triples and @@ -68,7 +68,7 @@ impl<S: StrWrite> EncoderStringWriter<S> { pub fn into_inner(mut self) -> S { self.encoder.finish() .expect("Writing to a Vec should never fail") - .str_writer + .str_consumer } } @@ -79,7 +79,7 @@ impl EncoderStringWriter<String> { } } -impl<S: StrWrite> Write for EncoderStringWriter<S> { +impl<S: StrConsumer> Write for EncoderStringWriter<S> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> { self.encoder.write(buf) } @@ -89,23 +89,22 @@ impl<S: StrWrite> Write for EncoderStringWriter<S> { } } -/// An abstraction around infallible writes of `str`s. -/// -/// Typically, this will just be String. -pub trait StrWrite { - /// The write must succeed, and must write the entire `buf`. - fn write(&mut self, buf: &str); +/// An abstraction around consuming `str`s produced by base64 encoding. +pub trait StrConsumer { + /// Consume the base64 encoded data in `buf` + fn consume(&mut self, buf: &str); } -/// As for io::Write, StrWrite is implemented automatically for `&mut S`. -impl<S: StrWrite + ?Sized> StrWrite for &mut S { - fn write(&mut self, buf: &str) { - (**self).write(buf) +/// As for io::Write, `StrConsumer` is implemented automatically for `&mut S`. 
+impl<S: StrConsumer + ?Sized> StrConsumer for &mut S { + fn consume(&mut self, buf: &str) { + (**self).consume(buf) + } +} -impl StrWrite for String { - fn write(&mut self, buf: &str) { +/// Pushes the str onto the end of the String +impl StrConsumer for String { + fn consume(&mut self, buf: &str) { self.push_str(buf) } } @@ -113,16 +112,18 @@ impl StrWrite for String { /// A `Write` that only can handle bytes that are valid single-byte UTF-8 code units. /// /// This is safe because we only use it when writing base64, which is always valid UTF-8. -struct Utf8SingleCodeUnitWriter<S: StrWrite> { - str_writer: S +struct Utf8SingleCodeUnitWriter<S: StrConsumer> { + str_consumer: S } -impl<S: StrWrite> io::Write for Utf8SingleCodeUnitWriter<S> { +impl<S: StrConsumer> io::Write for Utf8SingleCodeUnitWriter<S> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> { + // Because we expect all input to be valid utf-8 individual bytes, we can encode any buffer + // length let s = std::str::from_utf8(buf) .expect("Input must be valid UTF-8"); - self.str_consumer.consume(s); + self.str_consumer.consume(s); Ok(buf.len()) }