Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor readers to use type parameters and not concrete vtables #207

Closed
Closed
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ time = { workspace = true, optional = true, features = [
"std",
] }
zeroize = { version = "1.8.1", optional = true, features = ["zeroize_derive"] }
zstd = { version = "0.13.1", optional = true, default-features = false }
# zstd = { version = "0.13.1", optional = true, default-features = false }
zstd = { git = "https://github.com/gyscos/zstd-rs", rev = "a3738d680542e34b5529207ee30491ed7b69ed71", optional = true, default-features = false }
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved
zopfli = { version = "0.8.1", optional = true }
deflate64 = { version = "0.1.8", optional = true }
lzma-rs = { version = "0.3.0", default-features = false, optional = true }
Expand Down
4 changes: 2 additions & 2 deletions benches/merge_archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ fn perform_raw_copy_file<R: Read + Seek, W: Write + Seek>(
mut target: ZipWriter<W>,
) -> ZipResult<ZipWriter<W>> {
for i in 0..src.len() {
let entry = src.by_index(i)?;
target.raw_copy_file(entry)?;
let entry = src.by_index_raw(i)?;
target.copy_file(entry)?;
}
Ok(target)
}
Expand Down
31 changes: 25 additions & 6 deletions benches/read_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,14 @@ use bencher::Bencher;
use getrandom::getrandom;
use tempdir::TempDir;
use zip::write::SimpleFileOptions;
use zip::{result::ZipResult, CompressionMethod, ZipArchive, ZipWriter};
use zip::{
result::ZipResult,
unstable::{
read::streaming::{StreamingZipEntry, ZipStreamFileMetadata},
stream::{ZipStreamReader, ZipStreamVisitor},
},
CompressionMethod, ZipArchive, ZipWriter,
};

const FILE_COUNT: usize = 15_000;
const FILE_SIZE: usize = 1024;
Expand Down Expand Up @@ -106,12 +113,24 @@ fn parse_stream_archive(bench: &mut Bencher) {
let out = dir.path().join("bench-out.zip");
fs::write(&out, &bytes).unwrap();

struct V;
impl ZipStreamVisitor for V {
fn visit_file(&mut self, _file: &mut StreamingZipEntry<impl Read>) -> ZipResult<()> {
Ok(())
}

fn visit_additional_metadata(
&mut self,
_metadata: &ZipStreamFileMetadata,
) -> ZipResult<()> {
Ok(())
}
}

bench.iter(|| {
let mut f = fs::File::open(&out).unwrap();
while zip::read::read_zipfile_from_stream(&mut f)
.unwrap()
.is_some()
{}
let f = fs::File::open(&out).unwrap();
let archive = ZipStreamReader::new(f);
archive.visit(&mut V).unwrap();
});
bench.bytes = bytes.len() as u64;
}
Expand Down
2 changes: 2 additions & 0 deletions examples/extract.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::fs;
use std::io;

use zip::unstable::read::ArchiveEntry;

fn main() {
std::process::exit(real_main());
}
Expand Down
2 changes: 2 additions & 0 deletions examples/file_info.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::fs;
use std::io::BufReader;

use zip::unstable::read::ArchiveEntry;

fn main() {
std::process::exit(real_main());
}
Expand Down
7 changes: 5 additions & 2 deletions examples/stdin_info.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
use std::io::{self, Read};

use zip::unstable::read::{streaming::StreamingArchive, ArchiveEntry};

fn main() {
std::process::exit(real_main());
}

fn real_main() -> i32 {
let stdin = io::stdin();
let mut stdin_handle = stdin.lock();
let stdin_handle = stdin.lock();
let mut buf = [0u8; 16];

let mut archive = StreamingArchive::new(stdin_handle);
loop {
match zip::read::read_zipfile_from_stream(&mut stdin_handle) {
match archive.next_entry() {
Ok(Some(mut file)) => {
println!(
"{}: {} bytes ({} bytes packed)",
Expand Down
24 changes: 20 additions & 4 deletions fuzz/fuzz_targets/fuzz_read.rs
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
use libfuzzer_sys::fuzz_target;
use std::io::{Read, Seek, SeekFrom};
use tikv_jemallocator::Jemalloc;
use zip::read::read_zipfile_from_stream;
use zip::unstable::{
read::streaming::{StreamingArchive, StreamingZipEntry, ZipStreamFileMetadata},
stream::ZipStreamVisitor,
};

const MAX_BYTES_TO_READ: u64 = 1 << 24;

Expand All @@ -18,11 +21,24 @@ fn decompress_all(data: &[u8]) -> Result<(), Box<dyn std::error::Error>> {
let mut file = zip.by_index(i)?.take(MAX_BYTES_TO_READ);
std::io::copy(&mut file, &mut std::io::sink())?;
}

let mut reader = zip.into_inner();
reader.seek(SeekFrom::Start(0))?;
while let Ok(Some(mut file)) = read_zipfile_from_stream(&mut reader) {
std::io::copy(&mut file, &mut std::io::sink())?;
reader.rewind()?;

struct V;
impl ZipStreamVisitor for V {
fn visit_file(&mut self, file: &mut StreamingZipEntry<impl Read>) -> ZipResult<()> {
std::io::copy(&mut file, &mut std::io::sink())?
}

fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()> {
Ok(())
}
}

let archive = StreamingArchive::new(reader)?;
archive.visit(&mut V)?;

Ok(())
}

Expand Down
8 changes: 5 additions & 3 deletions src/aes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
data_length: u64,
}

impl<R: Read> AesReader<R> {
impl<R> AesReader<R> {
pub const fn new(reader: R, aes_mode: AesMode, compressed_size: u64) -> AesReader<R> {
let data_length = compressed_size
- (PWD_VERIFY_LENGTH + AUTH_CODE_LENGTH + aes_mode.salt_length()) as u64;
Expand All @@ -77,7 +77,9 @@
data_length,
}
}
}

impl<R: Read> AesReader<R> {
/// Read the AES header bytes and validate the password.
///
/// Even if the validation succeeds, there is still a 1 in 65536 chance that an incorrect
Expand Down Expand Up @@ -150,7 +152,7 @@
/// There is a 1 in 65536 chance that an invalid password passes that check.
/// After the data has been read and decrypted an HMAC will be checked and provide a final means
/// to check if either the password is invalid or if the data has been changed.
pub struct AesReaderValid<R: Read> {
pub struct AesReaderValid<R> {
reader: R,
data_remaining: u64,
cipher: Cipher,
Expand Down Expand Up @@ -214,9 +216,9 @@
}
}

impl<R: Read> AesReaderValid<R> {
impl<R> AesReaderValid<R> {
/// Consumes this decoder, returning the underlying reader.
pub fn into_inner(self) -> R {

Check failure on line 221 in src/aes.rs

View workflow job for this annotation

GitHub Actions / Build and test : macOS-latest, msrv

method `into_inner` is never used

Check failure on line 221 in src/aes.rs

View workflow job for this annotation

GitHub Actions / Build and test --all-features: macOS-latest, msrv

method `into_inner` is never used
self.reader
}
}
Expand Down
80 changes: 29 additions & 51 deletions src/crc32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,76 +10,54 @@
inner: R,
hasher: Hasher,
check: u32,
/// Signals if `inner` stores aes encrypted data.
/// AE-2 encrypted data doesn't use crc and sets the value to 0.
enabled: bool,
}

impl<R> Crc32Reader<R> {
/// Get a new Crc32Reader which checks the inner reader against checksum.
/// The check is disabled if `ae2_encrypted == true`.
pub(crate) fn new(inner: R, checksum: u32, ae2_encrypted: bool) -> Crc32Reader<R> {
pub(crate) fn new(inner: R, checksum: u32) -> Self {
Crc32Reader {
inner,
hasher: Hasher::new(),
check: checksum,
enabled: !ae2_encrypted,
}
}

fn check_matches(&self) -> bool {
self.check == self.hasher.clone().finalize()
fn check_matches(&self) -> Result<(), &'static str> {
let res = self.hasher.clone().finalize();
if self.check == res {
Ok(())
} else {
/* TODO: make this into our own Crc32Error error type! */

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment
Err("Invalid checksum")
}
}

#[allow(dead_code)]
cosmicexplorer marked this conversation as resolved.
Show resolved Hide resolved
pub fn into_inner(self) -> R {
self.inner
}
}

#[cold]
fn invalid_checksum() -> io::Error {
io::Error::new(io::ErrorKind::InvalidData, "Invalid checksum")
}

impl<R: Read> Read for Crc32Reader<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let count = self.inner.read(buf)?;

if self.enabled {
if count == 0 && !buf.is_empty() && !self.check_matches() {
return Err(invalid_checksum());
}
self.hasher.update(&buf[..count]);
}
Ok(count)
}

fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
let start = buf.len();
let n = self.inner.read_to_end(buf)?;

if self.enabled {
self.hasher.update(&buf[start..]);
if !self.check_matches() {
return Err(invalid_checksum());
}
/* We want to make sure we only check the hash when the input stream is exhausted. */
if buf.is_empty() {
/* If the input buf is empty (this shouldn't happen, but isn't guaranteed), we
* still want to "pull" from the source in case it surfaces an i/o error. This will
* always return a count of Ok(0) if successful. */
return self.inner.read(buf);
}

Ok(n)
}

fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
let start = buf.len();
let n = self.inner.read_to_string(buf)?;

if self.enabled {
self.hasher.update(&buf.as_bytes()[start..]);
if !self.check_matches() {
return Err(invalid_checksum());
}
let count = self.inner.read(buf)?;
if count == 0 {
return self
.check_matches()
.map(|()| 0)
/* TODO: use io::Error::other for MSRV >=1.74 */

Check notice

Code scanning / devskim

A "TODO" or similar was left in source code, possibly indicating incomplete functionality Note

Suspicious comment
.map_err(|e| io::Error::new(io::ErrorKind::Other, e));
}

Ok(n)
self.hasher.update(&buf[..count]);
Ok(count)
}
}

Expand All @@ -92,10 +70,10 @@
let data: &[u8] = b"";
let mut buf = [0; 1];

let mut reader = Crc32Reader::new(data, 0, false);
let mut reader = Crc32Reader::new(data, 0);
assert_eq!(reader.read(&mut buf).unwrap(), 0);

let mut reader = Crc32Reader::new(data, 1, false);
let mut reader = Crc32Reader::new(data, 1);
assert!(reader
.read(&mut buf)
.unwrap_err()
Expand All @@ -108,7 +86,7 @@
let data: &[u8] = b"1234";
let mut buf = [0; 1];

let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false);
let mut reader = Crc32Reader::new(data, 0x9be3e0a3);
assert_eq!(reader.read(&mut buf).unwrap(), 1);
assert_eq!(reader.read(&mut buf).unwrap(), 1);
assert_eq!(reader.read(&mut buf).unwrap(), 1);
Expand All @@ -123,7 +101,7 @@
let data: &[u8] = b"1234";
let mut buf = [0; 5];

let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false);
let mut reader = Crc32Reader::new(data, 0x9be3e0a3);
assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0);
assert_eq!(reader.read(&mut buf).unwrap(), 4);
}
Expand Down
Loading
Loading