From 3259c5e39b7fe872a3d2844d67a3dbc2d2ff1c0d Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Tue, 16 Jul 2024 20:35:57 -0400 Subject: [PATCH] Revert "remove read_zipfile_from_stream()" This reverts commit 3abac52d1999350bb2959ac1e6ee60bc4650b7d9. --- benches/read_metadata.rs | 31 ++------ examples/stdin_info.rs | 7 +- fuzz/fuzz_targets/fuzz_read.rs | 24 ++----- src/read.rs | 76 ++++++++++++++++++-- src/read/stream.rs | 127 +++++++++++++++++++++++++++++---- src/unstable/read.rs | 1 - src/write.rs | 2 +- 7 files changed, 197 insertions(+), 71 deletions(-) mode change 100755 => 100644 fuzz/fuzz_targets/fuzz_read.rs diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 1c11d7302..73f2b26ed 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -7,14 +7,7 @@ use bencher::Bencher; use getrandom::getrandom; use tempdir::TempDir; use zip::write::SimpleFileOptions; -use zip::{ - result::ZipResult, - unstable::{ - read::streaming::{StreamingZipEntry, ZipStreamFileMetadata}, - stream::{ZipStreamReader, ZipStreamVisitor}, - }, - CompressionMethod, ZipArchive, ZipWriter, -}; +use zip::{result::ZipResult, CompressionMethod, ZipArchive, ZipWriter}; const FILE_COUNT: usize = 15_000; const FILE_SIZE: usize = 1024; @@ -113,24 +106,12 @@ fn parse_stream_archive(bench: &mut Bencher) { let out = dir.path().join("bench-out.zip"); fs::write(&out, &bytes).unwrap(); - struct V; - impl ZipStreamVisitor for V { - fn visit_file(&mut self, _file: &mut StreamingZipEntry) -> ZipResult<()> { - Ok(()) - } - - fn visit_additional_metadata( - &mut self, - _metadata: &ZipStreamFileMetadata, - ) -> ZipResult<()> { - Ok(()) - } - } - bench.iter(|| { - let f = fs::File::open(&out).unwrap(); - let archive = ZipStreamReader::new(f); - archive.visit(&mut V).unwrap(); + let mut f = fs::File::open(&out).unwrap(); + while zip::read::read_zipfile_from_stream(&mut f) + .unwrap() + .is_some() + {} }); bench.bytes = bytes.len() as u64; } diff --git a/examples/stdin_info.rs b/examples/stdin_info.rs index 25f8f895b..a609916a0 100644 --- a/examples/stdin_info.rs +++ b/examples/stdin_info.rs @@ -1,19 +1,16 @@ use std::io::{self, Read}; -use zip::unstable::read::{streaming::StreamingArchive, ArchiveEntry}; - fn main() { std::process::exit(real_main()); } fn real_main() -> i32 { let stdin = io::stdin(); - let stdin_handle = stdin.lock(); + let mut stdin_handle = stdin.lock(); let mut buf = [0u8; 16]; - let mut archive = StreamingArchive::new(stdin_handle); loop { - match archive.next_entry() { + match zip::read::read_zipfile_from_stream(&mut stdin_handle) { Ok(Some(mut file)) => { println!( "{}: {} bytes ({} bytes packed)", diff --git a/fuzz/fuzz_targets/fuzz_read.rs b/fuzz/fuzz_targets/fuzz_read.rs old mode 100755 new mode 100644 index 4a4327528..78fe670ec --- a/fuzz/fuzz_targets/fuzz_read.rs +++ b/fuzz/fuzz_targets/fuzz_read.rs @@ -3,10 +3,7 @@ use libfuzzer_sys::fuzz_target; use std::io::{Read, Seek, SeekFrom}; use tikv_jemallocator::Jemalloc; -use zip::unstable::{ - read::streaming::{StreamingArchive, StreamingZipEntry, ZipStreamFileMetadata}, - stream::ZipStreamVisitor, -}; +use zip::read::read_zipfile_from_stream; const MAX_BYTES_TO_READ: u64 = 1 << 24; @@ -21,24 +18,11 @@ fn decompress_all(data: &[u8]) -> Result<(), Box> { let mut file = zip.by_index(i)?.take(MAX_BYTES_TO_READ); std::io::copy(&mut file, &mut std::io::sink())?; } - let mut reader = zip.into_inner(); - reader.rewind()?; - - struct V; - impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { - std::io::copy(&mut file, &mut std::io::sink())? - } - - fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()> { - Ok(()) - } + reader.seek(SeekFrom::Start(0))?; + while let Ok(Some(mut file)) = read_zipfile_from_stream(&mut reader) { + std::io::copy(&mut file, &mut std::io::sink())?; } - - let archive = StreamingArchive::new(reader)?; - archive.visit(&mut V)?; - Ok(()) } diff --git a/src/read.rs b/src/read.rs index 7e13066c3..9fb3555a0 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1900,6 +1900,71 @@ impl<'a> Drop for ZipFile<'a> { } } +/// Read ZipFile structures from a non-seekable reader. +/// +/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions +/// as some information will be missing when reading this manner. +/// +/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is +/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory +/// is encountered. No more files should be read after this. +/// +/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after +/// the structure is done. +/// +/// Missing fields are: +/// * `comment`: set to an empty string +/// * `data_start`: set to 0 +/// * `external_attributes`: `unix_mode()`: will return None +pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult>> { + // We can't use the typical ::parse() method, as we follow separate code paths depending on the + // "magic" value (since the magic value will be from the central directory header if we've + // finished iterating over all the actual files). + /* TODO: smallvec? */ + let mut block = [0u8; mem::size_of::()]; + reader.read_exact(&mut block)?; + let block: Box<[u8]> = block.into(); + + let signature = spec::Magic::from_first_le_bytes(&block); + + match signature { + spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (), + spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None), + _ => return Err(ZipLocalEntryBlock::WRONG_MAGIC_ERROR), + } + + let block = ZipLocalEntryBlock::interpret(&block)?; + + let mut result = ZipFileData::from_local_block(block, reader)?; + + match parse_extra_field(&mut result) { + Ok(..) | Err(ZipError::Io(..)) => {} + Err(e) => return Err(e), + } + + let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size); + + let result_crc32 = result.crc32; + let result_compression_method = result.compression_method; + let crypto_reader = make_crypto_reader( + result_compression_method, + result_crc32, + result.last_modified_time, + result.using_data_descriptor, + limit_reader, + None, + None, + #[cfg(feature = "aes-crypto")] + result.compressed_size, + )?; + + Ok(Some(ZipFile { + data: Cow::Owned(result), + crypto_reader: None, + reader: make_reader(result_compression_method, result_crc32, crypto_reader)?, + })) +} + #[cfg(test)] mod test { use crate::result::ZipResult; @@ -1952,13 +2017,16 @@ mod test { #[test] fn zip_read_streaming() { - use crate::unstable::read::streaming::StreamingArchive; + use super::read_zipfile_from_stream; let mut v = Vec::new(); v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip")); - let reader = Cursor::new(v); - let mut archive = StreamingArchive::new(reader); - while archive.next_entry().unwrap().is_some() {} + let mut reader = Cursor::new(v); + loop { + if read_zipfile_from_stream(&mut reader).unwrap().is_none() { + break; + } + } } #[test] diff --git a/src/read/stream.rs b/src/read/stream.rs index c6c27aca4..7fb76e70c 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -1,33 +1,50 @@ use std::fs; use std::io::{self, Read}; -use std::path::Path; +use std::path::{Path, PathBuf}; -use super::{ZipError, ZipResult}; -use crate::unstable::read::{ - streaming::{StreamingArchive, StreamingZipEntry, ZipStreamFileMetadata}, - ArchiveEntry, +use super::{ + central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError, + ZipFile, ZipFileData, ZipResult, }; +use crate::spec::FixedSizeBlock; /// Stream decoder for zip. #[derive(Debug)] -pub struct ZipStreamReader(StreamingArchive); +pub struct ZipStreamReader(R); impl ZipStreamReader { /// Create a new ZipStreamReader pub const fn new(reader: R) -> Self { - Self(StreamingArchive::new(reader)) + Self(reader) } } impl ZipStreamReader { + fn parse_central_directory(&mut self) -> ZipResult { + // Give archive_offset and central_header_start dummy value 0, since + // they are not used in the output. + let archive_offset = 0; + let central_header_start = 0; + + // Parse central header + let block = ZipCentralEntryBlock::parse(&mut self.0)?; + let file = central_header_to_zip_file_inner( + &mut self.0, + archive_offset, + central_header_start, + block, + )?; + Ok(ZipStreamFileMetadata(file)) + } + /// Iterate over the stream and extract all file and their /// metadata. pub fn visit(mut self, visitor: &mut V) -> ZipResult<()> { - while let Some(mut file) = self.0.next_entry()? { + while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { visitor.visit_file(&mut file)?; } - while let Some(metadata) = self.0.next_metadata_entry()? { + while let Ok(metadata) = self.parse_central_directory() { visitor.visit_additional_metadata(&metadata)?; } @@ -42,7 +59,7 @@ impl ZipStreamReader { pub fn extract>(self, directory: P) -> ZipResult<()> { struct Extractor<'a>(&'a Path); impl ZipStreamVisitor for Extractor<'_> { - fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { let filepath = file .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; @@ -96,7 +113,7 @@ pub trait ZipStreamVisitor { /// - `comment`: set to an empty string /// - `data_start`: set to 0 /// - `external_attributes`: `unix_mode()`: will return None - fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()>; + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>; /// This function is guranteed to be called after all `visit_file`s. /// @@ -104,6 +121,86 @@ pub trait ZipStreamVisitor { fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>; } +/// Additional metadata for the file. +#[derive(Debug)] +pub struct ZipStreamFileMetadata(ZipFileData); + +impl ZipStreamFileMetadata { + /// Get the name of the file + /// + /// # Warnings + /// + /// It is dangerous to use this name directly when extracting an archive. + /// It may contain an absolute path (`/etc/shadow`), or break out of the + /// current directory (`../runtime`). Carelessly writing to these paths + /// allows an attacker to craft a ZIP archive that will overwrite critical + /// files. + /// + /// You can use the [`ZipFile::enclosed_name`] method to validate the name + /// as a safe path. + pub fn name(&self) -> &str { + &self.0.file_name + } + + /// Get the name of the file, in the raw (internal) byte representation. + /// + /// The encoding of this data is currently undefined. + pub fn name_raw(&self) -> &[u8] { + &self.0.file_name_raw + } + + /// Rewrite the path, ignoring any path components with special meaning. + /// + /// - Absolute paths are made relative + /// - [std::path::Component::ParentDir]s are ignored + /// - Truncates the filename at a NULL byte + /// + /// This is appropriate if you need to be able to extract *something* from + /// any archive, but will easily misrepresent trivial paths like + /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, + /// [`ZipFile::enclosed_name`] is the better option in most scenarios. + pub fn mangled_name(&self) -> PathBuf { + self.0.file_name_sanitized() + } + + /// Ensure the file path is safe to use as a [`Path`]. + /// + /// - It can't contain NULL bytes + /// - It can't resolve to a path outside the current directory + /// > `foo/../bar` is fine, `foo/../../bar` is not. + /// - It can't be an absolute path + /// + /// This will read well-formed ZIP files correctly, and is resistant + /// to path-based exploits. It is recommended over + /// [`ZipFile::mangled_name`]. + pub fn enclosed_name(&self) -> Option { + self.0.enclosed_name() + } + + /// Returns whether the file is actually a directory + pub fn is_dir(&self) -> bool { + self.name() + .chars() + .next_back() + .map_or(false, |c| c == '/' || c == '\\') + } + + /// Returns whether the file is a regular file + pub fn is_file(&self) -> bool { + !self.is_dir() + } + + /// Get the comment of the file + pub fn comment(&self) -> &str { + &self.0.file_comment + } + + /// Get unix mode for the file + pub const fn unix_mode(&self) -> Option { + self.0.unix_mode() + } +} + #[cfg(test)] mod test { use super::*; @@ -111,7 +208,7 @@ mod test { struct DummyVisitor; impl ZipStreamVisitor for DummyVisitor { - fn visit_file(&mut self, _file: &mut StreamingZipEntry) -> ZipResult<()> { + fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { Ok(()) } @@ -127,7 +224,7 @@ mod test { #[derive(Default, Debug, Eq, PartialEq)] struct CounterVisitor(u64, u64); impl ZipStreamVisitor for CounterVisitor { - fn visit_file(&mut self, _file: &mut StreamingZipEntry) -> ZipResult<()> { + fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { self.0 += 1; Ok(()) } @@ -170,7 +267,7 @@ mod test { filenames: BTreeSet>, } impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { if file.is_file() { self.filenames.insert(file.name().into()); } @@ -207,7 +304,7 @@ mod test { filenames: BTreeSet>, } impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut StreamingZipEntry) -> ZipResult<()> { + fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { let full_name = file.enclosed_name().unwrap(); let file_name = full_name.file_name().unwrap().to_str().unwrap(); assert!( diff --git a/src/unstable/read.rs b/src/unstable/read.rs index 86f608ea6..a4d5e4092 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -487,7 +487,6 @@ pub mod streaming { use std::mem; use std::ops; - #[derive(Debug)] pub struct StreamingArchive { reader: R, remaining_before_next_entry: u64, diff --git a/src/write.rs b/src/write.rs index d719b0b2b..aab92327f 100644 --- a/src/write.rs +++ b/src/write.rs @@ -13,7 +13,7 @@ use crate::types::{ ffi, AesModeInfo, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, MIN_VERSION, }; -use crate::unstable::read::find_entry_content_range; +use crate::unstable::read::{find_entry_content_range, ArchiveEntry, ZipEntry}; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64;