diff --git a/src/read.rs b/src/read.rs index 4bfce7e7d..94ed23664 100644 --- a/src/read.rs +++ b/src/read.rs @@ -40,6 +40,10 @@ use bzip2::read::BzDecoder; #[cfg(feature = "zstd")] use zstd::stream::read::Decoder as ZstdDecoder; +mod config; + +pub use config::*; + /// Provides high level API for reading from a stream. pub(crate) mod stream; @@ -56,6 +60,9 @@ pub(crate) mod zip_archive { pub(crate) files: super::IndexMap, super::ZipFileData>, pub(super) offset: u64, pub(super) dir_start: u64, + // This isn't yet used anywhere, but it is here for use cases in the future. + #[allow(dead_code)] + pub(super) config: super::Config, } /// ZIP archive reader @@ -382,12 +389,15 @@ impl ZipArchive { ) -> ZipResult { let initial_offset = match files.first() { Some((_, file)) => file.header_start, - None => 0, + None => central_start, }; let shared = Arc::new(zip_archive::Shared { files, offset: initial_offset, dir_start: central_start, + config: Config { + archive_offset: ArchiveOffset::Known(initial_offset), + }, }); Ok(Self { reader, @@ -473,19 +483,44 @@ impl ZipArchive { } fn get_directory_info_zip32( + config: &Config, + reader: &mut R, footer: &spec::Zip32CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult { - // Some zip files have data prepended to them, resulting in the - // offsets all being too small. Get the amount of error by comparing - // the actual file position we found the CDE at with the offset - // recorded in the CDE. - let archive_offset = cde_start_pos - .checked_sub(footer.central_directory_size as u64) - .and_then(|x| x.checked_sub(footer.central_directory_offset as u64)) - .ok_or(ZipError::InvalidArchive( - "Invalid central directory size or offset", - ))?; + let archive_offset = match config.archive_offset { + ArchiveOffset::Known(n) => n, + ArchiveOffset::FromCentralDirectory | ArchiveOffset::Detect => { + // Some zip files have data prepended to them, resulting in the + // offsets all being too small. 
Get the amount of error by comparing + // the actual file position we found the CDE at with the offset + // recorded in the CDE. + let mut offset = cde_start_pos + .checked_sub(footer.central_directory_size as u64) + .and_then(|x| x.checked_sub(footer.central_directory_offset as u64)) + .ok_or(ZipError::InvalidArchive( + "Invalid central directory size or offset", + ))?; + + if config.archive_offset == ArchiveOffset::Detect { + // Check whether the archive offset makes sense by peeking at the directory start. If it + // doesn't, fall back to using no archive offset. This supports zips with the central + // directory entries somewhere other than directly preceding the end of central directory. + reader.seek(io::SeekFrom::Start( + offset + footer.central_directory_offset as u64, + ))?; + let mut buf = [0; 4]; + reader.read_exact(&mut buf)?; + if spec::Magic::from_le_bytes(buf) + != spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE + { + offset = 0; + } + } + + offset + } + }; let directory_start = footer.central_directory_offset as u64 + archive_offset; let number_of_files = footer.number_of_files_on_this_disk as usize; @@ -512,6 +547,7 @@ impl ZipArchive { } fn get_directory_info_zip64( + config: &Config, reader: &mut R, footer: &spec::Zip32CentralDirectoryEnd, cde_start_pos: u64, @@ -549,6 +585,27 @@ impl ZipArchive { let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?; let results: Vec> = search_results.into_iter().map(|(footer64, archive_offset)| { + let archive_offset = match config.archive_offset { + ArchiveOffset::Known(n) => n, + ArchiveOffset::FromCentralDirectory => archive_offset, + ArchiveOffset::Detect => { + archive_offset.checked_add(footer64.central_directory_offset) + .and_then(|start| { + // Check whether the archive offset makes sense by peeking at the directory start. + // + // If any errors occur or no header signature is found, fall back to no offset to see if that works. 
+ reader.seek(io::SeekFrom::Start(start)).ok()?; + let mut buf = [0; 4]; + reader.read_exact(&mut buf).ok()?; + if spec::Magic::from_le_bytes(buf) != spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE { + None + } else { + Some(archive_offset) + } + }) + .unwrap_or(0) + } + }; let directory_start = footer64 .central_directory_offset .checked_add(archive_offset) @@ -584,14 +641,15 @@ impl ZipArchive { /// Get the directory start offset and number of files. This is done in a /// separate function to ease the control flow design. pub(crate) fn get_metadata( + config: Config, reader: &mut R, footer: &spec::Zip32CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult { // Check if file has a zip64 footer - let mut results = Self::get_directory_info_zip64(reader, footer, cde_start_pos) + let mut results = Self::get_directory_info_zip64(&config, reader, footer, cde_start_pos) .unwrap_or_else(|e| vec![Err(e)]); - let zip32_result = Self::get_directory_info_zip32(footer, cde_start_pos); + let zip32_result = Self::get_directory_info_zip32(&config, reader, footer, cde_start_pos); let mut invalid_errors = Vec::new(); let mut unsupported_errors = Vec::new(); let mut ok_results = Vec::new(); @@ -652,6 +710,7 @@ impl ZipArchive { files, offset: dir_info.archive_offset, dir_start: dir_info.directory_start, + config, }) } }) @@ -712,18 +771,28 @@ impl ZipArchive { } } - /// Read a ZIP archive, collecting the files it contains + /// Read a ZIP archive, collecting the files it contains. /// - /// This uses the central directory record of the ZIP file, and ignores local file headers - pub fn new(mut reader: R) -> ZipResult> { + /// This uses the central directory record of the ZIP file, and ignores local file headers. + /// + /// A default [`Config`] is used. + pub fn new(reader: R) -> ZipResult> { + Self::with_config(Default::default(), reader) + } + + /// Read a ZIP archive providing a read configuration, collecting the files it contains. 
+ /// + /// This uses the central directory record of the ZIP file, and ignores local file headers. + pub fn with_config(config: Config, mut reader: R) -> ZipResult> { let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?; - let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?; + let shared = Self::get_metadata(config, &mut reader, &footer, cde_start_pos)?; Ok(ZipArchive { reader, shared: shared.into(), comment: footer.zip_file_comment.into(), }) } + /// Extract a Zip archive into a directory, overwriting files if they /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. /// diff --git a/src/read/config.rs b/src/read/config.rs new file mode 100644 index 000000000..583b40248 --- /dev/null +++ b/src/read/config.rs @@ -0,0 +1,22 @@ +/// Configuration for reading ZIP archives. +#[repr(transparent)] +#[derive(Debug, Default, Clone, Copy)] +pub struct Config { + /// An offset into the reader to use to find the start of the archive. + pub archive_offset: ArchiveOffset, +} + +/// The offset of the start of the archive from the beginning of the reader. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ArchiveOffset { + /// Try to detect the archive offset automatically. + /// + /// This will look at the central directory specified by `FromCentralDirectory` for a header. + /// If the header is missing, this will behave as if `Known(0)` were specified. + #[default] + Detect, + /// Use the central directory length and offset to determine the start of the archive. + FromCentralDirectory, + /// Specify a fixed archive offset. 
+ Known(u64), +} diff --git a/src/write.rs b/src/write.rs index 994eb0cee..c5c8798e7 100644 --- a/src/write.rs +++ b/src/write.rs @@ -3,7 +3,7 @@ #[cfg(feature = "aes-crypto")] use crate::aes::AesWriter; use crate::compression::CompressionMethod; -use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader}; +use crate::read::{find_content, Config, ZipArchive, ZipFile, ZipFileReader}; use crate::result::{ZipError, ZipResult}; use crate::spec::{self, Block}; #[cfg(feature = "aes-crypto")] @@ -538,10 +538,19 @@ impl ZipWriterStats { impl ZipWriter { /// Initializes the archive from an existing ZIP archive, making it ready for append. - pub fn new_append(mut readwriter: A) -> ZipResult> { + /// + /// This uses a default configuration to initially read the archive. + pub fn new_append(readwriter: A) -> ZipResult> { + Self::new_append_with_config(Default::default(), readwriter) + } + + /// Initializes the archive from an existing ZIP archive, making it ready for append. + /// + /// This uses the given read configuration to initially read the archive. + pub fn new_append_with_config(config: Config, mut readwriter: A) -> ZipResult> { let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?; - let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?; + let metadata = ZipArchive::get_metadata(config, &mut readwriter, &footer, cde_start_pos)?; Ok(ZipWriter { inner: Storer(MaybeEncrypted::Unencrypted(readwriter)),