diff --git a/benches/merge_archive.rs b/benches/merge_archive.rs index 698e0a80c..ff07e1a6c 100644 --- a/benches/merge_archive.rs +++ b/benches/merge_archive.rs @@ -41,8 +41,8 @@ fn perform_raw_copy_file( mut target: ZipWriter, ) -> ZipResult> { for i in 0..src.len() { - let entry = src.by_index_raw(i)?; - target.copy_file(entry)?; + let entry = src.by_index(i)?; + target.raw_copy_file(entry)?; } Ok(target) } diff --git a/examples/extract.rs b/examples/extract.rs index aaa652663..57cfba0d7 100644 --- a/examples/extract.rs +++ b/examples/extract.rs @@ -1,8 +1,6 @@ use std::fs; use std::io; -use zip::unstable::read::ArchiveEntry; - fn main() { std::process::exit(real_main()); } diff --git a/examples/file_info.rs b/examples/file_info.rs index c29c356a4..84878c583 100644 --- a/examples/file_info.rs +++ b/examples/file_info.rs @@ -1,8 +1,6 @@ use std::fs; use std::io::BufReader; -use zip::unstable::read::ArchiveEntry; - fn main() { std::process::exit(real_main()); } diff --git a/src/crc32.rs b/src/crc32.rs index 9ad7e4b2f..878b2ee61 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -10,26 +10,25 @@ pub struct Crc32Reader { inner: R, hasher: Hasher, check: u32, + /// Signals if `inner` stores aes encrypted data. + /// AE-2 encrypted data doesn't use crc and sets the value to 0. + enabled: bool, } impl Crc32Reader { /// Get a new Crc32Reader which checks the inner reader against checksum. - pub(crate) fn new(inner: R, checksum: u32) -> Self { + /// The check is disabled if `ae2_encrypted == true`. + pub(crate) fn new(inner: R, checksum: u32, ae2_encrypted: bool) -> Crc32Reader { Crc32Reader { inner, hasher: Hasher::new(), check: checksum, + enabled: !ae2_encrypted, } } - fn check_matches(&self) -> Result<(), &'static str> { - let res = self.hasher.clone().finalize(); - if self.check == res { - Ok(()) - } else { - /* TODO: make this into our own Crc32Error error type! 
*/ - Err("Invalid checksum") - } + fn check_matches(&self) -> bool { + self.check == self.hasher.clone().finalize() } #[allow(dead_code)] @@ -38,27 +37,159 @@ impl Crc32Reader { } } +#[cold] +fn invalid_checksum() -> io::Error { + io::Error::new(io::ErrorKind::InvalidData, "Invalid checksum") +} + impl Read for Crc32Reader { fn read(&mut self, buf: &mut [u8]) -> io::Result { - /* We want to make sure we only check the hash when the input stream is exhausted. */ - if buf.is_empty() { - /* If the input buf is empty (this shouldn't happen, but isn't guaranteed), we - * still want to "pull" from the source in case it surfaces an i/o error. This will - * always return a count of Ok(0) if successful. */ - return self.inner.read(buf); - } - let count = self.inner.read(buf)?; - if count == 0 { - return self - .check_matches() - .map(|()| 0) - /* TODO: use io::Error::other for MSRV >=1.74 */ - .map_err(|e| io::Error::new(io::ErrorKind::Other, e)); + + if self.enabled { + if count == 0 && !buf.is_empty() && !self.check_matches() { + return Err(invalid_checksum()); + } + self.hasher.update(&buf[..count]); } - self.hasher.update(&buf[..count]); Ok(count) } + + fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { + let start = buf.len(); + let n = self.inner.read_to_end(buf)?; + + if self.enabled { + self.hasher.update(&buf[start..]); + if !self.check_matches() { + return Err(invalid_checksum()); + } + } + + Ok(n) + } + + fn read_to_string(&mut self, buf: &mut String) -> io::Result { + let start = buf.len(); + let n = self.inner.read_to_string(buf)?; + + if self.enabled { + self.hasher.update(&buf.as_bytes()[start..]); + if !self.check_matches() { + return Err(invalid_checksum()); + } + } + + Ok(n) + } +} + +pub(crate) mod non_crypto { + use std::io; + use std::io::prelude::*; + + use crc32fast::Hasher; + + /// Reader that validates the CRC32 when it reaches the EOF. 
+ pub struct Crc32Reader { + inner: R, + hasher: Hasher, + check: u32, + } + + impl Crc32Reader { + /// Get a new Crc32Reader which checks the inner reader against checksum. + pub(crate) fn new(inner: R, checksum: u32) -> Self { + Crc32Reader { + inner, + hasher: Hasher::new(), + check: checksum, + } + } + + fn check_matches(&self) -> Result<(), &'static str> { + let res = self.hasher.clone().finalize(); + if self.check == res { + Ok(()) + } else { + /* TODO: make this into our own Crc32Error error type! */ + Err("Invalid checksum") + } + } + + pub fn into_inner(self) -> R { + self.inner + } + } + + impl Read for Crc32Reader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + /* We want to make sure we only check the hash when the input stream is exhausted. */ + if buf.is_empty() { + /* If the input buf is empty (this shouldn't happen, but isn't guaranteed), we + * still want to "pull" from the source in case it surfaces an i/o error. This will + * always return a count of Ok(0) if successful. 
*/ + return self.inner.read(buf); + } + + let count = self.inner.read(buf)?; + if count == 0 { + return self + .check_matches() + .map(|()| 0) + /* TODO: use io::Error::other for MSRV >=1.74 */ + .map_err(|e| io::Error::new(io::ErrorKind::Other, e)); + } + self.hasher.update(&buf[..count]); + Ok(count) + } + } + + #[cfg(test)] + mod test { + use super::*; + + #[test] + fn test_empty_reader() { + let data: &[u8] = b""; + let mut buf = [0; 1]; + + let mut reader = Crc32Reader::new(data, 0); + assert_eq!(reader.read(&mut buf).unwrap(), 0); + + let mut reader = Crc32Reader::new(data, 1); + assert!(reader + .read(&mut buf) + .unwrap_err() + .to_string() + .contains("Invalid checksum")); + } + + #[test] + fn test_byte_by_byte() { + let data: &[u8] = b"1234"; + let mut buf = [0; 1]; + + let mut reader = Crc32Reader::new(data, 0x9be3e0a3); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 1); + assert_eq!(reader.read(&mut buf).unwrap(), 0); + // Can keep reading 0 bytes after the end + assert_eq!(reader.read(&mut buf).unwrap(), 0); + } + + #[test] + fn test_zero_read() { + let data: &[u8] = b"1234"; + let mut buf = [0; 5]; + + let mut reader = Crc32Reader::new(data, 0x9be3e0a3); + assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0); + assert_eq!(reader.read(&mut buf).unwrap(), 4); + } + } } #[cfg(test)] @@ -70,10 +201,10 @@ mod test { let data: &[u8] = b""; let mut buf = [0; 1]; - let mut reader = Crc32Reader::new(data, 0); + let mut reader = Crc32Reader::new(data, 0, false); assert_eq!(reader.read(&mut buf).unwrap(), 0); - let mut reader = Crc32Reader::new(data, 1); + let mut reader = Crc32Reader::new(data, 1, false); assert!(reader .read(&mut buf) .unwrap_err() @@ -86,7 +217,7 @@ mod test { let data: &[u8] = b"1234"; let mut buf = [0; 1]; - let mut reader = Crc32Reader::new(data, 0x9be3e0a3); + let mut reader = 
Crc32Reader::new(data, 0x9be3e0a3, false); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); @@ -101,7 +232,7 @@ mod test { let data: &[u8] = b"1234"; let mut buf = [0; 5]; - let mut reader = Crc32Reader::new(data, 0x9be3e0a3); + let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false); assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0); assert_eq!(reader.read(&mut buf).unwrap(), 4); } diff --git a/src/read.rs b/src/read.rs index bdcd861bc..7e13066c3 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1,7 +1,10 @@ //! Types for reading ZIP archives +#[cfg(feature = "aes-crypto")] +use crate::aes::{AesReader, AesReaderValid}; use crate::compression::CompressionMethod; use crate::cp437::FromCp437; +use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::{Shared, SharedBuilder}; use crate::result::{ZipError, ZipResult}; @@ -10,16 +13,30 @@ use crate::types::{ AesMode, AesModeInfo, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, ZipLocalEntryBlock, }; +use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; +use std::borrow::Cow; use std::ffi::OsString; use std::fs::create_dir_all; -use std::io::{self, prelude::*, SeekFrom}; +use std::io::{self, copy, prelude::*, sink, SeekFrom}; use std::mem; use std::mem::size_of; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::rc::Rc; use std::sync::{Arc, OnceLock}; +#[cfg(feature = "deflate-flate2")] +use flate2::read::DeflateDecoder; + +#[cfg(feature = "deflate64")] +use deflate64::Deflate64Decoder; + +#[cfg(feature = "bzip2")] +use bzip2::read::BzDecoder; + +#[cfg(feature = "zstd")] +use zstd::stream::read::Decoder as ZstdDecoder; + mod config; pub use config::*; @@ -106,10 +123,15 @@ pub(crate) mod zip_archive { #[cfg(feature = "aes-crypto")] use crate::aes::PWD_VERIFY_LENGTH; use 
crate::extra_fields::UnicodeExtraField; -use crate::result::ZipError::InvalidArchive; +#[cfg(feature = "lzma")] +use crate::read::lzma::LzmaDecoder; +#[cfg(feature = "xz")] +use crate::read::xz::XzDecoder; +use crate::result::ZipError::{InvalidArchive, InvalidPassword, UnsupportedArchive}; +use crate::types::ffi::S_IFLNK; use crate::unstable::{path_to_string, LittleEndianReadExt}; -use crate::crc32::Crc32Reader; +use crate::crc32::non_crypto::Crc32Reader as NewCrc32Reader; use crate::unstable::read::{ construct_decompressing_reader, find_entry_content_range, CryptoEntryReader, CryptoVariant, }; @@ -117,6 +139,224 @@ pub use crate::unstable::read::{ArchiveEntry, ZipEntry}; pub use zip_archive::ZipArchive; +#[allow(clippy::large_enum_variant)] +pub(crate) enum CryptoReader<'a> { + Plaintext(io::Take<&'a mut dyn Read>), + ZipCrypto(ZipCryptoReaderValid>), + #[cfg(feature = "aes-crypto")] + Aes { + reader: AesReaderValid>, + vendor_version: AesVendorVersion, + }, +} + +impl<'a> Read for CryptoReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + CryptoReader::Plaintext(r) => r.read(buf), + CryptoReader::ZipCrypto(r) => r.read(buf), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.read(buf), + } + } + + fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { + match self { + CryptoReader::Plaintext(r) => r.read_to_end(buf), + CryptoReader::ZipCrypto(r) => r.read_to_end(buf), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.read_to_end(buf), + } + } + + fn read_to_string(&mut self, buf: &mut String) -> io::Result { + match self { + CryptoReader::Plaintext(r) => r.read_to_string(buf), + CryptoReader::ZipCrypto(r) => r.read_to_string(buf), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.read_to_string(buf), + } + } +} + +impl<'a> CryptoReader<'a> { + /// Consumes this decoder, returning the underlying reader. 
+ pub fn into_inner(self) -> io::Take<&'a mut dyn Read> { + match self { + CryptoReader::Plaintext(r) => r, + CryptoReader::ZipCrypto(r) => r.into_inner(), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.into_inner(), + } + } + + /// Returns `true` if the data is encrypted using AE2. + pub const fn is_ae2_encrypted(&self) -> bool { + #[cfg(feature = "aes-crypto")] + return matches!( + self, + CryptoReader::Aes { + vendor_version: AesVendorVersion::Ae2, + .. + } + ); + #[cfg(not(feature = "aes-crypto"))] + false + } +} + +pub(crate) enum ZipFileReader<'a> { + NoReader, + Raw(io::Take<&'a mut dyn Read>), + Stored(Crc32Reader>), + #[cfg(feature = "_deflate-any")] + Deflated(Crc32Reader>>), + #[cfg(feature = "deflate64")] + Deflate64(Crc32Reader>>>), + #[cfg(feature = "bzip2")] + Bzip2(Crc32Reader>>), + #[cfg(feature = "zstd")] + Zstd(Crc32Reader>>>), + #[cfg(feature = "lzma")] + Lzma(Crc32Reader>>>), + #[cfg(feature = "xz")] + Xz(Crc32Reader>>), +} + +impl<'a> Read for ZipFileReader<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r.read(buf), + ZipFileReader::Stored(r) => r.read(buf), + #[cfg(feature = "_deflate-any")] + ZipFileReader::Deflated(r) => r.read(buf), + #[cfg(feature = "deflate64")] + ZipFileReader::Deflate64(r) => r.read(buf), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.read(buf), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.read(buf), + #[cfg(feature = "lzma")] + ZipFileReader::Lzma(r) => r.read(buf), + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.read(buf), + } + } + + fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r.read_exact(buf), + ZipFileReader::Stored(r) => r.read_exact(buf), + #[cfg(feature = "_deflate-any")] + 
ZipFileReader::Deflated(r) => r.read_exact(buf), + #[cfg(feature = "deflate64")] + ZipFileReader::Deflate64(r) => r.read_exact(buf), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.read_exact(buf), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.read_exact(buf), + #[cfg(feature = "lzma")] + ZipFileReader::Lzma(r) => r.read_exact(buf), + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.read_exact(buf), + } + } + + fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { + match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r.read_to_end(buf), + ZipFileReader::Stored(r) => r.read_to_end(buf), + #[cfg(feature = "_deflate-any")] + ZipFileReader::Deflated(r) => r.read_to_end(buf), + #[cfg(feature = "deflate64")] + ZipFileReader::Deflate64(r) => r.read_to_end(buf), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.read_to_end(buf), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.read_to_end(buf), + #[cfg(feature = "lzma")] + ZipFileReader::Lzma(r) => r.read_to_end(buf), + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.read_to_end(buf), + } + } + + fn read_to_string(&mut self, buf: &mut String) -> io::Result { + match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r.read_to_string(buf), + ZipFileReader::Stored(r) => r.read_to_string(buf), + #[cfg(feature = "_deflate-any")] + ZipFileReader::Deflated(r) => r.read_to_string(buf), + #[cfg(feature = "deflate64")] + ZipFileReader::Deflate64(r) => r.read_to_string(buf), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.read_to_string(buf), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.read_to_string(buf), + #[cfg(feature = "lzma")] + ZipFileReader::Lzma(r) => r.read_to_string(buf), + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.read_to_string(buf), + } + } +} + +impl<'a> ZipFileReader<'a> { + /// Consumes this decoder, returning 
nothing: any unread bytes of the underlying entry data are read and discarded, and I/O errors while doing so are ignored. + pub fn drain(self) { + let mut inner = match self { + ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), + ZipFileReader::Raw(r) => r, + ZipFileReader::Stored(r) => r.into_inner().into_inner(), + #[cfg(feature = "_deflate-any")] + ZipFileReader::Deflated(r) => r.into_inner().into_inner().into_inner(), + #[cfg(feature = "deflate64")] + ZipFileReader::Deflate64(r) => r.into_inner().into_inner().into_inner().into_inner(), + #[cfg(feature = "bzip2")] + ZipFileReader::Bzip2(r) => r.into_inner().into_inner().into_inner(), + #[cfg(feature = "zstd")] + ZipFileReader::Zstd(r) => r.into_inner().finish().into_inner().into_inner(), + #[cfg(feature = "lzma")] + ZipFileReader::Lzma(r) => { + // Lzma reader owns its buffer rather than mutably borrowing it, so we have to drop + // it separately + if let Ok(mut remaining) = r.into_inner().finish() { + let _ = copy(&mut remaining, &mut sink()); + } + return; + } + #[cfg(feature = "xz")] + ZipFileReader::Xz(r) => r.into_inner().into_inner().into_inner(), + }; + let _ = copy(&mut inner, &mut sink()); + } +} + +/// A struct for reading a zip file +pub struct ZipFile<'a> { + pub(crate) data: Cow<'a, ZipFileData>, + pub(crate) crypto_reader: Option>, + pub(crate) reader: ZipFileReader<'a>, +} + +pub(crate) fn find_content<'a>( + data: &ZipFileData, + reader: &'a mut (impl Read + Seek), +) -> ZipResult> { + // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result! 
+ let data_start = match data.data_start.get() { + Some(data_start) => *data_start, + None => find_data_start(data, reader)?, + }; + + reader.seek(io::SeekFrom::Start(data_start))?; + Ok((reader as &mut dyn Read).take(data.compressed_size)) +} + pub(crate) fn find_data_start( data: &ZipFileData, reader: &mut (impl Read + Seek), @@ -146,6 +386,131 @@ pub(crate) fn find_data_start( Ok(data_start) } +#[allow(clippy::too_many_arguments)] +pub(crate) fn make_crypto_reader<'a>( + compression_method: CompressionMethod, + crc32: u32, + mut last_modified_time: Option, + using_data_descriptor: bool, + reader: io::Take<&'a mut dyn Read>, + password: Option<&[u8]>, + aes_info: Option, + #[cfg(feature = "aes-crypto")] compressed_size: u64, +) -> ZipResult> { + #[allow(deprecated)] + { + if let CompressionMethod::Unsupported(_) = compression_method { + return unsupported_zip_error("Compression method not supported"); + } + } + + let reader = match (password, aes_info) { + #[cfg(not(feature = "aes-crypto"))] + (Some(_), Some(_)) => { + return Err(ZipError::UnsupportedArchive( + "AES encrypted files cannot be decrypted without the aes-crypto feature.", + )) + } + #[cfg(feature = "aes-crypto")] + ( + Some(password), + Some(AesModeInfo { + aes_mode, + vendor_version, + .. + }), + ) => CryptoReader::Aes { + reader: AesReader::new(reader, aes_mode, compressed_size).validate(password)?, + vendor_version, + }, + (Some(password), None) => { + if !using_data_descriptor { + last_modified_time = None; + } + let validator = if let Some(last_modified_time) = last_modified_time { + ZipCryptoValidator::InfoZipMsdosTime(last_modified_time.timepart()) + } else { + ZipCryptoValidator::PkzipCrc32(crc32) + }; + CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?) 
+ } + (None, Some(_)) => return Err(InvalidPassword), + (None, None) => CryptoReader::Plaintext(reader), + }; + Ok(reader) +} + +pub(crate) fn make_reader( + compression_method: CompressionMethod, + crc32: u32, + reader: CryptoReader, +) -> ZipResult { + let ae2_encrypted = reader.is_ae2_encrypted(); + + match compression_method { + CompressionMethod::Stored => Ok(ZipFileReader::Stored(Crc32Reader::new( + reader, + crc32, + ae2_encrypted, + ))), + #[cfg(feature = "_deflate-any")] + CompressionMethod::Deflated => { + let deflate_reader = DeflateDecoder::new(reader); + Ok(ZipFileReader::Deflated(Crc32Reader::new( + deflate_reader, + crc32, + ae2_encrypted, + ))) + } + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64 => { + let deflate64_reader = Deflate64Decoder::new(reader); + Ok(ZipFileReader::Deflate64(Crc32Reader::new( + deflate64_reader, + crc32, + ae2_encrypted, + ))) + } + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2 => { + let bzip2_reader = BzDecoder::new(reader); + Ok(ZipFileReader::Bzip2(Crc32Reader::new( + bzip2_reader, + crc32, + ae2_encrypted, + ))) + } + #[cfg(feature = "zstd")] + CompressionMethod::Zstd => { + let zstd_reader = ZstdDecoder::new(reader).unwrap(); + Ok(ZipFileReader::Zstd(Crc32Reader::new( + zstd_reader, + crc32, + ae2_encrypted, + ))) + } + #[cfg(feature = "lzma")] + CompressionMethod::Lzma => { + let reader = LzmaDecoder::new(reader); + Ok(ZipFileReader::Lzma(Crc32Reader::new( + Box::new(reader), + crc32, + ae2_encrypted, + ))) + } + #[cfg(feature = "xz")] + CompressionMethod::Xz => { + let reader = XzDecoder::new(reader); + Ok(ZipFileReader::Xz(Crc32Reader::new( + reader, + crc32, + ae2_encrypted, + ))) + } + _ => Err(UnsupportedArchive("Compression method not supported")), + } +} + #[derive(Debug)] pub(crate) struct CentralDirectoryInfo { pub(crate) archive_offset: u64, @@ -632,7 +997,7 @@ impl ZipArchive { &mut self, file_number: usize, ) -> ZipResult> { - let entry = self.by_index_raw(file_number)?; + let entry 
= self.by_index_raw_new(file_number)?; entry.get_aes_verification_key_and_salt() } @@ -676,7 +1041,7 @@ impl ZipArchive { #[cfg(unix)] let mut files_by_unix_mode = Vec::new(); for i in 0..self.len() { - let mut file = self.by_index(i)?; + let mut file = self.by_index_new(i)?; let filepath = file .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; @@ -732,7 +1097,7 @@ impl ZipArchive { } continue; } - let mut file = self.by_index(i)?; + let mut file = self.by_index_new(i)?; let mut outfile = fs::File::create(&outpath)?; io::copy(&mut file, &mut outfile)?; #[cfg(unix)] @@ -775,6 +1140,114 @@ impl ZipArchive { Ok(()) } + /// Search for a file entry by name, decrypt with given password + /// + /// # Warning + /// + /// The implementation of the cryptographic algorithms has not + /// gone through a correctness review, and you should assume it is insecure: + /// passwords used with this API may be compromised. + /// + /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us + /// to check for a 1/256 chance that the password is correct. + /// There are many passwords out there that will also pass the validity checks + /// we are able to perform. This is a weakness of the ZipCrypto algorithm, + /// due to its fairly primitive approach to cryptography. 
+ pub fn by_name_decrypt(&mut self, name: &str, password: &[u8]) -> ZipResult { + self.by_name_with_optional_password(name, Some(password)) + } + + /// Search for a file entry by name + pub fn by_name(&mut self, name: &str) -> ZipResult { + self.by_name_with_optional_password(name, None) + } + + fn by_name_with_optional_password<'a>( + &'a mut self, + name: &str, + password: Option<&[u8]>, + ) -> ZipResult> { + let index = self.index_for_name_err(name)?; + self.by_index_with_optional_password(index, password) + } + + /// Get a contained file by index, decrypt with given password + /// + /// # Warning + /// + /// The implementation of the cryptographic algorithms has not + /// gone through a correctness review, and you should assume it is insecure: + /// passwords used with this API may be compromised. + /// + /// This function sometimes accepts wrong password. This is because the ZIP spec only allows us + /// to check for a 1/256 chance that the password is correct. + /// There are many passwords out there that will also pass the validity checks + /// we are able to perform. This is a weakness of the ZipCrypto algorithm, + /// due to its fairly primitive approach to cryptography. 
+ pub fn by_index_decrypt( + &mut self, + file_number: usize, + password: &[u8], + ) -> ZipResult> { + self.by_index_with_optional_password(file_number, Some(password)) + } + + /// Get a contained file by index + pub fn by_index(&mut self, file_number: usize) -> ZipResult> { + self.by_index_with_optional_password(file_number, None) + } + + /// Get a contained file by index without decompressing it + pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult> { + let reader = &mut self.reader; + let (_, data) = self + .shared + .files + .get_index(file_number) + .ok_or(ZipError::FileNotFound)?; + Ok(ZipFile { + crypto_reader: None, + reader: ZipFileReader::Raw(find_content(data, reader)?), + data: Cow::Borrowed(data), + }) + } + + fn by_index_with_optional_password( + &mut self, + file_number: usize, + mut password: Option<&[u8]>, + ) -> ZipResult> { + let (_, data) = self + .shared + .files + .get_index(file_number) + .ok_or(ZipError::FileNotFound)?; + + match (password, data.encrypted) { + (None, true) => return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)), + (Some(_), false) => password = None, //Password supplied, but none needed! Discard. + _ => {} + } + let limit_reader = find_content(data, &mut self.reader)?; + + let crypto_reader = make_crypto_reader( + data.compression_method, + data.crc32, + data.last_modified_time, + data.using_data_descriptor, + limit_reader, + password, + data.aes_mode, + #[cfg(feature = "aes-crypto")] + data.compressed_size, + )?; + Ok(ZipFile { + crypto_reader: Some(crypto_reader), + reader: ZipFileReader::NoReader, + data: Cow::Borrowed(data), + }) + } + /// Unwrap and return the inner reader object /// /// The position of the reader is undefined. 
@@ -788,13 +1261,16 @@ where R: Read + Seek, { /// Search for a file entry by name - pub fn by_name(&mut self, name: impl AsRef) -> ZipResult> { + pub fn by_name_new( + &mut self, + name: impl AsRef, + ) -> ZipResult> { let index = self.index_for_name_err(name)?; - self.by_index(index) + self.by_index_new(index) } /// Get a contained file by index - pub fn by_index(&mut self, file_number: usize) -> ZipResult> { + pub fn by_index_new(&mut self, file_number: usize) -> ZipResult> { let Self { ref mut reader, ref shared, @@ -804,15 +1280,9 @@ where .files .get_index(file_number) .ok_or(ZipError::FileNotFound)?; - - /* Don't allow users to read out an encrypted entry without providing a password. */ - if data.encrypted { - return Err(ZipError::UnsupportedArchive(ZipError::PASSWORD_REQUIRED)); - } - let content = find_entry_content_range(data, reader)?; let entry_reader = construct_decompressing_reader(&data.compression_method, content)?; - let crc32_reader = Crc32Reader::new(entry_reader, data.crc32); + let crc32_reader = NewCrc32Reader::new(entry_reader, data.crc32); Ok(ZipEntry { data, reader: crc32_reader, @@ -820,16 +1290,19 @@ where } /// Get a contained file by name without decompressing it - pub fn by_name_raw( + pub fn by_name_raw_new( &mut self, name: impl AsRef, ) -> ZipResult> { let index = self.index_for_name_err(name)?; - self.by_index_raw(index) + self.by_index_raw_new(index) } /// Get a contained file by index without decompressing it - pub fn by_index_raw(&mut self, file_number: usize) -> ZipResult> { + pub fn by_index_raw_new( + &mut self, + file_number: usize, + ) -> ZipResult> { let Self { ref mut reader, ref shared, @@ -859,13 +1332,13 @@ where /// There are many passwords out there that will also pass the validity checks /// we are able to perform. This is a weakness of the ZipCrypto algorithm, /// due to its fairly primitive approach to cryptography. 
- pub fn by_name_decrypt( + pub fn by_name_decrypt_new( &mut self, name: impl AsRef, password: &[u8], ) -> ZipResult> { let index = self.index_for_name_err(name)?; - self.by_index_decrypt(index, password) + self.by_index_decrypt_new(index, password) } /// Get a contained file by index, decrypt with given password @@ -881,7 +1354,7 @@ where /// There are many passwords out there that will also pass the validity checks /// we are able to perform. This is a weakness of the ZipCrypto algorithm, /// due to its fairly primitive approach to cryptography. - pub fn by_index_decrypt( + pub fn by_index_decrypt_new( &mut self, file_number: usize, password: &[u8], @@ -908,12 +1381,12 @@ where /* Ae2 voids crc checking: https://www.winzip.com/en/support/aes-encryption/ */ CryptoEntryReader::Ae2Encrypted(entry_reader) } else { - CryptoEntryReader::NonAe2Encrypted(Crc32Reader::new(entry_reader, data.crc32)) + CryptoEntryReader::NonAe2Encrypted(NewCrc32Reader::new(entry_reader, data.crc32)) } } else { - /* Not encrypted, so do the same as in .by_index(): */ + /* Not encrypted, so do the same as in .by_index_new(): */ let entry_reader = construct_decompressing_reader(&data.compression_method, content)?; - CryptoEntryReader::Unencrypted(Crc32Reader::new(entry_reader, data.crc32)) + CryptoEntryReader::Unencrypted(NewCrc32Reader::new(entry_reader, data.crc32)) }; Ok(ZipEntry { @@ -1219,10 +1692,217 @@ pub(crate) fn parse_single_extra_field( Ok(false) } +/// Methods for retrieving information on zip files +impl<'a> ZipFile<'a> { + fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a>> { + if let ZipFileReader::NoReader = self.reader { + let data = &self.data; + let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); + self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)?; + } + Ok(&mut self.reader) + } + + pub(crate) fn get_raw_reader(&mut self) -> &mut dyn Read { + if let ZipFileReader::NoReader = self.reader { + let crypto_reader = 
self.crypto_reader.take().expect("Invalid reader state"); + self.reader = ZipFileReader::Raw(crypto_reader.into_inner()) + } + &mut self.reader + } + + /// Get the version of the file + pub fn version_made_by(&self) -> (u8, u8) { + ( + self.data.version_made_by / 10, + self.data.version_made_by % 10, + ) + } + + /// Get the name of the file + /// + /// # Warnings + /// + /// It is dangerous to use this name directly when extracting an archive. + /// It may contain an absolute path (`/etc/shadow`), or break out of the + /// current directory (`../runtime`). Carelessly writing to these paths + /// allows an attacker to craft a ZIP archive that will overwrite critical + /// files. + /// + /// You can use the [`ZipFile::enclosed_name`] method to validate the name + /// as a safe path. + pub fn name(&self) -> &str { + &self.data.file_name + } + + /// Get the name of the file, in the raw (internal) byte representation. + /// + /// The encoding of this data is currently undefined. + pub fn name_raw(&self) -> &[u8] { + &self.data.file_name_raw + } + + /// Get the name of the file in a sanitized form. It truncates the name to the first NULL byte, + /// removes a leading '/' and removes '..' parts. + #[deprecated( + since = "0.5.7", + note = "by stripping `..`s from the path, the meaning of paths can change. + `mangled_name` can be used if this behaviour is desirable" + )] + pub fn sanitized_name(&self) -> PathBuf { + self.mangled_name() + } + + /// Rewrite the path, ignoring any path components with special meaning. + /// + /// - Absolute paths are made relative + /// - [`ParentDir`]s are ignored + /// - Truncates the filename at a NULL byte + /// + /// This is appropriate if you need to be able to extract *something* from + /// any archive, but will easily misrepresent trivial paths like + /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, + /// [`ZipFile::enclosed_name`] is the better option in most scenarios. 
+ /// + /// [`ParentDir`]: `Component::ParentDir` + pub fn mangled_name(&self) -> PathBuf { + self.data.file_name_sanitized() + } + + /// Ensure the file path is safe to use as a [`Path`]. + /// + /// - It can't contain NULL bytes + /// - It can't resolve to a path outside the current directory + /// > `foo/../bar` is fine, `foo/../../bar` is not. + /// - It can't be an absolute path + /// + /// This will read well-formed ZIP files correctly, and is resistant + /// to path-based exploits. It is recommended over + /// [`ZipFile::mangled_name`]. + pub fn enclosed_name(&self) -> Option { + self.data.enclosed_name() + } + + /// Get the comment of the file + pub fn comment(&self) -> &str { + &self.data.file_comment + } + + /// Get the compression method used to store the file + pub fn compression(&self) -> CompressionMethod { + self.data.compression_method + } + + /// Get the size of the file, in bytes, in the archive + pub fn compressed_size(&self) -> u64 { + self.data.compressed_size + } + + /// Get the size of the file, in bytes, when uncompressed + pub fn size(&self) -> u64 { + self.data.uncompressed_size + } + + /// Get the time the file was last modified + pub fn last_modified(&self) -> Option { + self.data.last_modified_time + } + + /// Returns whether the file is actually a directory + pub fn is_dir(&self) -> bool { + self.data.is_dir() + } + + /// Returns whether the file is actually a symbolic link + pub fn is_symlink(&self) -> bool { + self.unix_mode() + .is_some_and(|mode| mode & S_IFLNK == S_IFLNK) + } + + /// Returns whether the file is a normal file (i.e. 
not a directory or symlink) + pub fn is_file(&self) -> bool { + !self.is_dir() && !self.is_symlink() + } + + /// Get unix mode for the file + pub fn unix_mode(&self) -> Option { + self.data.unix_mode() + } + + /// Get the CRC32 hash of the original file + pub fn crc32(&self) -> u32 { + self.data.crc32 + } + + /// Get the extra data of the zip header for this file + pub fn extra_data(&self) -> Option<&[u8]> { + self.data.extra_field.as_deref().map(|v| v.as_ref()) + } + + /// Get the starting offset of the data of the compressed file + pub fn data_start(&self) -> u64 { + *self.data.data_start.get().unwrap() + } + + /// Get the starting offset of the zip header for this file + pub fn header_start(&self) -> u64 { + self.data.header_start + } + /// Get the starting offset of the zip header in the central directory for this file + pub fn central_header_start(&self) -> u64 { + self.data.central_header_start + } + + /// iterate through all extra fields + pub fn extra_data_fields(&self) -> impl Iterator { + self.data.extra_fields.iter() + } +} + +impl<'a> Read for ZipFile<'a> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.get_reader()?.read(buf) + } + + fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + self.get_reader()?.read_exact(buf) + } + + fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { + self.get_reader()?.read_to_end(buf) + } + + fn read_to_string(&mut self, buf: &mut String) -> io::Result { + self.get_reader()?.read_to_string(buf) + } +} + +impl<'a> Drop for ZipFile<'a> { + fn drop(&mut self) { + // self.data is Owned, this reader is constructed by a streaming reader. + // In this case, we want to exhaust the reader so that the next file is accessible. + if let Cow::Owned(_) = self.data { + // Get the inner `Take` reader so all decryption, decompression and CRC calculation is skipped. 
+ match &mut self.reader { + ZipFileReader::NoReader => { + let innerreader = self.crypto_reader.take(); + let _ = copy( + &mut innerreader.expect("Invalid reader state").into_inner(), + &mut sink(), + ); + } + reader => { + let innerreader = std::mem::replace(reader, ZipFileReader::NoReader); + innerreader.drain(); + } + }; + } + } +} + #[cfg(test)] mod test { use crate::result::ZipResult; - use crate::unstable::read::ArchiveEntry; use crate::write::SimpleFileOptions; use crate::CompressionMethod::Stored; use crate::{ZipArchive, ZipWriter}; diff --git a/src/unstable/read.rs b/src/unstable/read.rs index d4712468f..86f608ea6 100644 --- a/src/unstable/read.rs +++ b/src/unstable/read.rs @@ -1,7 +1,7 @@ //! Alternate implementation of [`crate::read`]. use crate::compression::CompressionMethod; -use crate::crc32::Crc32Reader; +use crate::crc32::non_crypto::Crc32Reader; use crate::extra_fields::ExtraField; use crate::read::find_data_start; use crate::result::{ZipError, ZipResult}; diff --git a/src/write.rs b/src/write.rs index 9faa9ac94..d719b0b2b 100644 --- a/src/write.rs +++ b/src/write.rs @@ -3,6 +3,7 @@ #[cfg(feature = "aes-crypto")] use crate::aes::AesWriter; use crate::compression::CompressionMethod; +use crate::read::ZipFile; use crate::read::{parse_single_extra_field, Config, ZipArchive}; use crate::result::{ZipError, ZipResult}; use crate::spec::{self, FixedSizeBlock, Zip32CDEBlock}; @@ -12,7 +13,7 @@ use crate::types::{ ffi, AesModeInfo, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, MIN_VERSION, }; -use crate::unstable::read::{find_entry_content_range, ArchiveEntry, ZipEntry}; +use crate::unstable::read::find_entry_content_range; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; @@ -1168,7 +1169,7 @@ impl ZipWriter { /// This method extracts file metadata from the `source` archive, then simply performs a single /// big [`io::copy()`](io::copy) to 
transfer all the actual file contents without any /// decompression or decryption. This is more performant than the equivalent operation of - /// calling [`Self::copy_file()`] for each entry from the `source` archive in sequence. + /// calling [`Self::raw_copy_file()`] for each entry from the `source` archive in sequence. /// ///``` /// # fn main() -> Result<(), zip::result::ZipError> { @@ -1209,7 +1210,7 @@ impl ZipWriter { self.finish_file()?; /* Ensure we accept the file contents on faith (and avoid overwriting the data). - * See copy_file_rename(). */ + * See raw_copy_file_rename(). */ self.writing_to_file = true; self.writing_raw = true; @@ -1264,19 +1265,15 @@ impl ZipWriter { /// W: Write + Seek, /// { /// // Retrieve file entry by name - /// let file = src.by_name_raw("src_file.txt")?; + /// let file = src.by_name("src_file.txt")?; /// /// // Copy and rename the previously obtained file entry to the destination zip archive - /// dst.copy_file_rename(file, "new_name.txt")?; + /// dst.raw_copy_file_rename(file, "new_name.txt")?; /// /// Ok(()) /// } /// ``` - pub fn copy_file_rename( - &mut self, - mut file: ZipEntry<'_, impl Read>, - name: S, - ) -> ZipResult<()> + pub fn raw_copy_file_rename(&mut self, mut file: ZipFile, name: S) -> ZipResult<()> where S: Into> + ToOwned, SToOwned: Into>, @@ -1303,22 +1300,22 @@ impl ZipWriter { self.writing_to_file = true; self.writing_raw = true; - io::copy(&mut file, self)?; + io::copy(file.get_raw_reader(), self)?; Ok(()) } - /// Like `copy_file_rename`, but uses Path arguments. + /// Like `raw_copy_file_rename`, but uses Path arguments. /// /// This function ensures that the '/' path separator is used and normalizes `.` and `..`. It /// ignores any `..` or Windows drive letter that would produce a path outside the ZIP file's /// root. 
- pub fn copy_file_to_path>( + pub fn raw_copy_file_to_path>( &mut self, - file: ZipEntry<'_, impl Read>, + file: ZipFile, path: P, ) -> ZipResult<()> { - self.copy_file_rename(file, path_to_string(path)) + self.raw_copy_file_rename(file, path_to_string(path)) } /// Add a new file using the already compressed data from a ZIP file being read, this allows faster @@ -1336,17 +1333,17 @@ impl ZipWriter { /// W: Write + Seek, /// { /// // Retrieve file entry by name - /// let file = src.by_name_raw("src_file.txt")?; + /// let file = src.by_name("src_file.txt")?; /// /// // Copy the previously obtained file entry to the destination zip archive - /// dst.copy_file(file)?; + /// dst.raw_copy_file(file)?; /// /// Ok(()) /// } /// ``` - pub fn copy_file(&mut self, file: ZipEntry<'_, impl Read>) -> ZipResult<()> { + pub fn raw_copy_file(&mut self, file: ZipFile) -> ZipResult<()> { let name = file.name().to_owned(); - self.copy_file_rename(file, name) + self.raw_copy_file_rename(file, name) } /// Add a directory entry. 
@@ -1982,7 +1979,6 @@ mod test { use crate::compression::CompressionMethod; use crate::result::ZipResult; use crate::types::DateTime; - use crate::unstable::read::ArchiveEntry; use crate::write::EncryptWith::ZipCrypto; use crate::write::SimpleFileOptions; use crate::zipcrypto::ZipCryptoKeys; diff --git a/tests/aes_encryption.rs b/tests/aes_encryption.rs old mode 100755 new mode 100644 index e605b4eda..c135914d0 --- a/tests/aes_encryption.rs +++ b/tests/aes_encryption.rs @@ -1,10 +1,7 @@ #![cfg(feature = "aes-crypto")] use std::io::{self, Read, Write}; -use zip::{ - result::ZipError, unstable::read::ArchiveEntry, write::SimpleFileOptions, AesMode, - CompressionMethod, ZipArchive, -}; +use zip::{result::ZipError, write::SimpleFileOptions, AesMode, CompressionMethod, ZipArchive}; const SECRET_CONTENT: &str = "Lorem ipsum dolor sit amet"; diff --git a/tests/deflate64.rs b/tests/deflate64.rs old mode 100755 new mode 100644 index 4989f3a91..b0cd95a95 --- a/tests/deflate64.rs +++ b/tests/deflate64.rs @@ -1,7 +1,7 @@ #![cfg(feature = "deflate64")] use std::io::{self, Read}; -use zip::{unstable::read::ArchiveEntry, ZipArchive}; +use zip::ZipArchive; #[test] fn decompress_deflate64() { diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index 97b969421..7943a2ac3 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -2,7 +2,6 @@ use std::collections::HashSet; use std::io::prelude::*; use std::io::Cursor; use zip::result::ZipResult; -use zip::unstable::read::ArchiveEntry; use zip::unstable::LittleEndianWriteExt; use zip::write::ExtendedFileOptions; use zip::write::FileOptions; @@ -47,18 +46,18 @@ fn copy() { { let file = src_archive - .by_name_raw(ENTRY_NAME) + .by_name(ENTRY_NAME) .expect("Missing expected file"); - zip.copy_file(file).expect("Couldn't copy file"); + zip.raw_copy_file(file).expect("Couldn't copy file"); } { let file = src_archive - .by_name_raw(ENTRY_NAME) + .by_name(ENTRY_NAME) .expect("Missing expected file"); - zip.copy_file_rename(file, 
COPY_ENTRY_NAME) + zip.raw_copy_file_rename(file, COPY_ENTRY_NAME) .expect("Couldn't copy and rename file"); } } diff --git a/tests/lzma.rs b/tests/lzma.rs old mode 100755 new mode 100644 index 8ab2166e6..01a14a2e0 --- a/tests/lzma.rs +++ b/tests/lzma.rs @@ -1,7 +1,7 @@ #![cfg(feature = "lzma")] use std::io::{self, Read}; -use zip::{unstable::read::ArchiveEntry, ZipArchive}; +use zip::ZipArchive; #[test] fn decompress_lzma() { diff --git a/tests/xz.rs b/tests/xz.rs old mode 100755 new mode 100644 index 94881fac9..110b40859 --- a/tests/xz.rs +++ b/tests/xz.rs @@ -1,7 +1,7 @@ #![cfg(feature = "xz")] use std::io::{self, Read}; -use zip::{unstable::read::ArchiveEntry, ZipArchive}; +use zip::ZipArchive; #[test] fn decompress_xz() -> io::Result<()> { diff --git a/tests/zip64_large.rs b/tests/zip64_large.rs index 9864ee7ff..468ef198f 100644 --- a/tests/zip64_large.rs +++ b/tests/zip64_large.rs @@ -189,8 +189,6 @@ impl Read for Zip64File { #[test] fn zip64_large() { - use zip::unstable::read::ArchiveEntry; - let zipfile = Zip64File::new(); let mut archive = zip::ZipArchive::new(zipfile).unwrap(); let mut buf = [0u8; 32]; diff --git a/tests/zip_crypto.rs b/tests/zip_crypto.rs index 12f091724..4c4cc8b29 100644 --- a/tests/zip_crypto.rs +++ b/tests/zip_crypto.rs @@ -34,7 +34,6 @@ const ZIP_CRYPTO_FILE: &[u8] = &[ use std::io::Cursor; use zip::result::ZipError; -use zip::unstable::read::ArchiveEntry; #[test] fn encrypting_file() { diff --git a/tests/zip_extended_timestamp.rs b/tests/zip_extended_timestamp.rs index 58fb2953f..9657028f9 100644 --- a/tests/zip_extended_timestamp.rs +++ b/tests/zip_extended_timestamp.rs @@ -1,5 +1,5 @@ use std::io; -use zip::{unstable::read::ArchiveEntry, ZipArchive}; +use zip::ZipArchive; #[test] fn test_extended_timestamp() {