diff --git a/Cargo.toml b/Cargo.toml index c5405c9d5..e7a599015 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ authors = [ license = "MIT" repository = "https://github.com/zip-rs/zip2.git" keywords = ["zip", "archive", "compression"] +categories = ["compression", "filesystem", "parser-implementations"] rust-version = "1.73.0" description = """ Library to support the reading and writing of zip files. @@ -23,7 +24,9 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [workspace.dependencies] +arbitrary = { version = "1.3.2", features = ["derive"] } time = { version = "0.3.36", default-features = false } +zip = { path = ".", default-features = false } [dependencies] aes = { version = "0.8.4", optional = true } @@ -53,7 +56,7 @@ lzma-rs = { version = "0.3.0", default-features = false, optional = true } crossbeam-utils = "0.8.20" [target.'cfg(fuzzing)'.dependencies] -arbitrary = { version = "1.3.2", features = ["derive"] } +arbitrary.workspace = true [dev-dependencies] bencher = "0.1.5" diff --git a/cli/Cargo.toml b/cli/Cargo.toml new file mode 100644 index 000000000..135270248 --- /dev/null +++ b/cli/Cargo.toml @@ -0,0 +1,70 @@ +[package] +name = "zip-cli" +version = "0.0.1" +authors = [ + "Danny McClanahan ", +] +license = "MIT" +repository = "https://github.com/zip-rs/zip2.git" +keywords = ["zip", "archive", "compression", "cli"] +categories = ["command-line-utilities", "compression", "filesystem", "development-tools::build-utils"] +rust-version = "1.74.0" +description = """ +Binary for creation and manipulation of zip files. +""" +edition = "2021" + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[lib] + +[[bin]] +name = "zip-cli" + +[dependencies] +glob = { version = "0.3", optional = true } +regex = { version = "1", optional = true } + +[dependencies.zip] +path = ".." 
+default-features = false + +[features] +aes-crypto = ["zip/aes-crypto"] +bzip2 = ["zip/bzip2"] +chrono = ["zip/chrono"] +deflate64 = ["zip/deflate64"] +deflate = ["zip/deflate"] +deflate-flate2 = ["zip/deflate-flate2"] +deflate-zlib = ["zip/deflate-zlib"] +deflate-zlib-ng = ["zip/deflate-zlib-ng"] +deflate-zopfli = ["zip/deflate-zopfli"] +lzma = ["zip/lzma"] +time = ["zip/time"] +xz = ["zip/xz"] +zstd = ["zip/zstd"] + +glob = ["dep:glob"] +rx = ["dep:regex"] + +default = [ + "aes-crypto", + "bzip2", + "deflate64", + "deflate", + "lzma", + "time", + "xz", + "zstd", + "glob", + "rx", +] + + +[profile.release] +strip = true +lto = true +opt-level = 3 +codegen-units = 1 diff --git a/cli/clite/Cargo.toml b/cli/clite/Cargo.toml new file mode 100644 index 000000000..607bf3314 --- /dev/null +++ b/cli/clite/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "zip-clite" +version = "0.0.1" +authors = [ + "Danny McClanahan ", +] +license = "MIT" +repository = "https://github.com/zip-rs/zip2.git" +keywords = ["zip", "archive", "compression", "cli"] +categories = ["command-line-utilities", "compression", "filesystem", "development-tools::build-utils"] +rust-version = "1.74.0" +description = """ +Binary for creation and manipulation of zip files. +""" +edition = "2021" + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "zip-clite" + +[dependencies] + +[dependencies.zip-cli] +path = ".." 
+default-features = false +features = ["deflate-flate2", "deflate-zlib"] + +[profile.release] +strip = true +lto = true +opt-level = "s" +codegen-units = 1 diff --git a/cli/clite/src/main.rs b/cli/clite/src/main.rs new file mode 100644 index 000000000..95fae2ac9 --- /dev/null +++ b/cli/clite/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + zip_cli::driver::main(); +} diff --git a/cli/src/args.rs b/cli/src/args.rs new file mode 100644 index 000000000..b59fd0cbd --- /dev/null +++ b/cli/src/args.rs @@ -0,0 +1,208 @@ +use std::{collections::VecDeque, ffi::OsString, fmt, sync::OnceLock}; + +#[derive(Debug)] +pub enum ArgParseError { + StdoutMessage(String), + StderrMessage(String), +} + +#[derive(Debug)] +pub struct ZipCli { + pub verbose: bool, + pub command: ZipCommand, +} + +#[derive(Debug)] +enum SubcommandName { + Compress, + Info, + Extract, +} + +static PARSED_EXE_NAME: OnceLock = OnceLock::new(); + +impl ZipCli { + const VERSION: &'static str = env!("CARGO_PKG_VERSION"); + const DESCRIPTION: &'static str = env!("CARGO_PKG_DESCRIPTION"); + + pub const INTERNAL_ERROR_EXIT_CODE: i32 = 3; + pub const ARGV_PARSE_FAILED_EXIT_CODE: i32 = 2; + pub const NON_FAILURE_EXIT_CODE: i32 = 0; + + pub fn binary_name() -> &'static str { + PARSED_EXE_NAME.get().expect("binary name was not set yet") + } + + fn generate_version_text() -> String { + format!("{} {}\n", Self::binary_name(), Self::VERSION) + } + + fn generate_usage_line() -> String { + format!("Usage: {} [OPTIONS] ", Self::binary_name()) + } + + fn generate_full_help_text() -> String { + format!( + "\ +{} + +{} + +Commands: + {}{}{} + {}{}{} + {}{}{} + +Options: + -v, --verbose Write information logs to stderr + -h, --help Print help + -V, --version Print version +", + Self::DESCRIPTION, + Self::generate_usage_line(), + compress::Compress::COMMAND_NAME, + compress::Compress::COMMAND_TABS, + compress::Compress::COMMAND_DESCRIPTION, + info::Info::COMMAND_NAME, + info::Info::COMMAND_TABS, + info::Info::COMMAND_DESCRIPTION, + 
extract::Extract::COMMAND_NAME, + extract::Extract::COMMAND_TABS, + extract::Extract::COMMAND_DESCRIPTION, + ) + } + + fn generate_brief_help_text(context: &str) -> String { + format!( + "\ +error: {context} + +{} + +For more information, try '--help'. +", + Self::generate_usage_line() + ) + } + + fn parse_up_to_subcommand_name( + argv: &mut VecDeque, + ) -> Result<(bool, SubcommandName), ArgParseError> { + let mut verbose: bool = false; + let mut subcommand_name: Option = None; + while subcommand_name.is_none() { + match argv.pop_front() { + None => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StderrMessage(help_text)); + } + Some(arg) => match arg.as_encoded_bytes() { + b"-v" | b"--verbose" => verbose = true, + b"-V" | b"--version" => { + let version_text = Self::generate_version_text(); + return Err(ArgParseError::StdoutMessage(version_text)); + } + b"-h" | b"--help" => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + b"compress" => subcommand_name = Some(SubcommandName::Compress), + b"info" => subcommand_name = Some(SubcommandName::Info), + b"extract" => subcommand_name = Some(SubcommandName::Extract), + arg_bytes => { + let context = if arg_bytes.starts_with(b"-") { + format!("unrecognized global flag {arg:?}") + } else { + format!("unrecognized subcommand name {arg:?}") + }; + let help_text = Self::generate_brief_help_text(&context); + return Err(ArgParseError::StderrMessage(help_text)); + } + }, + } + } + Ok((verbose, subcommand_name.unwrap())) + } + + pub fn parse_argv(argv: impl IntoIterator) -> Result { + let mut argv: VecDeque = argv.into_iter().collect(); + let exe_name: String = argv + .pop_front() + .expect("exe name not on command line") + .into_string() + .expect("exe name not valid unicode"); + PARSED_EXE_NAME + .set(exe_name) + .expect("exe name already written"); + let (verbose, subcommand_name) = Self::parse_up_to_subcommand_name(&mut argv)?; + 
let command = match subcommand_name { + SubcommandName::Info => ZipCommand::Info(info::Info::parse_argv(argv)?), + SubcommandName::Extract => ZipCommand::Extract(extract::Extract::parse_argv(argv)?), + SubcommandName::Compress => ZipCommand::Compress(compress::Compress::parse_argv(argv)?), + }; + Ok(Self { verbose, command }) + } +} + +#[derive(Debug)] +pub enum ZipCommand { + Compress(compress::Compress), + Info(info::Info), + Extract(extract::Extract), +} + +pub trait CommandFormat: fmt::Debug { + const COMMAND_NAME: &'static str; + const COMMAND_TABS: &'static str; + const COMMAND_DESCRIPTION: &'static str; + + const USAGE_LINE: &'static str; + + fn generate_usage_line() -> String { + format!( + "Usage: {} {} {}", + ZipCli::binary_name(), + Self::COMMAND_NAME, + Self::USAGE_LINE, + ) + } + + fn generate_help() -> String; + + fn generate_full_help_text() -> String { + format!( + "\ +{} + +{} +{}", + Self::COMMAND_DESCRIPTION, + Self::generate_usage_line(), + Self::generate_help(), + ) + } + + fn generate_brief_help_text(context: &str) -> String { + format!( + "\ +error: {context} + +{} +", + Self::generate_usage_line() + ) + } + + fn exit_arg_invalid(context: &str) -> ArgParseError { + let message = Self::generate_brief_help_text(context); + ArgParseError::StderrMessage(message) + } + + fn parse_argv(argv: VecDeque) -> Result + where + Self: Sized; +} + +pub mod compress; +pub mod extract; +pub mod info; diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs new file mode 100644 index 000000000..796b47990 --- /dev/null +++ b/cli/src/args/compress.rs @@ -0,0 +1,479 @@ +use super::{ArgParseError, CommandFormat}; + +use std::{collections::VecDeque, ffi::OsString, num::ParseIntError, path::PathBuf}; + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum CompressionMethodArg { + Stored, + Deflate, /* requires having zip/_deflate-any set to compile */ + #[cfg(feature = "deflate64")] + Deflate64, + #[cfg(feature = "bzip2")] + Bzip2, + 
#[cfg(feature = "zstd")] + Zstd, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct CompressionLevel(pub i64); + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct UnixPermissions(pub u32); + +impl UnixPermissions { + pub fn parse(s: &str) -> Result { + Ok(Self(u32::from_str_radix(s, 8)?)) + } +} + +#[derive(Debug)] +pub enum CompressionArg { + CompressionMethod(CompressionMethodArg), + Level(CompressionLevel), + UnixPermissions(UnixPermissions), + LargeFile(bool), + Name(String), + Dir, + Symlink, + Immediate(OsString), + FilePath(PathBuf), + RecursiveDirPath(PathBuf), +} + +#[derive(Debug)] +pub enum OutputType { + Stdout { allow_tty: bool }, + File { path: PathBuf, append: bool }, +} + +#[derive(Debug)] +pub struct Compress { + pub output: OutputType, + pub archive_comment: Option, + pub args: Vec, + pub positional_paths: Vec, +} + +impl Compress { + #[cfg(feature = "deflate64")] + const DEFLATE64_HELP_LINE: &'static str = " - deflate64:\twith deflate64\n"; + #[cfg(not(feature = "deflate64"))] + const DEFLATE64_HELP_LINE: &'static str = ""; + + #[cfg(feature = "bzip2")] + const BZIP2_HELP_LINE: &'static str = " - bzip2:\twith bzip2\n"; + #[cfg(not(feature = "bzip2"))] + const BZIP2_HELP_LINE: &'static str = ""; + + #[cfg(feature = "zstd")] + const ZSTD_HELP_LINE: &'static str = " - zstd:\twith zstd\n"; + #[cfg(not(feature = "zstd"))] + const ZSTD_HELP_LINE: &'static str = ""; +} + +/* TODO: add support for entry and file comments! */ +impl CommandFormat for Compress { + const COMMAND_NAME: &'static str = "compress"; + const COMMAND_TABS: &'static str = "\t"; + const COMMAND_DESCRIPTION: &'static str = + "Generate an archive from data in argument strings or read from the filesystem."; + + const USAGE_LINE: &'static str = + "[-h|--help] [OUTPUT-FLAGS] [--archive-comment ] [ENTRY]... 
[--] [PATH]..."; + + fn generate_help() -> String { + format!( + r#" + -h, --help Print help + +Output flags: +Where and how to write the generated zip archive. + + -o, --output-file + Output zip file path to write. + The output file is truncated if it already exists, unless --append is + provided. If not provided, output is written to stdout. + + --append + If an output path is provided with -o, open it as an existing zip + archive and append to it. If the output path does not already exist, + no error is produced, and a new zip file is created at the given path. + + --stdout + Allow writing output to stdout even if stdout is a tty. + +Global flags: +These flags describe information set for the entire produced archive. + + --archive-comment + If provided, this will set the archive's comment field to the + specified bytes. This does not need to be valid unicode. + +Entries: +After output flags are provided, the rest of the command line is +attributes and entry data. Attributes modify later entries. + +Sticky attributes: +These flags apply to everything that comes after them until reset by another +instance of the same attribute. Sticky attributes continue to apply to +positional arguments received after processing all flags. + + -c, --compression-method + Which compression technique to use. + Defaults to deflate if not specified. + + Possible values: + - stored: uncompressed + - deflate: with deflate (default) +{}{}{} + -l, --compression-level + How much compression to perform, from 0..=24. + The accepted range of values differs for each technique. + + -m, --mode + Unix permissions to apply to the file, in octal (like chmod). + + --large-file [true|false] + Whether to enable large file support. + This may take up more space for records, but allows files over 32 bits + in length to be written, up to 64 bit sizes. 
+ File arguments over 32 bits in length (either provided explicitly or + encountered when traversing a recursive directory) will have this flag + set automatically, without affecting the sticky value for + later options. + Therefore, this option likely never has to be set explicitly by + the user. + +Non-sticky attributes: +These flags only apply to the next entry after them, and may not be repeated. + + -n, --name + The name to apply to the entry. This must be UTF-8 encoded. + + -s, --symlink + Make the next entry into a symlink entry. + A symlink entry may be immediate with -i, or it may copy the target + from an existing symlink with -f. + +Entry data: +Each of these flags creates an entry in the output zip archive. + + -d, --dir + Create a directory entry. + A name must be provided beforehand with -n. + + -i, --immediate + Write an entry containing the data in the argument, which need not be + UTF-8 encoded but will exit early upon encountering any null bytes. + A name must be provided beforehand with -n. + + -f, --file + Write an entry with the contents of this file path. + A name may be provided beforehand with -n, otherwise the name will be + inferred from relativizing the given path to the working directory. + Note that sockets are currently not supported and will produce an + error. Providing a path to a directory will produce an error. + + If -s was specified beforehand, the path will be read as a symlink, + which will produce an error if the path does not point to a symbolic + link. If -s was not specified beforehand and a symlink path was + provided, then the symbolic link will be interpreted as if it was + a file with the contents of the symlink target, but with its name + corresponding to the symlink path (unless overridden with -n). + + -r, --recursive-dir + Write all the recursive contents of this directory path. + A name may be provided beforehand with -n, which will be used as the + prefix for all recursive contents of this directory. 
Otherwise, the + name will be inferred from relativizing the given path to the + working directory. + + -s is not allowed before this argument. If a path to a symbolic link + is provided, it will be treated as if it pointed to a directory with + the recursive contents of the target directory, but with its name + corresponding to the symlink path (unless overridden with -n). + Providing a symlink path which points to a file will produce an error. + +Positional entries: + [PATH]... + Write the file or recursive directory contents, relativizing the path. + If the given path points to a file, then a single file entry will + be written. + If the given path is a symlink, then a single symlink entry will + be written. + If the given path refers to a directory, then the recursive contents + will be written, reproducing files and symlinks. + Socket paths will produce an error. +"#, + Self::DEFLATE64_HELP_LINE, + Self::BZIP2_HELP_LINE, + Self::ZSTD_HELP_LINE, + ) + } + + fn parse_argv(mut argv: VecDeque) -> Result { + let mut allow_stdout: bool = false; + let mut append_to_output_path: bool = false; + let mut output_path: Option = None; + let mut archive_comment: Option = None; + let mut args: Vec = Vec::new(); + let mut positional_paths: Vec = Vec::new(); + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"-h" | b"--help" => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + + /* Output flags */ + b"--stdout" => { + if let Some(output_path) = output_path.take() { + return Err(Self::exit_arg_invalid(&format!( + "--stdout provided along with output file {output_path:?}" + ))); + } else if append_to_output_path { + return Err(Self::exit_arg_invalid( + "--stdout provided along with --append", + )); + } else if !args.is_empty() || !positional_paths.is_empty() { + return Err(Self::exit_arg_invalid("--stdout provided after entries")); + } else if allow_stdout { + return 
Err(Self::exit_arg_invalid("--stdout provided twice")); + } else { + allow_stdout = true; + } + } + b"--append" => { + if append_to_output_path { + return Err(Self::exit_arg_invalid("--append provided twice")); + } else if !args.is_empty() || !positional_paths.is_empty() { + return Err(Self::exit_arg_invalid("--append provided after entries")); + } else if allow_stdout { + return Err(Self::exit_arg_invalid( + "--stdout provided along with --append", + )); + } else { + append_to_output_path = true; + } + } + b"-o" | b"--output-file" => { + let new_path = argv.pop_front().map(PathBuf::from).ok_or_else(|| { + Self::exit_arg_invalid("no argument provided for -o/--output-file") + })?; + if let Some(prev_path) = output_path.take() { + return Err(Self::exit_arg_invalid(&format!( + "--output-file provided twice: {prev_path:?} and {new_path:?}" + ))); + } else if allow_stdout { + return Err(Self::exit_arg_invalid( + "--stdout provided along with output file", + )); + } else if !args.is_empty() || !positional_paths.is_empty() { + return Err(Self::exit_arg_invalid( + "-o/--output-file provided after entries", + )); + } else { + output_path = Some(new_path); + } + } + + /* Global flags */ + b"--archive-comment" => { + let new_comment = argv.pop_front().ok_or_else(|| { + Self::exit_arg_invalid("no argument provided for --archive-comment") + })?; + if let Some(prev_comment) = archive_comment.take() { + return Err(Self::exit_arg_invalid(&format!( + "--archive-comment provided twice: {prev_comment:?} and {new_comment:?}" + ))); + } else if !args.is_empty() || !positional_paths.is_empty() { + return Err(Self::exit_arg_invalid( + "--archive-comment provided after entries", + )); + } else { + archive_comment = Some(new_comment); + } + } + + /* Attributes */ + b"-c" | b"--compression-method" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid( + "no argument provided for -c/--compression-method", + )) + } + Some(name) => match name.as_encoded_bytes() { + 
b"stored" => args.push(CompressionArg::CompressionMethod( + CompressionMethodArg::Stored, + )), + b"deflate" => args.push(CompressionArg::CompressionMethod( + CompressionMethodArg::Deflate, + )), + #[cfg(feature = "deflate64")] + b"deflate64" => args.push(CompressionArg::CompressionMethod( + CompressionMethodArg::Deflate64, + )), + #[cfg(feature = "bzip2")] + b"bzip2" => args.push(CompressionArg::CompressionMethod( + CompressionMethodArg::Bzip2, + )), + #[cfg(feature = "zstd")] + b"zstd" => args.push(CompressionArg::CompressionMethod( + CompressionMethodArg::Zstd, + )), + _ => { + return Err(Self::exit_arg_invalid( + "unrecognized compression method {name:?}", + )); + } + }, + }, + b"-l" | b"--compression-level" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid( + "no argument provided for -l/--compression-level", + )); + } + Some(level) => match level.into_string() { + Err(level) => { + return Err(Self::exit_arg_invalid(&format!( + "invalid unicode provided for compression level: {level:?}" + ))); + } + Ok(level) => match level.parse::() { + Err(e) => { + return Err(Self::exit_arg_invalid(&format!( + "failed to parse integer for compression level: {e}" + ))); + } + Ok(level) => { + if (0..=24).contains(&level) { + args.push(CompressionArg::Level(CompressionLevel(level))) + } else { + return Err(Self::exit_arg_invalid(&format!( + "compression level {level} was not between 0 and 24" + ))); + } + } + }, + }, + }, + b"-m" | b"--mode" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid("no argument provided for -m/--mode")); + } + Some(mode) => match mode.into_string() { + Err(mode) => { + return Err(Self::exit_arg_invalid(&format!( + "invalid unicode provided for mode: {mode:?}" + ))); + } + Ok(mode) => match UnixPermissions::parse(&mode) { + Err(e) => { + return Err(Self::exit_arg_invalid(&format!( + "failed to parse integer for mode: {e}" + ))); + } + Ok(mode) => args.push(CompressionArg::UnixPermissions(mode)), + }, 
+ }, + }, + b"--large-file" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid( + "no argument provided for --large-file", + )); + } + Some(large_file) => match large_file.as_encoded_bytes() { + b"true" => args.push(CompressionArg::LargeFile(true)), + b"false" => args.push(CompressionArg::LargeFile(false)), + _ => { + return Err(Self::exit_arg_invalid(&format!( + "unrecognized value for --large-file: {large_file:?}" + ))); + } + }, + }, + + /* Data */ + b"-n" | b"--name" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid("no argument provided for -n/--name")) + } + Some(name) => match name.into_string() { + Err(name) => { + return Err(Self::exit_arg_invalid(&format!( + "invalid unicode provided for name: {name:?}" + ))); + } + Ok(name) => args.push(CompressionArg::Name(name)), + }, + }, + b"-s" | b"--symlink" => args.push(CompressionArg::Symlink), + b"-d" | b"--dir" => args.push(CompressionArg::Dir), + b"-i" | b"--immediate" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid( + "no argument provided for -i/--immediate", + )); + } + Some(data) => args.push(CompressionArg::Immediate(data)), + }, + b"-f" | b"--file" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid("no argument provided for -f/--file")); + } + Some(file) => args.push(CompressionArg::FilePath(file.into())), + }, + b"-r" | b"--recursive-dir" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid( + "no argument provided for -r/--recursive-dir", + )); + } + Some(dir) => args.push(CompressionArg::RecursiveDirPath(dir.into())), + }, + + /* Transition to positional args */ + b"--" => break, + arg_bytes => { + if arg_bytes.starts_with(b"-") { + return Err(Self::exit_arg_invalid(&format!( + "unrecognized flag {arg:?}" + ))); + } else { + argv.push_front(arg); + break; + } + } + } + } + + positional_paths.extend(argv.into_iter().map(|arg| arg.into())); + + let output = if let Some(path) = 
output_path { + OutputType::File { + path, + append: append_to_output_path, + } + } else { + OutputType::Stdout { + allow_tty: allow_stdout, + } + }; + + Ok(Self { + output, + archive_comment, + args, + positional_paths, + }) + } +} + +impl crate::driver::ExecuteCommand for Compress { + fn execute(self, err: impl std::io::Write) -> Result<(), crate::CommandError> { + crate::compress::execute_compress(err, self) + } +} diff --git a/cli/src/args/extract.rs b/cli/src/args/extract.rs new file mode 100644 index 000000000..1a580ad54 --- /dev/null +++ b/cli/src/args/extract.rs @@ -0,0 +1,1619 @@ +use super::{ArgParseError, CommandFormat}; + +use zip::CompressionMethod; + +use std::{collections::VecDeque, ffi::OsString, mem, path::PathBuf}; + +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum ContentTransform { + Extract { name: Option }, +} + +#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)] +pub enum ComponentSelector { + #[default] + Path, + Basename, + Dirname, + FileExtension, +} + +impl ComponentSelector { + pub fn parse(s: &[u8]) -> Option { + match s { + b"path" => Some(Self::Path), + b"basename" => Some(Self::Basename), + b"dirname" => Some(Self::Dirname), + b"ext" => Some(Self::FileExtension), + _ => None, + } + } +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)] +pub enum PatternSelectorType { + Glob, + Literal, + Regexp, +} + +impl PatternSelectorType { + pub fn parse(s: &[u8]) -> Option { + match s { + b"glob" => Some(Self::Glob), + b"lit" => Some(Self::Literal), + b"rx" => Some(Self::Regexp), + _ => None, + } + } + + pub const fn default_for_match() -> Self { + if cfg!(feature = "glob") { + Self::Glob + } else { + Self::Literal + } + } + + pub const fn default_for_replacement() -> Self { + if cfg!(feature = "rx") { + Self::Regexp + } else { + Self::Literal + } + } +} + +#[derive(Debug)] +pub enum PatternSelectorModifier { + CaseInsensitive, + MultipleMatches, + PrefixAnchored, + 
SuffixAnchored, +} + +impl PatternSelectorModifier { + pub fn parse(s: &[u8]) -> Option { + match s { + b"i" => Some(Self::CaseInsensitive), + b"g" => Some(Self::MultipleMatches), + b"p" => Some(Self::PrefixAnchored), + b"s" => Some(Self::SuffixAnchored), + _ => None, + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct PatternModifierFlags { + pub case_insensitive: bool, + pub multiple_matches: bool, + pub prefix_anchored: bool, + pub suffix_anchored: bool, +} + +#[derive(Debug)] +pub struct PatternSelector { + pub pat_sel: PatternSelectorType, + pub modifiers: PatternModifierFlags, +} + +impl PatternSelector { + pub fn parse(s: &[u8]) -> Option { + match s.iter().position(|c| *c == b':') { + Some(modifiers_ind) => { + let pat_sel_str = &s[..modifiers_ind]; + let modifiers_str = &s[(modifiers_ind + 1)..]; + + let pat_sel = PatternSelectorType::parse(pat_sel_str)?; + + let mut modifiers = PatternModifierFlags::default(); + let mod_els = modifiers_str + .split(|c| *c == b':') + .map(PatternSelectorModifier::parse) + .collect::>>()?; + for m in mod_els.into_iter() { + match m { + PatternSelectorModifier::CaseInsensitive => { + modifiers.case_insensitive = true; + } + PatternSelectorModifier::MultipleMatches => { + modifiers.multiple_matches = true; + } + PatternSelectorModifier::PrefixAnchored => { + modifiers.prefix_anchored = true; + } + PatternSelectorModifier::SuffixAnchored => { + modifiers.suffix_anchored = true; + } + } + } + Some(Self { pat_sel, modifiers }) + } + None => { + let pat_sel = PatternSelectorType::parse(s)?; + Some(Self { + pat_sel, + modifiers: Default::default(), + }) + } + } + } + + pub fn default_for_context(ctx: PatternContext) -> Self { + match ctx { + PatternContext::Match => Self::default_for_match(), + PatternContext::Replacement => Self::default_for_replacement(), + } + } + + pub fn default_for_match() -> Self { + Self { + pat_sel: PatternSelectorType::default_for_match(), + modifiers: 
PatternModifierFlags::default(), + } + } + + pub fn default_for_replacement() -> Self { + Self { + pat_sel: PatternSelectorType::default_for_replacement(), + modifiers: PatternModifierFlags::default(), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum PatternContext { + Match, + Replacement, +} + +pub fn parse_only_pat_sel(s: &[u8], ctx: PatternContext) -> Option { + match s.iter().position(|c| *c == b':') { + Some(pat_sel_ind) => { + let pat_sel_str = &s[(pat_sel_ind + 1)..]; + + let pat_sel = PatternSelector::parse(pat_sel_str)?; + Some(pat_sel) + } + None => Some(PatternSelector::default_for_context(ctx)), + } +} + +pub fn parse_comp_and_pat_sel( + s: &[u8], + ctx: PatternContext, +) -> Option<(ComponentSelector, PatternSelector)> { + match ( + s.iter().position(|c| *c == b'='), + s.iter().position(|c| *c == b':'), + ) { + (Some(comp_sel_ind), Some(pat_sel_ind)) => { + if comp_sel_ind >= pat_sel_ind { + return None; + } + let comp_sel_str = &s[(comp_sel_ind + 1)..pat_sel_ind]; + let pat_sel_str = &s[(pat_sel_ind + 1)..]; + + let comp_sel = ComponentSelector::parse(comp_sel_str)?; + let pat_sel = PatternSelector::parse(pat_sel_str)?; + Some((comp_sel, pat_sel)) + } + (Some(comp_sel_ind), None) => { + let comp_sel_str = &s[(comp_sel_ind + 1)..]; + + let comp_sel = ComponentSelector::parse(comp_sel_str)?; + let pat_sel = PatternSelector::default_for_context(ctx); + Some((comp_sel, pat_sel)) + } + (None, Some(pat_sel_ind)) => { + let pat_sel_str = &s[(pat_sel_ind + 1)..]; + + let pat_sel = PatternSelector::parse(pat_sel_str)?; + let comp_sel = ComponentSelector::default(); + Some((comp_sel, pat_sel)) + } + (None, None) => { + let comp_sel = ComponentSelector::default(); + let pat_sel = PatternSelector::default_for_context(ctx); + Some((comp_sel, pat_sel)) + } + } +} + +#[derive(Debug)] +pub enum EntryType { + File, + Dir, + Symlink, +} + +impl EntryType { + pub fn parse(s: &[u8]) -> Option { + match s { + b"file" => 
Some(Self::File), + b"dir" => Some(Self::Dir), + b"symlink" => Some(Self::Symlink), + _ => None, + } + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum NonSpecificCompressionMethodArg { + Any, + Known, +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum SpecificCompressionMethodArg { + Stored, + Deflated, + #[cfg(feature = "deflate64")] + Deflate64, + #[cfg(feature = "bzip2")] + Bzip2, + #[cfg(feature = "zstd")] + Zstd, + #[cfg(feature = "lzma")] + Lzma, + #[cfg(feature = "xz")] + Xz, +} + +impl SpecificCompressionMethodArg { + pub const KNOWN_COMPRESSION_METHODS: &[CompressionMethod] = &[ + CompressionMethod::Stored, + CompressionMethod::Deflated, + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64, + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2, + #[cfg(feature = "zstd")] + CompressionMethod::Zstd, + #[cfg(feature = "lzma")] + CompressionMethod::Lzma, + #[cfg(feature = "xz")] + CompressionMethod::Xz, + ]; + + pub fn translate_to_zip(self) -> CompressionMethod { + match self { + Self::Stored => CompressionMethod::Stored, + Self::Deflated => CompressionMethod::Deflated, + #[cfg(feature = "deflate64")] + Self::Deflate64 => CompressionMethod::Deflate64, + #[cfg(feature = "bzip2")] + Self::Bzip2 => CompressionMethod::Bzip2, + #[cfg(feature = "zstd")] + Self::Zstd => CompressionMethod::Zstd, + #[cfg(feature = "lzma")] + Self::Lzma => CompressionMethod::Lzma, + #[cfg(feature = "xz")] + Self::Xz => CompressionMethod::Xz, + } + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum CompressionMethodArg { + NonSpecific(NonSpecificCompressionMethodArg), + Specific(SpecificCompressionMethodArg), +} + +impl CompressionMethodArg { + pub fn parse(s: &[u8]) -> Option { + match s { + b"any" => Some(Self::NonSpecific(NonSpecificCompressionMethodArg::Any)), + b"known" => Some(Self::NonSpecific(NonSpecificCompressionMethodArg::Known)), + b"stored" => Some(Self::Specific(SpecificCompressionMethodArg::Stored)), + b"deflated" => 
Some(Self::Specific(SpecificCompressionMethodArg::Deflated)), + #[cfg(feature = "deflate64")] + b"deflate64" => Some(Self::Specific(SpecificCompressionMethodArg::Deflate64)), + #[cfg(feature = "bzip2")] + b"bzip2" => Some(Self::Specific(SpecificCompressionMethodArg::Bzip2)), + #[cfg(feature = "zstd")] + b"zstd" => Some(Self::Specific(SpecificCompressionMethodArg::Zstd)), + #[cfg(feature = "lzma")] + b"lzma" => Some(Self::Specific(SpecificCompressionMethodArg::Lzma)), + #[cfg(feature = "xz")] + b"xz" => Some(Self::Specific(SpecificCompressionMethodArg::Xz)), + _ => None, + } + } +} + +#[derive(Debug)] +pub enum DepthLimitArg { + Max(u8), + Min(u8), +} + +#[derive(Debug)] +pub enum SizeArg { + Max(u64), + Min(u64), +} + +#[derive(Debug)] +pub struct MatchArg { + pub comp_sel: ComponentSelector, + pub pat_sel: PatternSelector, + pub pattern: String, +} + +#[derive(Debug)] +pub enum TrivialPredicate { + True, + False, +} + +#[derive(Debug)] +pub enum Predicate { + Trivial(TrivialPredicate), + EntryType(EntryType), + CompressionMethod(CompressionMethodArg), + DepthLimit(DepthLimitArg), + Size(SizeArg), + Match(MatchArg), +} + +#[derive(Debug)] +enum ExprOp { + Negation, + And, + Or, +} + +#[derive(Debug)] +enum ExprArg { + PrimitivePredicate(Predicate), + Op(ExprOp), + Subgroup(MatchExpression), +} + +#[derive(Debug, Default)] +struct SingleExprLevel { + expr_args: Vec, +} + +impl SingleExprLevel { + pub fn push_arg(&mut self, arg: ExprArg) { + self.expr_args.push(arg); + } + + fn get_negation(expr_args: &mut VecDeque) -> Result { + let negated_expr: MatchExpression = match expr_args.pop_front().ok_or_else(|| { + Extract::exit_arg_invalid(&format!( + "negation was only expression in list inside match expr (rest: {expr_args:?})" + )) + })? { + ExprArg::Subgroup(match_expr) => { + /* We have a valid match expression, so just negate it without + * wrapping. 
*/ + MatchExpression::Negated(Box::new(match_expr)) + } + ExprArg::PrimitivePredicate(predicate) => { + /* We got a primitive predicate, so just negate it! */ + MatchExpression::Negated(Box::new(MatchExpression::PrimitivePredicate(predicate))) + } + ExprArg::Op(op) => { + /* Negation before any other operator is invalid. */ + return Err(Extract::exit_arg_invalid(&format!( + "negation before operator {op:?} inside match expr is invalid (rest: {expr_args:?})" + ))); + } + }; + Ok(negated_expr) + } + + fn get_non_operator( + expr_args: &mut VecDeque, + ) -> Result { + let next_expr: MatchExpression = match expr_args.pop_front().ok_or_else(|| { + /* We can't fold an empty list. */ + Extract::exit_arg_invalid(&format!( + "empty expression list inside match expr (rest: {expr_args:?})" + )) + })? { + /* This is already an evaluated match expression, so just start with that. */ + ExprArg::Subgroup(match_expr) => match_expr, + ExprArg::PrimitivePredicate(predicate) => { + /* Success! We start with a simple predicate. */ + MatchExpression::PrimitivePredicate(predicate) + } + ExprArg::Op(op) => match op { + /* We started with negation, which means we need to get the next arg to resolve + * it. */ + ExprOp::Negation => Self::get_negation(expr_args)?, + /* Starting with a binary operator is invalid. */ + op @ (ExprOp::And | ExprOp::Or) => { + return Err(Extract::exit_arg_invalid(&format!( + "expression list cannot begin with binary operator {op:?} (rest: {expr_args:?})" + ))); + } + }, + }; + Ok(next_expr) + } + + pub fn fold(self) -> Result { + let Self { expr_args } = self; + let mut expr_args: VecDeque<_> = expr_args.into(); + + /* Get a valid match expression to start our fold with. */ + let mut cur_expr: MatchExpression = Self::get_non_operator(&mut expr_args)?; + + /* Now fold the expression rightwards! */ + while let Some(next_arg) = expr_args.pop_front() { + match next_arg { + /* Implicit AND, wrapping the primitive result into a match. 
*/ + ExprArg::PrimitivePredicate(predicate) => { + let next_expr = MatchExpression::PrimitivePredicate(predicate); + cur_expr = MatchExpression::And { + explicit: false, + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + /* Implicit AND, without needing to wrap the result. */ + ExprArg::Subgroup(match_expr) => { + cur_expr = MatchExpression::And { + explicit: false, + left: Box::new(cur_expr), + right: Box::new(match_expr), + }; + } + /* Evaluate the operator according to association. */ + ExprArg::Op(op) => match op { + /* Negation applies to the next element, so retrieve it! */ + ExprOp::Negation => { + let next_expr = Self::get_negation(&mut expr_args)?; + cur_expr = MatchExpression::And { + explicit: false, + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + /* Explicit AND requires the next element. */ + ExprOp::And => { + let next_expr = Self::get_non_operator(&mut expr_args)?; + cur_expr = MatchExpression::And { + explicit: true, + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + /* OR requires the next element. */ + ExprOp::Or => { + let next_expr = Self::get_non_operator(&mut expr_args)?; + cur_expr = MatchExpression::Or { + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + }, + } + } + + assert!(expr_args.is_empty()); + Ok(cur_expr) + } +} + +#[derive(Debug)] +pub enum MatchExpression { + PrimitivePredicate(Predicate), + Negated(Box), + And { + explicit: bool, + left: Box, + right: Box, + }, + Or { + left: Box, + right: Box, + }, + Grouped(Box), +} + +impl MatchExpression { + pub fn parse_argv( + argv: &mut VecDeque, + ) -> Result { + let mut expr_stack: Vec = Vec::new(); + let mut top_exprs = SingleExprLevel::default(); + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + /* Parse primitive predicates. 
*/ + b"-true" => { + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Trivial( + TrivialPredicate::True, + ))); + } + b"-false" => { + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Trivial( + TrivialPredicate::False, + ))); + } + b"-t" | b"--type" => { + let type_arg = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for -t/--type"))?; + let entry_type = + EntryType::parse(type_arg.as_encoded_bytes()).ok_or_else(|| { + C::exit_arg_invalid(&format!("invalid --type argument: {type_arg:?}")) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::EntryType( + entry_type, + ))); + } + b"--compression-method" => { + let method_arg = argv.pop_front().ok_or_else(|| { + C::exit_arg_invalid("no argument provided for --compression-method") + })?; + let method = CompressionMethodArg::parse(method_arg.as_encoded_bytes()) + .ok_or_else(|| { + C::exit_arg_invalid(&format!( + "invalid --compression-method argument: {method_arg:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::CompressionMethod( + method, + ))); + } + b"--max-depth" => { + let max_depth: u8 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --max-depth"))? + .into_string() + .map_err(|depth_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --max-depth: {depth_arg:?}" + )) + })? + .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --max-depth arg as u8: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::DepthLimit( + DepthLimitArg::Max(max_depth), + ))); + } + b"--min-depth" => { + let min_depth: u8 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --min-depth"))? + .into_string() + .map_err(|depth_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --min-depth: {depth_arg:?}" + )) + })? 
+ .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --min-depth arg as u8: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::DepthLimit( + DepthLimitArg::Min(min_depth), + ))); + } + b"--max-size" => { + let max_size: u64 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --max-size"))? + .into_string() + .map_err(|size_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --max-size: {size_arg:?}" + )) + })? + .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --max-size arg as u64: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Size(SizeArg::Max( + max_size, + )))); + } + b"--min-size" => { + let min_size: u64 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --min-size"))? + .into_string() + .map_err(|size_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --min-size: {size_arg:?}" + )) + })? + .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --min-size arg as u64: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Size(SizeArg::Min( + min_size, + )))); + } + b"-m" => { + let pattern: String = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for -m"))? 
+ .into_string() + .map_err(|pattern| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for -m: {pattern:?}" + )) + })?; + let comp_sel = ComponentSelector::default(); + let pat_sel = PatternSelector::default_for_context(PatternContext::Match); + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Match(MatchArg { + comp_sel, + pat_sel, + pattern, + }))); + } + arg_bytes if arg_bytes.starts_with(b"--match") => { + let (comp_sel, pat_sel) = parse_comp_and_pat_sel( + arg_bytes, + PatternContext::Match, + ) + .ok_or_else(|| { + C::exit_arg_invalid(&format!("invalid --match argument modifiers: {arg:?}")) + })?; + let pattern: String = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --match"))? + .into_string() + .map_err(|pattern| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --match: {pattern:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Match(MatchArg { + comp_sel, + pat_sel, + pattern, + }))); + } + + /* Parse operators. */ + b"!" | b"-not" => { + top_exprs.push_arg(ExprArg::Op(ExprOp::Negation)); + } + b"&" | b"-and" => { + top_exprs.push_arg(ExprArg::Op(ExprOp::And)); + } + b"|" | b"-or" => { + top_exprs.push_arg(ExprArg::Op(ExprOp::Or)); + } + + /* Process groups with stack logic! */ + b"(" | b"-open" => { + expr_stack.push(mem::take(&mut top_exprs)); + } + b")" | b"-close" => { + /* Get the unevaluated exprs from the previous nesting level. */ + let prev_level = expr_stack.pop().ok_or_else(|| { + C::exit_arg_invalid("too many close parens inside match expr") + })?; + /* Move the previous nesting level into current, and evaluate the current + * nesting level. */ + let group_expr = mem::replace(&mut top_exprs, prev_level).fold()?; + /* Wrap the completed group in a Grouped. */ + let group_expr = MatchExpression::Grouped(Box::new(group_expr)); + /* Push the completed and evaluated group into the current nesting level. 
*/ + top_exprs.push_arg(ExprArg::Subgroup(group_expr)); + } + + /* Conclude the match expr processing. */ + b"--expr" => { + break; + } + _ => { + return Err(C::exit_arg_invalid(&format!( + "unrecognized match expression component {arg:?}: all match expressions must start and end with a --expr flag" + ))); + } + } + } + + if !expr_stack.is_empty() { + return Err(C::exit_arg_invalid( + "not enough close parens inside match expr", + )); + } + top_exprs.fold() + } +} + +#[derive(Debug)] +pub enum TrivialTransform { + Identity, +} + +#[derive(Debug)] +pub enum BasicTransform { + StripComponents(u8), + AddPrefix(String), +} + +#[derive(Debug)] +pub struct TransformArg { + pub comp_sel: ComponentSelector, + pub pat_sel: PatternSelector, + pub pattern: String, + pub replacement_spec: String, +} + +#[derive(Debug)] +pub enum ComplexTransform { + Transform(TransformArg), +} + +#[derive(Debug)] +pub enum NameTransform { + Trivial(TrivialTransform), + Basic(BasicTransform), + Complex(ComplexTransform), +} + +#[derive(Debug)] +enum ExtractArg { + Match(MatchExpression), + NameTransform(NameTransform), + ContentTransform(ContentTransform), +} + +#[derive(Debug)] +pub struct EntrySpec { + pub match_expr: Option, + pub name_transforms: Vec, + pub content_transform: ContentTransform, +} + +impl EntrySpec { + fn parse_extract_args( + args: impl IntoIterator, + ) -> Result, ArgParseError> { + let mut match_expr: Option = None; + let mut name_transforms: Vec = Vec::new(); + + let mut ret: Vec = Vec::new(); + + for arg in args.into_iter() { + match arg { + ExtractArg::Match(new_expr) => { + if let Some(prev_expr) = match_expr.take() { + return Err(Extract::exit_arg_invalid(&format!( + "more than one match expr was provided for the same entry: {prev_expr:?} and {new_expr:?}" + ))); + } + match_expr = Some(new_expr); + } + ExtractArg::NameTransform(n_trans) => { + name_transforms.push(n_trans); + } + ExtractArg::ContentTransform(c_trans) => { + let spec = Self { + match_expr: 
match_expr.take(), + name_transforms: mem::take(&mut name_transforms), + content_transform: c_trans, + }; + ret.push(spec); + } + } + } + if let Some(match_expr) = match_expr { + return Err(Extract::exit_arg_invalid(&format!( + "match expr {match_expr:?} was provided with no corresponding content \ +transform. add -x/--extract to construct a complete entry spec" + ))); + } + if !name_transforms.is_empty() { + return Err(Extract::exit_arg_invalid(&format!( + "name transforms {name_transforms:?} were provided with no corresponding \ +content transform. add -x/--extract to construct a complete entry spec" + ))); + } + + Ok(ret) + } +} + +#[derive(Debug)] +pub enum OutputCollation { + ConcatenateStdout, + ConcatenateFile { path: PathBuf, append: bool }, + Filesystem { output_dir: PathBuf, mkdir: bool }, +} + +#[derive(Debug)] +pub struct NamedOutput { + pub name: String, + pub output: OutputCollation, +} + +#[derive(Debug)] +pub struct OutputSpecs { + pub default: Option, + pub named: Vec, +} + +impl Default for OutputSpecs { + fn default() -> Self { + Self { + default: Some(OutputCollation::Filesystem { + output_dir: PathBuf::from("."), + mkdir: false, + }), + named: Vec::new(), + } + } +} + +impl OutputSpecs { + pub fn parse_argv(argv: &mut VecDeque) -> Result { + let mut default: Option = None; + let mut named: Vec = Vec::new(); + let mut cur_name: Option = None; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"-h" | b"--help" => { + let help_text = Extract::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + b"--name" => { + let name = argv + .pop_front() + .ok_or_else(|| { + Extract::exit_arg_invalid("no argument provided for --name") + })? 
+ .into_string() + .map_err(|name| { + Extract::exit_arg_invalid(&format!( + "invalid unicode provided for --name: {name:?}" + )) + })?; + if let Some(prev_name) = cur_name.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple names provided for output: {prev_name:?} and {name:?}" + ))); + } + cur_name = Some(name); + } + b"-d" => { + let dir_path = argv + .pop_front() + .map(PathBuf::from) + .ok_or_else(|| Extract::exit_arg_invalid("no argument provided for -d"))?; + let output = OutputCollation::Filesystem { + output_dir: dir_path, + mkdir: false, + }; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + arg_bytes if arg_bytes.starts_with(b"--output-directory") => { + let mkdir = match arg_bytes { + b"--output-directory" => false, + b"--output-directory:mkdir" => true, + _ => { + return Err(Extract::exit_arg_invalid(&format!( + "invalid suffix provided to --output-directory: {arg:?}" + ))); + } + }; + let dir_path = argv.pop_front().map(PathBuf::from).ok_or_else(|| { + Extract::exit_arg_invalid("no argument provided for --output-directory") + })?; + let output = OutputCollation::Filesystem { + output_dir: dir_path, + mkdir, + }; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + b"--stdout" => { + let output = OutputCollation::ConcatenateStdout; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs 
provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + b"-f" => { + let file_path = argv + .pop_front() + .map(PathBuf::from) + .ok_or_else(|| Extract::exit_arg_invalid("no argument provided for -f"))?; + let output = OutputCollation::ConcatenateFile { + path: file_path, + append: false, + }; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + arg_bytes if arg_bytes.starts_with(b"--output-file") => { + let append = match arg_bytes { + b"--output-file" => false, + b"--output-file:append" => true, + _ => { + return Err(Extract::exit_arg_invalid(&format!( + "invalid suffix provided to --output-file: {arg:?}" + ))); + } + }; + let file_path = argv.pop_front().map(PathBuf::from).ok_or_else(|| { + Extract::exit_arg_invalid("no argument provided for --output-file") + })?; + let output = OutputCollation::ConcatenateFile { + path: file_path, + append, + }; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + _ => { + argv.push_front(arg); + break; + } + } + } + if let Some(name) = cur_name { + return Err(Extract::exit_arg_invalid(&format!( + "trailing --name argument provided without output spec: {name:?}" + ))); + } + + Ok(if default.is_none() && named.is_empty() { + Self::default() + } else { + Self { default, named } + }) + } +} + +#[derive(Debug)] +pub struct InputSpec { + pub stdin_stream: bool, + pub zip_paths: Vec, +} + +#[derive(Debug)] +pub struct Extract { + pub output_specs: OutputSpecs, + pub entry_specs: Vec, + pub input_spec: 
InputSpec, +} + +impl Extract { + #[cfg(feature = "deflate64")] + const DEFLATE64_HELP_LINE: &'static str = " - deflate64:\twith deflate64\n"; + #[cfg(not(feature = "deflate64"))] + const DEFLATE64_HELP_LINE: &'static str = ""; + + #[cfg(feature = "bzip2")] + const BZIP2_HELP_LINE: &'static str = " - bzip2:\twith bzip2\n"; + #[cfg(not(feature = "bzip2"))] + const BZIP2_HELP_LINE: &'static str = ""; + + #[cfg(feature = "zstd")] + const ZSTD_HELP_LINE: &'static str = " - zstd:\twith zstd\n"; + #[cfg(not(feature = "zstd"))] + const ZSTD_HELP_LINE: &'static str = ""; + + #[cfg(feature = "lzma")] + const LZMA_HELP_LINE: &'static str = " - lzma:\twith lzma\n"; + #[cfg(not(feature = "lzma"))] + const LZMA_HELP_LINE: &'static str = ""; + + #[cfg(feature = "xz")] + const XZ_HELP_LINE: &'static str = " - xz:\t\twith xz\n"; + #[cfg(not(feature = "xz"))] + const XZ_HELP_LINE: &'static str = ""; + + pub fn generate_match_expr_help_text() -> String { + format!( + r#" +## Match expressions (match-expr): + +Entry matching logic composes boolean arithmetic expressions ("expr") in terms +of basic "predicates" which test some component of the zip entry. Expressions +can be composed as follows, in order of precedence: + +expr = ( ) (grouping to force precedence) + = ! (negation) + = & (short-circuiting conjunction "and") + = (implicit &) + = | (disjunction "or") + = (evaluate on entry) + +### Operators: +The operators to compose match expressions must be quoted in shell commands +(e.g. as \( or '('), so alternatives are provided which do not require +special quoting: + +Grouping operators: + (, -open + ), -close + +Unary operators: + !, -not + +Binary operators: + |, -or + &, -and + +### Predicates (predicate): +These arguments are interpreted as basic predicates, returning true or false in +response to a specific zip entry. + +Trivial: +These results do not depend on the entry data at all: + + -true Always return true. + -false Always return false. 
+ +If a match expression is not provided, it defaults to the behavior of -true. + +Basic: +These results are dependent on the entry data: + + -t, --type [file|dir|symlink] + Match entries of the given type. + Note that directory entries may have specific mode bits set, or they may just be + zero-length entries whose name ends in '/'. + + --compression-method + Match entries compressed with the given compression technique. + + Possible values: + - any: any compression method at all + - known: any compression method this binary is able to decompress + - stored: uncompressed + - deflated: with deflate +{}{}{}{}{} + Using e.g. '--compression-method known' as a match expression filters + entries to only those which can be successfully decompressed. + + --max-depth + Match entries with at *most* components of their + containing directory. + --min-depth + Match entries with at *least* components of their + containing directory. + + --max-size + Match entries of at *most* in *uncompressed* size. + --min-size + Match entries of at *least* in *uncompressed* size. + + Directory entries are 0 bytes in size, and symlink entries are the + size required to store their target. + + TODO: Abbreviations such as 1k, 1M are not currently supported; the + precise byte number must be provided, parseable as a u64. + + -m, --match[=][:] + Return true for entries whose name matches . + + See section on "Selector syntax" for and for how + the string argument is interpreted into a string matching + predicate against the entry name. +"#, + Self::DEFLATE64_HELP_LINE, + Self::BZIP2_HELP_LINE, + Self::ZSTD_HELP_LINE, + Self::LZMA_HELP_LINE, + Self::XZ_HELP_LINE, + ) + } + + pub fn generate_pattern_selector_help_text(match_only: bool) -> String { + format!( + r#" +## Selector syntax: + +The string matching operations of {} expose an interface to +configure various pattern matching techniques on various components of the entry +name string. 
+ +These flags default to interpreting a argument as a glob string to +match against the entire entry name, which can be explicitly requested as +follows: + + --match=path:glob + +The entire range of search options is described below: + +### Component selector (comp-sel): +comp-sel = path [DEFAULT] (match full entry) + = basename (match only the final component of entry) + = dirname (match all except final component of entry) + = ext (match only the file extension, if available) + +### Pattern selector (pat-sel): +pat-sel = glob [DEFAULT{}] (interpret as a shell glob) + = lit (interpret as literal string) + = rx {}(interpret as a regular expression) + = (apply search modifiers from ) + +{} + +Also note that glob and regex patterns require building this binary with the +"glob" and "rx" cargo features respectively. Specifying ':glob' or ':rx' without +the requisite feature support will produce an error. If the requisite feature is +not provided, the default is to use literal matching, which is supported in +all cases. + +#### Pattern modifiers (pat-mod): +pat-mod = :i (use case-insensitive matching for the given pattern) +{} = :p (perform left-anchored "prefix" searches) + = :s (perform right-anchored "suffix" searches) + +Pattern modifiers from (pat-mod) can be sequenced, e.g. ':i:p'. If ':p' and ':s' +are provided together, the result is to perform a doubly-anchored match, against +the entire string. For regexp matching with ':rx', ':p' and ':s' are converted +to '^' or '$' anchors in the regexp pattern string. If the pattern string also +contains '^' or '$' as well, no error is produced. + +*Note:* not all pattern modifiers apply everywhere. In particular, {}':p' and ':s' are +incompatible with glob search and will produce an error. 
+"#, + if match_only { + "--match" + } else { + "--match and --transform" + }, + if match_only { "" } else { " for matching" }, + if match_only { + "" + } else { + "[DEFAULT for replacement] " + }, + if match_only { + "" + } else { + "*Note:* glob patterns are not supported for replacement, and attempting to use +them with e.g '--transform:glob' will produce an error." + }, + if match_only { + "" + } else { + " = :g (use multi-match behavior for string replacements)\n" + }, + if match_only { + "" + } else { + "':g' only +applies to string replacement, and using it for a match expression like +'--match:rx:g' will produce an error. Additionally, " + } + ) + } + + pub const INPUT_HELP_TEXT: &'static str = r#" +# Input arguments: +Zip file inputs to extract from can be specified by streaming from stdin, or as +at least one path pointing to an existing zip file. Input arguments are always +specified after all output flags and entry specs on the command line. If no +positional argument is provided and --stdin is not present, an error will +be produced. + + --stdin + If this argument is provided, the streaming API will be used to read + entries as they are encountered, instead of filtering them beforehand + as is done with file inputs. This disables some optimizations, but + also avoids waiting for the entire input to buffer to start writing + output, so can be used in a streaming context. + +Positional paths: + ZIP-PATH... + Apply the entry specs to filter and rename entries to extract from all + of the provided zip files. At least one zip path must be provided, and + all provided paths must exist and point to an existing zip file. Pipes + are not supported and will produce an error. + + If --stdin is provided, it will be read in a streaming manner before + reading entries from any positional zip paths. 
+"#; +} + +impl CommandFormat for Extract { + const COMMAND_NAME: &'static str = "extract"; + const COMMAND_TABS: &'static str = "\t"; + const COMMAND_DESCRIPTION: &'static str = + "Decompress and transform matching entries into a stream or directory."; + + const USAGE_LINE: &'static str = + "[-h|--help] [OUTPUT-SPEC]... [ENTRY-SPEC]... [--stdin] [--] [ZIP-PATH]..."; + + fn generate_help() -> String { + format!( + r#" + -h, --help Print help + +# Output flags: +Where and how to collate the extracted entries. + +## Directory extraction: +Extract entries into relative paths of a named directory according to the +entry's name. + + -d, --output-directory[:mkdir] + Output directory path to write extracted entries into. + Paths for extracted entries will be constructed by interpreting entry + names as relative paths to the provided directory. + + If the provided path is not a directory, an error is produced. If the + provided path does not exist, an error is produced, unless :mkdir is + specified, which attempts to create the specified directory along with + any missing parent directories. + + If not provided, entries will be extracted into the current directory + (as if '-d .' had been provided). + +## Pipe decompression: +Concatenate decompressed entry data into a pipe or file. Entry names are +effectively ignored. This disables some optimizations that are possible when +extracting to the filesystem. + + --stdout + Concatenate all extracted entries and write them in order to stdout + instead of writing anything to the filesystem. + This will write output to stdout even if stdout is a tty. + + -f, --output-file[:append] + Write all entries into the specified file path . + + The output file will be truncated if it already exists, unless :append + is provided. If the specified file path could not be created + (e.g. because the containing directory does not exist, or because the + path exists but does not point to a regular file), an error + is produced. 
+ +## Output teeing: +Entries may be *received* by one or more named outputs. Without any output names specified, the +above flags will produce a single receiver named "default". This is the default receiver used for +the -x/--extract argument unless otherwise specified. However, multiple named receivers may be +specified in sequence, separated by the --name flag: + + --name + Assign the output receiver created from the following output flags to the name . + +Note that the first output in a list need not have a name, as it will be assigned to the name +"default" if not provided. + +'--stdout' Creates a single default receiver decompressing contents to stdout. +'-d ./a' Creates a single default receiver extracting entries into './a'. + +'--name one -d ./a' + Creates a single named receiver "one" extracting into './a'. -x/--extract + must specify the name "one", or an error will be produced. +'--output-directory:mkdir ./a --name two --stdout' + Creates a default receiver extracting into './a', which will be created if + it does not exist, and a named receiver "two" concatenating into stdout. +'--name one -d ./a --name two -f ./b' + Creates a named receiver "one" extracting into './a', and a second named receiver "two" + concatenating into the file './b'. + +# Entry specs: + +After output flags are provided, entry specs are processed in order until an +input argument is reached. Entry specs are modelled after the arguments to +find(1), although "actions" are separated from "matching" expressions with +test clauses instead of being fully recursive like find(1). + +The full specification of an entry spec is provided below +(we will use lowercase names to describe this grammar): + + entry-spec = [--expr match-expr --expr] [name-transform]... content-transform + +1. (match-expr) matches against entries, +2. (name-transform) may transform the entry name string, +3. (content-transform) processes the entry content and writes it + to the output. 
+ +Note that only the "content transform" is required: each entry spec must +conclude with exactly one content transform, but the other arguments may +be omitted and will be set to their default values. + +If no entry specs are provided, by default all entries are decompressed and written to the +output collator without modification. This behavior can be requested explicitly +with the command line: + + --expr -true --expr --identity --extract + +*Note:* if a match-expr is provided, it *must* be surrounded with --expr arguments on both sides! +This is a necessary constraint of the current command line parsing. + +{} + +## Name transforms (name-transform): + +Name transforms modify the entry name before writing the entry to the +output. Unlike match expressions, name transforms do not involve any boolean +logic, and instead are composed linearly, each processing the string produced by +the prior name transform in the series. + +*Note:* name transforms do *not* perform any filtering, so if a string +replacement operation "fails", the entry name is simply returned unchanged. + +Trivial: + --identity Return the entry name string unchanged. + +If no name transforms are provided, it defaults to the behavior of --identity. + +Basic: +These transformers do not perform any complex pattern matching, and instead add +or remove a fixed string from the entry name: + + --strip-components + Remove at most directory components from the entry name. + If is greater than or equal the number of components in the + entry dirname, then the basename of the entry is returned. + --add-prefix + Prefix the entry name with a directory path . + A single separator '/' will be added after before the rest of + the entry name, and any trailing '/' in will be trimmed + before joining. 
+ +Complex: +These transformers perform complex pattern matching and replacement upon the +entry name string: + + --transform[=][:] + Extract the portion of the entry name corresponding to , + search it against corresponding to , and then + replace the result with . + + If == 'rx', then may contain references + to numbered capture groups specified by . Otherwise, + is interpreted as a literal string. + + +## Content transforms (content-transform): + +Content transforms determine how to interpret the content of the zip +entry itself. + +*Note:* when multiple entry specs are provided on the command line, a single +entry may be matched more than once. In this case, the entry's content will be +teed to all the specified outputs. + + -x, --extract[=] + Decompress the entry's contents (if necessary) before writing it to + the named output , or the default output if the receiver name is + not specified. + +Attempting to extract an entry using an unsupported compression method with +-x/--extract will produce an error. In this case, --compression-method can be +used to filter out such entries. + +{} +{}"#, + Self::generate_match_expr_help_text(), + Self::generate_pattern_selector_help_text(false), + Self::INPUT_HELP_TEXT, + ) + } + + fn parse_argv(mut argv: VecDeque) -> Result { + let mut args: Vec = Vec::new(); + let mut stdin_flag: bool = false; + let mut positional_zips: Vec = Vec::new(); + + let output_specs = OutputSpecs::parse_argv(&mut argv)?; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"-h" | b"--help" => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + + /* Transition to entry specs */ + /* Try content transforms first, as they are unambiguous sentinel values. 
*/ + b"-x" | b"--extract" => { + args.push(ExtractArg::ContentTransform(ContentTransform::Extract { + name: None, + })); + } + arg_bytes if arg_bytes.starts_with(b"--extract=") => { + let name = arg + .into_string() + .map_err(|arg| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided to --extract=: {arg:?}" + )) + })? + .strip_prefix("--extract=") + .unwrap() + .to_string(); + args.push(ExtractArg::ContentTransform(ContentTransform::Extract { + name: Some(name), + })); + } + + /* Try name transforms next, as they only stack linearly and do not require CFG + * parsing of paired delimiters. */ + /* FIXME: none of these name transforms have any effect if --stdout is + * provided. Should we error or warn about this? */ + b"--identity" => { + args.push(ExtractArg::NameTransform(NameTransform::Trivial( + TrivialTransform::Identity, + ))); + } + b"--strip-components" => { + let num: u8 = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid("no argument provided for --strip-components") + })? + .into_string() + .map_err(|num| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided for --strip-components: {num:?}" + )) + })? + .parse::() + .map_err(|e| { + Self::exit_arg_invalid(&format!( + "failed to parse --strip-components arg as u8: {e:?}" + )) + })?; + args.push(ExtractArg::NameTransform(NameTransform::Basic( + BasicTransform::StripComponents(num), + ))); + } + b"--add-prefix" => { + let prefix = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid("no argument provided for --add-prefix") + })? 
+ .into_string() + .map_err(|prefix| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided for --add-prefix: {prefix:?}" + )) + })?; + args.push(ExtractArg::NameTransform(NameTransform::Basic( + BasicTransform::AddPrefix(prefix), + ))); + } + arg_bytes if arg_bytes.starts_with(b"--transform") => { + let (comp_sel, pat_sel) = + parse_comp_and_pat_sel(arg_bytes, PatternContext::Replacement).ok_or_else( + || { + Self::exit_arg_invalid(&format!( + "invalid --transform argument modifiers: {arg:?}" + )) + }, + )?; + let pattern = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid("no argument provided for --transform") + })? + .into_string() + .map_err(|pattern| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided for --transform : {pattern:?}" + )) + })?; + let replacement_spec = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid( + "no argument provided for --transform", + ) + })? + .into_string() + .map_err(|replacement_spec| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided for --transform : {replacement_spec:?}" + )) + })?; + args.push(ExtractArg::NameTransform(NameTransform::Complex( + ComplexTransform::Transform(TransformArg { + comp_sel, + pat_sel, + pattern, + replacement_spec, + }), + ))); + } + + /* Try parsing match specs! 
*/ + b"--expr" => { + let match_expr = MatchExpression::parse_argv::(&mut argv)?; + args.push(ExtractArg::Match(match_expr)); + } + + /* Transition to input args */ + b"--stdin" => { + stdin_flag = true; + } + b"--" => break, + arg_bytes => { + if arg_bytes.starts_with(b"-") { + return Err(Self::exit_arg_invalid(&format!( + "unrecognized flag {arg:?}" + ))); + } else { + argv.push_front(arg); + break; + } + } + } + } + + positional_zips.extend(argv.into_iter().map(|arg| arg.into())); + if !stdin_flag && positional_zips.is_empty() { + return Err(Self::exit_arg_invalid( + "no zip input files were provided, and --stdin was not provided", + )); + }; + let input_spec = InputSpec { + stdin_stream: stdin_flag, + zip_paths: positional_zips, + }; + + let entry_specs = EntrySpec::parse_extract_args(args)?; + + Ok(Self { + output_specs, + entry_specs, + input_spec, + }) + } +} + +impl crate::driver::ExecuteCommand for Extract { + fn execute(self, err: impl std::io::Write) -> Result<(), crate::CommandError> { + crate::extract::execute_extract(err, self) + } +} diff --git a/cli/src/args/info.rs b/cli/src/args/info.rs new file mode 100644 index 000000000..f571829c5 --- /dev/null +++ b/cli/src/args/info.rs @@ -0,0 +1,760 @@ +use super::{ + extract::{Extract, InputSpec, MatchExpression}, + ArgParseError, CommandFormat, +}; + +use std::{collections::VecDeque, ffi::OsString, fmt, path::PathBuf}; + +#[derive(Debug)] +pub struct ModifierParseError(pub String); + +impl fmt::Display for ModifierParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", &self.0) + } +} + +#[derive(Debug)] +pub enum DirectiveParseError { + Modifier(String, ModifierParseError), + Unrecognized(String), +} + +impl fmt::Display for DirectiveParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Modifier(d, e) => { + write!(f, "unrecognized modifier in directive {d:?}: {e}") + } + Self::Unrecognized(d) => { + write!(f, "unrecognized directive: 
{d:?}") + } + } + } +} + +#[derive(Debug)] +pub enum FormatParseError { + Directive(DirectiveParseError), + Search(String), +} + +impl fmt::Display for FormatParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Directive(e) => { + write!(f, "{e}") + } + Self::Search(e) => { + write!(f, "error in parsing logic: {e}") + } + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum ByteSizeFormat { + #[default] + FullDecimal, + HumanAbbreviated, +} + +impl ByteSizeFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":decimal" => Ok(Self::FullDecimal), + ":human" => Ok(Self::HumanAbbreviated), + _ => Err(ModifierParseError(format!( + "unrecognized byte size format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum OffsetFormat { + Decimal, + #[default] + Hexadecimal, +} + +impl OffsetFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":decimal" => Ok(Self::Decimal), + ":hex" => Ok(Self::Hexadecimal), + _ => Err(ModifierParseError(format!( + "unrecognized offset format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BinaryStringFormat { + #[default] + PrintAsString, + EscapeAscii, + WriteBinaryContents, +} + +impl BinaryStringFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":print" => Ok(Self::PrintAsString), + ":escape" => Ok(Self::EscapeAscii), + ":write" => Ok(Self::WriteBinaryContents), + _ => Err(ModifierParseError(format!( + "unrecognized string format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum ArchiveOverviewFormatDirective { + ArchiveName, + TotalSize(ByteSizeFormat), + NumEntries, + ArchiveComment(BinaryStringFormat), + FirstEntryStart(OffsetFormat), + 
CentralDirectoryStart(OffsetFormat), +} + +impl ParseableDirective for ArchiveOverviewFormatDirective { + fn parse_directive(s: &str) -> Result { + match s { + "name" => Ok(Self::ArchiveName), + s if s.starts_with("size") => { + let size_fmt = ByteSizeFormat::parse(&s["size".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::TotalSize(size_fmt)) + } + "num" => Ok(Self::NumEntries), + s if s.starts_with("comment") => { + let str_fmt = BinaryStringFormat::parse(&s["comment".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::ArchiveComment(str_fmt)) + } + s if s.starts_with("offset") => { + let offset_fmt = OffsetFormat::parse(&s["offset".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::FirstEntryStart(offset_fmt)) + } + s if s.starts_with("cde-offset") => { + let offset_fmt = OffsetFormat::parse(&s["cde-offset".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CentralDirectoryStart(offset_fmt)) + } + _ => Err(DirectiveParseError::Unrecognized(s.to_string())), + } + } +} + +#[derive(Debug)] +pub enum ParseableFormatComponent { + Directive(D), + Escaped(&'static str), + Literal(String), +} + +#[derive(Debug)] +pub struct ParseableFormatSpec { + pub components: Vec>, +} + +pub trait ParseableDirective: Sized { + fn parse_directive(s: &str) -> Result; +} + +impl ParseableFormatSpec +where + D: ParseableDirective, +{ + pub fn parse_format(s: &str) -> Result { + let mut components: Vec> = Vec::new(); + let mut last_source_position: usize = 0; + while let Some(pcnt_pos) = s[last_source_position..] + .find('%') + .map(|p| p + last_source_position) + { + /* Anything in between directives is a literal string. */ + if pcnt_pos > last_source_position { + components.push(ParseableFormatComponent::Literal( + s[last_source_position..pcnt_pos].to_string(), + )); + last_source_position = pcnt_pos; + } + let next_pcnt = s[(pcnt_pos + 1)..] 
+ .find('%') + .map(|p| p + pcnt_pos + 1) + .ok_or_else(|| { + FormatParseError::Search("% directive opened but not closed".to_string()) + })?; + let directive_contents = &s[pcnt_pos..=next_pcnt]; + match directive_contents { + /* An empty directive is a literal percent. */ + "%%" => { + components.push(ParseableFormatComponent::Escaped("%")); + } + /* A single '!' directive is a literal newline. */ + "%!%" => { + components.push(ParseableFormatComponent::Escaped("\n")); + } + "%,%" => { + components.push(ParseableFormatComponent::Escaped("\t")); + } + /* Otherwise, parse the space between percents. */ + d => { + let directive = D::parse_directive(&d[1..(d.len() - 1)]) + .map_err(FormatParseError::Directive)?; + components.push(ParseableFormatComponent::Directive(directive)); + } + } + last_source_position += directive_contents.len(); + } + if s.len() > last_source_position { + components.push(ParseableFormatComponent::Literal( + s[last_source_position..].to_string(), + )); + } + Ok(Self { components }) + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum UnixModeFormat { + #[default] + Octal, + Pretty, +} + +impl UnixModeFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":octal" => Ok(Self::Octal), + ":pretty" => Ok(Self::Pretty), + _ => Err(ModifierParseError(format!( + "unrecognized unix mode format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum TimestampFormat { + DateOnly, + TimeOnly, + #[default] + DateAndTime, +} + +impl TimestampFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":date" => Ok(Self::DateOnly), + ":time" => Ok(Self::TimeOnly), + ":date-time" => Ok(Self::DateAndTime), + _ => Err(ModifierParseError(format!( + "unrecognized timestamp format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum 
CompressionMethodFormat { + Abbreviated, + #[default] + Full, +} + +impl CompressionMethodFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":abbrev" => Ok(Self::Abbreviated), + ":full" => Ok(Self::Full), + _ => Err(ModifierParseError(format!( + "unrecognized compression method format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BinaryNumericValueFormat { + Decimal, + #[default] + Hexadecimal, +} + +impl BinaryNumericValueFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":decimal" => Ok(Self::Decimal), + ":hex" => Ok(Self::Hexadecimal), + _ => Err(ModifierParseError(format!( + "unrecognized binary numeric value format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum FileTypeFormat { + Abbreviated, + #[default] + Full, +} + +impl FileTypeFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":abbrev" => Ok(Self::Abbreviated), + ":full" => Ok(Self::Full), + _ => Err(ModifierParseError(format!( + "unrecognized file type format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum EntryFormatDirective { + Name, + FileType(FileTypeFormat), + Comment(BinaryStringFormat), + LocalHeaderStart(OffsetFormat), + ContentStart(OffsetFormat), + ContentEnd(OffsetFormat), + CentralHeaderStart(OffsetFormat), + CompressedSize(ByteSizeFormat), + UncompressedSize(ByteSizeFormat), + UnixMode(UnixModeFormat), + CompressionMethod(CompressionMethodFormat), + CrcValue(BinaryNumericValueFormat), + Timestamp(TimestampFormat), +} + +impl ParseableDirective for EntryFormatDirective { + fn parse_directive(s: &str) -> Result { + match s { + "name" => Ok(Self::Name), + s if s.starts_with("type") => { + let type_fmt = FileTypeFormat::parse(&s["type".len()..]) + .map_err(|e| 
DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::FileType(type_fmt)) + } + s if s.starts_with("comment") => { + let str_fmt = BinaryStringFormat::parse(&s["comment".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::Comment(str_fmt)) + } + s if s.starts_with("header-start") => { + let offset_fmt = OffsetFormat::parse(&s["header-start".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::LocalHeaderStart(offset_fmt)) + } + s if s.starts_with("content-start") => { + let offset_fmt = OffsetFormat::parse(&s["content-start".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::ContentStart(offset_fmt)) + } + s if s.starts_with("content-end") => { + let offset_fmt = OffsetFormat::parse(&s["content-end".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::ContentEnd(offset_fmt)) + } + s if s.starts_with("central-header-start") => { + let offset_fmt = OffsetFormat::parse(&s["central-header-start".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CentralHeaderStart(offset_fmt)) + } + s if s.starts_with("compressed-size") => { + let size_fmt = ByteSizeFormat::parse(&s["compressed-size".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CompressedSize(size_fmt)) + } + s if s.starts_with("uncompressed-size") => { + let size_fmt = ByteSizeFormat::parse(&s["uncompressed-size".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::UncompressedSize(size_fmt)) + } + s if s.starts_with("unix-mode") => { + let mode_fmt = UnixModeFormat::parse(&s["unix-mode".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::UnixMode(mode_fmt)) + } + s if s.starts_with("compression-method") => { + let method_fmt = CompressionMethodFormat::parse(&s["compression-method".len()..]) + .map_err(|e| 
DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CompressionMethod(method_fmt)) + } + s if s.starts_with("crc") => { + let num_fmt = BinaryNumericValueFormat::parse(&s["crc".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CrcValue(num_fmt)) + } + s if s.starts_with("timestamp") => { + let ts_fmt = TimestampFormat::parse(&s["timestamp".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::Timestamp(ts_fmt)) + } + _ => Err(DirectiveParseError::Unrecognized(s.to_string())), + } + } +} + +#[derive(Debug, Default)] +pub enum FormatSpec { + #[default] + Compact, + Extended, + Custom { + overview: ParseableFormatSpec, + entry: ParseableFormatSpec, + }, +} + +impl FormatSpec { + pub fn parse_format_strings( + archive_format: String, + entry_format: String, + ) -> Result { + let overview = + ParseableFormatSpec::::parse_format(&archive_format) + .map_err(|e| { + Info::exit_arg_invalid(&format!( + "failed to parse archive format string {archive_format:?}: {e}" + )) + })?; + let entry = ParseableFormatSpec::::parse_format(&entry_format) + .map_err(|e| { + Info::exit_arg_invalid(&format!( + "failed to parse entry format string {entry_format:?}: {e}" + )) + })?; + Ok(Self::Custom { overview, entry }) + } +} + +#[derive(Debug)] +pub struct Info { + pub format_spec: FormatSpec, + pub match_expr: Option, + pub input_spec: InputSpec, +} + +impl CommandFormat for Info { + const COMMAND_NAME: &'static str = "info"; + const COMMAND_TABS: &'static str = "\t\t"; + const COMMAND_DESCRIPTION: &'static str = + "Print info about archive contents and individual entries."; + + const USAGE_LINE: &'static str = + "[-h|--help] [--extended|--format ] [--expr MATCH-EXPR --expr] [--stdin] [--] [ZIP-PATH]..."; + + fn generate_help() -> String { + format!( + r#" + -h, --help Print help + +By default, a compact representation of the metadata within the top-level +archive and individual entries is printed to stdout. 
This format, along with the +"extended" format from --extended, is not stable for processing by external +tools. For stable output, a custom format string should be provided with +--format. + +*Note:* the archive metadata is printed *after* the metadata for each entry, +because zip files store metadata at the end of the file! + +Note that the contents of individual entries are not accessible with this +command, and should instead be extracted with the '{}' subcommand, which can +write entries to stdout or a given file path as well as extracted into an +output directory. + + --extended + Print a verbose description of all top-level archive and individual + entry fields. + + --format + Print a custom description of the top-level archive and individual + entry metadata. + + Both format specs must be provided, but empty strings are + accepted. Explicit trailing newlines must be specified and will not be + inserted automatically. + + Note again that archive metadata is printed after all entries + are formatted. + +# Format specs: +Format specs are literal strings interspersed with directives, which are +surrounded by *paired* '%' characters. This is different from typical %-encoded +format strings which only use a single '%'. A doubled '%%' produces a literal +'%', while '%name%' encodes a directive "name". The directives for archive and +entry format strings are different, but certain directives are parsed with +modifier strings which are shared across both format types. These modifiers are +discussed in the section on . + +## Escape characters: +%% + Prints a literal percent '%'. + +%!% + Prints a single literal newline '\n'. + +%,% + Prints a single literal tab character '\t'. + +## Archive format directives: +This is printed at the bottom of the output, after all entries are formatted. + +%name% + The name of the file provided as input, or '' for stdin. + +%size% + The size of the entire archive. + +%num% + The number of entries in the archive. 
+ +%comment% + The archive comment, if provided (otherwise an empty string). + +%offset% + The offset of the first entry's local header from the start of the + file. This is where the zip file content starts, and arbitrary data may be + present in the space before this point. + +%cde-offset% + The offset of the central directory record from the start of the file. This + is where entry contents end, and after this point is only zip metadata until + the end of the file. + +## Entry format directives: +This is printed for each entry. Note again that no newlines are inserted +automatically, so an explicit trailing newline must be provided to avoid writing +all the output to a single line. + +%name% + The name of the entry in the archive. This is the relative path that the + entry would be extracted to. + +%type% + The type of the entry (file, directory, or symlink). + +%comment% + The entry comment, if provided (otherwise an empty string). + +%header-start% + The offset of the entry's local header, which comes before any + entry contents. + +%content-start% + The offset of the entry's possibly-compressed content, which comes after the + local header. + +%content-end% + The offset of the end of the entry's possibly-compressed content. The next + entry's local header begins immediately after. + +%central-header-start% + The offset of the entry's central directory header, at the end of the + zip file. + +%compressed-size% + The size of the entry's possibly-compressed content as stored in + the archive. + +%uncompressed-size% + The size of the entry's content after decompression, as it would be + after extraction. + +%unix-mode% + The mode bits for the entry, if set. If unset, this is interpreted as + a value of 0. + +%compression-method% + The method used to compress the entry. + +%crc% + The CRC32 value for the entry. + +%timestamp% + The timestamp for the entry. + + Note that zip timestamps only have precision down to 2 seconds. 
+ +## Entry format directives: + +## Modifiers : +byte-size = '' [DEFAULT => decimal] + = ':decimal' (decimal numeric representation) + = ':human' (human-abbreviated size e.g. 1K, 1M) + +offset = '' [DEFAULT => hex] + = ':decimal' (decimal numeric representation) + = ':hex' (hexadecimal numeric representation) + +bin-str = '' [DEFAULT => print] + = ':print' (non-unicode chunks are replaced with + the unicode replacement character '�') + = ':escape' (surround with "" and escape each byte as ascii) + = ':write' (write string to output without checking for unicode) + +unix-mode = '' [DEFAULT => octal] + = ':octal' (octal numeric representation) + = ':pretty' (`ls`-like permissions string) + +timestamp = '' [DEFAULT => date-time] + = ':date' (ISO 8601 string representation of date) + = ':time' (HH:MM:SS string representation of time) + = ':date-time' + (ISO 8601 date then HH:MM time joined by a space) + +compression-method + = '' [DEFAULT => full] + = ':abbrev' (abbreviated name of method) + = ':full' (full name of method) + +bin-num = '' [DEFAULT => hex] + = ':decimal' (decimal numeric representation) + = ':hex' (hexadecimal numeric representation) + +file-type = '' [DEFAULT => full] + = ':abbrev' (abbreviated name of file type) + = ':full' (full name of file type) + + +{} + +{} +{} +"#, + Extract::COMMAND_NAME, + Extract::generate_match_expr_help_text(), + Extract::generate_pattern_selector_help_text(true), + Extract::INPUT_HELP_TEXT, + ) + } + + fn parse_argv(mut argv: VecDeque) -> Result { + let mut format_spec: Option = None; + let mut match_expr: Option = None; + let mut stdin_flag = false; + let mut positional_zips: Vec = Vec::new(); + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"-h" | b"--help" => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + + /* Try parsing format specs. 
*/ + b"--extended" => { + if let Some(prev_spec) = format_spec.take() { + return Err(Self::exit_arg_invalid(&format!( + "format spec already provided before --extended: {prev_spec:?}" + ))); + } + format_spec = Some(FormatSpec::Extended); + } + b"--format" => { + if let Some(prev_spec) = format_spec.take() { + return Err(Self::exit_arg_invalid(&format!( + "format spec already provided before --format: {prev_spec:?}" + ))); + } + let archive_format = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid("no arg provided to --format") + })? + .into_string() + .map_err(|fmt_arg| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided to --format: {fmt_arg:?}" + )) + })?; + let entry_format = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid("no arg provided to --format") + })? + .into_string() + .map_err(|fmt_arg| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided to --format: {fmt_arg:?}" + )) + })?; + format_spec = Some(FormatSpec::parse_format_strings( + archive_format, + entry_format, + )?); + } + + /* Try parsing match specs! 
*/ + b"--expr" => { + let new_expr = MatchExpression::parse_argv::(&mut argv)?; + if let Some(prev_expr) = match_expr.take() { + return Err(Self::exit_arg_invalid(&format!( + "multiple match expressions provided: {prev_expr:?} and {new_expr:?}" + ))); + } + match_expr = Some(new_expr); + } + + /* Transition to input args */ + b"--stdin" => { + stdin_flag = true; + } + b"--" => break, + arg_bytes => { + if arg_bytes.starts_with(b"-") { + return Err(Self::exit_arg_invalid(&format!( + "unrecognized flag {arg:?}" + ))); + } else { + argv.push_front(arg); + break; + } + } + } + } + + positional_zips.extend(argv.into_iter().map(|arg| arg.into())); + if !stdin_flag && positional_zips.is_empty() { + return Err(Self::exit_arg_invalid( + "no zip input files were provided, and --stdin was not provided", + )); + }; + let input_spec = InputSpec { + stdin_stream: stdin_flag, + zip_paths: positional_zips, + }; + + let format_spec = format_spec.unwrap_or_default(); + + Ok(Self { + format_spec, + match_expr, + input_spec, + }) + } +} + +impl crate::driver::ExecuteCommand for Info { + fn execute(self, err: impl std::io::Write) -> Result<(), crate::CommandError> { + crate::info::execute_info(err, self) + } +} diff --git a/cli/src/compress.rs b/cli/src/compress.rs new file mode 100644 index 000000000..e35058273 --- /dev/null +++ b/cli/src/compress.rs @@ -0,0 +1,502 @@ +use std::{ + fs, + io::{self, Cursor, IsTerminal, Seek, Write}, + mem, + path::Path, +}; + +use zip::{ + unstable::path_to_string, + write::{SimpleFileOptions, ZipWriter}, + CompressionMethod, ZIP64_BYTES_THR, +}; + +use crate::{args::compress::*, CommandError, OutputHandle, WrapCommandErr}; + +fn enter_recursive_dir_entries( + err: &mut impl Write, + base_rename: Option, + root: &Path, + writer: &mut ZipWriter, + options: SimpleFileOptions, +) -> Result<(), CommandError> { + let base_dirname: String = base_rename + .unwrap_or_else(|| path_to_string(root).into()) + .trim_end_matches('/') + .to_string(); + writeln!( + 
err, + "writing top-level directory entry for {base_dirname:?}" + ) + .unwrap(); + writer + .add_directory(&base_dirname, options) + .wrap_err_with(|| format!("error adding top-level directory entry {base_dirname}"))?; + + let mut readdir_stack: Vec<(fs::ReadDir, String)> = vec![( + fs::read_dir(root) + .wrap_err_with(|| format!("error reading directory contents for {}", root.display()))?, + base_dirname, + )]; + while let Some((mut readdir, top_component)) = readdir_stack.pop() { + if let Some(dir_entry) = readdir + .next() + .transpose() + .wrap_err("reading next dir entry")? + { + let mut components: Vec<&str> = readdir_stack.iter().map(|(_, s)| s.as_ref()).collect(); + components.push(&top_component); + + let entry_basename: String = dir_entry.file_name().into_string().map_err(|name| { + CommandError::InvalidArg(format!("failed to decode basename {name:?}")) + })?; + components.push(&entry_basename); + let full_path: String = components.join("/"); + readdir_stack.push((readdir, top_component)); + + let file_type = dir_entry.file_type().wrap_err_with(|| { + format!("failed to read file type for dir entry {dir_entry:?}") + })?; + if file_type.is_symlink() { + let target: String = path_to_string( + fs::read_link(dir_entry.path()) + .wrap_err_with(|| format!("failed to read symlink from {dir_entry:?}"))?, + ) + .into(); + if target.len() > ZIP64_BYTES_THR.try_into().unwrap() { + return Err(CommandError::InvalidArg(format!( + "symlink target for {full_path} is over {ZIP64_BYTES_THR} bytes (was: {})", + target.len() + ))); + } + writeln!( + err, + "writing recursive symlink entry with name {full_path:?} and target {target:?}" + ) + .unwrap(); + writer + .add_symlink(&full_path, &target, options) + .wrap_err_with(|| format!("error adding symlink from {full_path}->{target}"))?; + } else if file_type.is_file() { + writeln!(err, "writing recursive file entry with name {full_path:?}").unwrap(); + let mut f = fs::File::open(dir_entry.path()).wrap_err_with(|| { + 
format!("error opening file for {full_path} from dir entry {dir_entry:?}") + })?; + /* Get the length of the file before reading it and set large_file if needed. */ + let input_len: u64 = f + .metadata() + .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? + .len(); + let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { + writeln!( + err, + "temporarily ensuring .large_file(true) for current entry" + ) + .unwrap(); + options.large_file(true) + } else { + options + }; + writer + .start_file(&full_path, maybe_large_file_options) + .wrap_err_with(|| format!("error creating file entry for {full_path}"))?; + io::copy(&mut f, writer).wrap_err_with(|| { + format!("error copying content for {full_path} from file {f:?}") + })?; + } else { + assert!(file_type.is_dir()); + writeln!( + err, + "writing recursive directory entry with name {full_path:?}" + ) + .unwrap(); + writer + .add_directory(&full_path, options) + .wrap_err_with(|| format!("failed to create directory entry {full_path}"))?; + writeln!( + err, + "adding subdirectories depth-first for recursive directory entry {entry_basename:?}" + ).unwrap(); + let new_readdir = fs::read_dir(dir_entry.path()).wrap_err_with(|| { + format!("failed to read recursive directory contents from {dir_entry:?}") + })?; + readdir_stack.push((new_readdir, entry_basename)); + } + } + } + Ok(()) +} + +pub fn execute_compress(mut err: impl Write, args: Compress) -> Result<(), CommandError> { + let Compress { + output, + archive_comment, + args, + positional_paths, + } = args; + + let (out, do_append) = match output { + OutputType::File { path, append } => { + if append { + writeln!( + err, + "reading compressed zip from output file path {path:?} for append" + ) + .unwrap(); + match fs::OpenOptions::new() + .read(true) + .write(true) + .create(false) + .open(&path) + { + Ok(f) => { + writeln!(err, "output zip file existed, appending").unwrap(); + (OutputHandle::File(f), true) + } + Err(e) if e.kind() == 
io::ErrorKind::NotFound => { + writeln!( + err, + "output zip file did not exist, creating new file instead of appending" + ) + .unwrap(); + let out = + OutputHandle::File(fs::File::create(&path).wrap_err_with(|| { + format!("failed to create new zip output file at {path:?}") + })?); + (out, false) + } + Err(e) => { + return Err(e).wrap_err_with(|| { + format!( + "unexpected error reading zip output file for append at {path:?}" + ) + }); + } + } + } else { + writeln!(err, "writing compressed zip to output file path {path:?}").unwrap(); + let out = OutputHandle::File(fs::File::create(&path).wrap_err_with(|| { + format!("failed to create output file at {}", path.display()) + })?); + (out, false) + } + } + OutputType::Stdout { allow_tty } => { + writeln!( + err, + "writing to stdout and buffering compressed zip in memory" + ) + .unwrap(); + if io::stdout().is_terminal() && !allow_tty { + /* TODO: maybe figure out some way to ensure --stdout is still the correct flag */ + return Err(CommandError::InvalidArg( + "stdout is a tty, but --stdout was not set".to_string(), + )); + } + let out = OutputHandle::InMem(Cursor::new(Vec::new())); + (out, false) + } + }; + let mut writer = if do_append { + ZipWriter::new_append(out) + .wrap_err("failed to initialize zip writer from existing zip file for append")? 
+ } else { + ZipWriter::new(out) + }; + + if let Some(comment) = archive_comment { + writeln!(err, "comment was provided: {comment:?}").unwrap(); + let comment = comment.into_encoded_bytes(); + writer.set_raw_comment(comment.into()); + } + + let mut options = SimpleFileOptions::default() + .compression_method(CompressionMethod::Deflated) + .large_file(false); + writeln!(err, "default zip entry options: {options:?}").unwrap(); + let mut last_name: Option = None; + let mut symlink_flag: bool = false; + + for arg in args.into_iter() { + match arg { + CompressionArg::CompressionMethod(method) => { + let method = match method { + CompressionMethodArg::Stored => CompressionMethod::Stored, + CompressionMethodArg::Deflate => CompressionMethod::Deflated, + #[cfg(feature = "deflate64")] + CompressionMethodArg::Deflate64 => CompressionMethod::Deflate64, + #[cfg(feature = "bzip2")] + CompressionMethodArg::Bzip2 => CompressionMethod::Bzip2, + #[cfg(feature = "zstd")] + CompressionMethodArg::Zstd => CompressionMethod::Zstd, + }; + writeln!(err, "setting compression method {method:?}").unwrap(); + options = options.compression_method(method); + } + CompressionArg::Level(CompressionLevel(level)) => { + writeln!(err, "setting compression level {level:?}").unwrap(); + options = options.compression_level(Some(level)); + } + CompressionArg::UnixPermissions(UnixPermissions(mode)) => { + writeln!(err, "setting file mode {mode:#o}").unwrap(); + options = options.unix_permissions(mode); + } + CompressionArg::LargeFile(large_file) => { + writeln!(err, "setting large file flag to {large_file:?}").unwrap(); + options = options.large_file(large_file); + } + CompressionArg::Name(name) => { + writeln!(err, "setting name of next entry to {name:?}").unwrap(); + if let Some(last_name) = last_name { + return Err(CommandError::InvalidArg(format!( + "got two names before an entry: {last_name} and {name}" + ))); + } + last_name = Some(name); + } + CompressionArg::Dir => { + writeln!(err, "writing dir 
entry").unwrap(); + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag provided before dir entry".to_string(), + )); + } + let dirname = last_name.take().ok_or_else(|| { + CommandError::InvalidArg("no name provided before dir entry".to_string()) + })?; + writer + .add_directory(&dirname, options) + .wrap_err_with(|| format!("failed to create dir entry {dirname}"))?; + } + CompressionArg::Symlink => { + writeln!(err, "setting symlink flag for next entry").unwrap(); + if symlink_flag { + /* TODO: make this a warning? */ + return Err(CommandError::InvalidArg( + "symlink flag provided twice before entry".to_string(), + )); + } + symlink_flag = true; + } + CompressionArg::Immediate(data) => { + let name = last_name.take().ok_or_else(|| { + CommandError::InvalidArg(format!( + "no name provided for immediate data {data:?}" + )) + })?; + /* It's highly unlikely any OS allows process args of this length, so even though + * we're using rust's env::args_os() and it would be very impressive for an attacker + * to get CLI args to overflow, it seems likely to be inefficient in any case, and + * very unlikely to be useful, so exit with a clear error. */ + if data.len() > ZIP64_BYTES_THR.try_into().unwrap() { + return Err(CommandError::InvalidArg(format!( + "length of immediate data argument is {}; use a file for inputs over {} bytes", + data.len(), + ZIP64_BYTES_THR + ))); + }; + if symlink_flag { + /* This is a symlink entry. */ + let target = data.into_string().map_err(|target| { + CommandError::InvalidArg(format!( + "failed to decode immediate symlink target {target:?}" + )) + })?; + writeln!( + err, + "writing immediate symlink entry with name {name:?} and target {target:?}" + ) + .unwrap(); + /* TODO: .add_symlink() should support OsString targets! */ + writer + .add_symlink(&name, &target, options) + .wrap_err_with(|| { + format!("failed to created symlink entry {name}->{target}") + })?; + symlink_flag = false; + } else { + /* This is a file entry. 
*/ + writeln!( + err, + "writing immediate file entry with name {name:?} and data {data:?}" + ) + .unwrap(); + let data = data.into_encoded_bytes(); + writer + .start_file(&name, options) + .wrap_err_with(|| format!("failed to create file entry {name}"))?; + writer.write_all(data.as_ref()).wrap_err_with(|| { + format!( + "failed writing immediate data of length {} to file entry {name}", + data.len() + ) + })?; + } + } + CompressionArg::FilePath(path) => { + let name = last_name + .take() + .unwrap_or_else(|| path_to_string(&path).into()); + if symlink_flag { + /* This is a symlink entry. */ + let target: String = + path_to_string(fs::read_link(&path).wrap_err_with(|| { + format!("failed to read symlink from path {}", path.display()) + })?) + .into(); + /* Similarly to immediate data arguments, we're simply not going to support + * symlinks over this length, which should be impossible anyway. */ + if target.len() > ZIP64_BYTES_THR.try_into().unwrap() { + return Err(CommandError::InvalidArg(format!( + "symlink target for {name} is over {ZIP64_BYTES_THR} bytes (was: {})", + target.len() + ))); + } + writeln!(err, "writing symlink entry from path {path:?} with name {name:?} and target {target:?}").unwrap(); + writer + .add_symlink(&name, &target, options) + .wrap_err_with(|| { + format!("failed to create symlink entry for {name}->{target}") + })?; + symlink_flag = false; + } else { + /* This is a file entry. */ + writeln!( + err, + "writing file entry from path {path:?} with name {name:?}" + ) + .unwrap(); + let mut f = fs::File::open(&path).wrap_err_with(|| { + format!("error opening file for {name} at {}", path.display()) + })?; + /* Get the length of the file before reading it and set large_file if needed. */ + let input_len: u64 = f + .metadata() + .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? 
+ .len(); + writeln!(err, "entry is {input_len} bytes long").unwrap(); + let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { + writeln!( + err, + "temporarily ensuring .large_file(true) for current entry" + ) + .unwrap(); + options.large_file(true) + } else { + options + }; + writer + .start_file(&name, maybe_large_file_options) + .wrap_err_with(|| format!("error creating file entry for {name}"))?; + io::copy(&mut f, &mut writer).wrap_err_with(|| { + format!("error copying content for {name} from file {f:?}") + })?; + } + } + CompressionArg::RecursiveDirPath(r) => { + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag provided before recursive dir entry".to_string(), + )); + } + writeln!( + err, + "writing recursive dir entries for path {r:?} with name {last_name:?}" + ) + .unwrap(); + enter_recursive_dir_entries(&mut err, last_name.take(), &r, &mut writer, options)?; + } + } + } + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag remaining after all entry flags processed".to_string(), + )); + } + if let Some(last_name) = last_name { + return Err(CommandError::InvalidArg(format!( + "name {last_name} remaining after all entry flags processed" + ))); + } + + for pos_arg in positional_paths.into_iter() { + let file_type = fs::symlink_metadata(&pos_arg) + .wrap_err_with(|| format!("failed to read metadata from path {}", pos_arg.display()))? 
+ .file_type(); + if file_type.is_symlink() { + let target = fs::read_link(&pos_arg).wrap_err_with(|| { + format!("failed to read symlink content from {}", pos_arg.display()) + })?; + writeln!( + err, + "writing positional symlink entry with path {pos_arg:?} and target {target:?}" + ) + .unwrap(); + writer + .add_symlink_from_path(&pos_arg, &target, options) + .wrap_err_with(|| { + format!( + "failed to create symlink entry for {}->{}", + pos_arg.display(), + target.display() + ) + })?; + } else if file_type.is_file() { + writeln!(err, "writing positional file entry with path {pos_arg:?}").unwrap(); + let mut f = fs::File::open(&pos_arg) + .wrap_err_with(|| format!("failed to open file at {}", pos_arg.display()))?; + /* Get the length of the file before reading it and set large_file if needed. */ + let input_len: u64 = f + .metadata() + .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? + .len(); + let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { + writeln!( + err, + "temporarily ensuring .large_file(true) for current entry" + ) + .unwrap(); + options.large_file(true) + } else { + options + }; + writer + .start_file_from_path(&pos_arg, maybe_large_file_options) + .wrap_err_with(|| format!("failed to create file entry {}", pos_arg.display()))?; + io::copy(&mut f, &mut writer) + .wrap_err_with(|| format!("failed to copy file contents from {f:?}"))?; + } else { + assert!(file_type.is_dir()); + writeln!( + err, + "writing positional recursive dir entry for {pos_arg:?}" + ) + .unwrap(); + enter_recursive_dir_entries(&mut err, None, &pos_arg, &mut writer, options)?; + } + } + + let handle = writer + .finish() + .wrap_err("failed to write zip to output handle")?; + match handle { + OutputHandle::File(f) => { + let archive_len: u64 = f + .metadata() + .wrap_err_with(|| format!("failed reading metadata from file {f:?}"))? 
+                .len();
+            writeln!(err, "file archive {f:?} was {archive_len} bytes").unwrap();
+            mem::drop(f); /* Superfluous explicit drop. */
+        }
+        OutputHandle::InMem(mut cursor) => {
+            let archive_len: u64 = cursor.position();
+            writeln!(err, "in-memory archive was {archive_len} bytes").unwrap();
+            cursor.rewind().wrap_err("failed to rewind cursor")?;
+            let mut stdout = io::stdout().lock();
+            /* FIX: wrap_err() takes its message verbatim, so the "{archive_len}" placeholder
+             * in the previous plain string literal was never interpolated; build the message
+             * lazily (and correctly) with wrap_err_with() + format!(). */
+            io::copy(&mut cursor, &mut stdout).wrap_err_with(|| {
+                format!("failed to copy {archive_len} byte archive to stdout")
+            })?;
+        }
+    }
+
+    Ok(())
+}
diff --git a/cli/src/extract.rs b/cli/src/extract.rs
new file mode 100644
index 000000000..f5aaa28c7
--- /dev/null
+++ b/cli/src/extract.rs
@@ -0,0 +1,195 @@
+use std::{
+    borrow::Cow,
+    cell::RefCell,
+    fs,
+    io::{self, Read, Write},
+    rc::Rc,
+};
+
+use zip::read::{ZipArchive, ZipFile};
+
+use crate::{args::extract::*, CommandError, WrapCommandErr};
+
+pub mod entries;
+pub mod matcher;
+pub mod named_outputs;
+pub mod receiver;
+pub mod transform;
+use entries::{IterateEntries, StreamInput, ZipFileInput};
+use receiver::{CompiledEntrySpec, EntryData, EntryKind, EntryReceiver, ExtractEntry};
+
+/// If `entry` is a symlink, read its target into `symlink_target` and return a slice of it;
+/// otherwise return `Ok(None)`. The caller-provided buffer is cleared and reused so repeated
+/// calls do not reallocate.
+fn maybe_process_symlink<'a, 't>(
+    entry: &mut ZipFile<'a>,
+    err: &Rc<RefCell<impl Write>>,
+    symlink_target: &'t mut Vec<u8>,
+) -> Result<Option<&'t [u8]>, CommandError> {
+    let (kind, size) = {
+        /* FIXME: the ZipFile<'a> struct contains a *mutable* reference to the parent archive,
+         * and this actually imposes a mutable reference upon any references to the
+         * immutable ZipFileData contents. This means we cannot have any immutable
+         * references to the ZipFileData contents at the same time as a mutable
+         * reference. What this means here is that we have to create a temporary EntryData
+         * struct and then immediately throw it away in order to be able to read the entry
+         * contents with io::Read. ZipEntry<'a, R> from
+         * https://github.com/zip-rs/zip2/pull/233 avoids this issue!!! 
*/ + let data = EntryData::from_entry(&entry); + (data.kind, data.uncompressed_size) + }; + if !matches!(kind, EntryKind::Symlink) { + return Ok(None); + } + + /* We can't read the entry name from EntryData because we can't have any immutable + * references to ZipFileData like the name at the same time we use the entry as + * a reader! That means our log message here is very unclear! */ + writeln!(&mut err.borrow_mut(), "reading symlink target").unwrap(); + /* Re-use the vector allocation, but make sure to avoid re-using the symlink data from + * a previous iteration. */ + symlink_target.clear(); + entry + .read_to_end(symlink_target) + .wrap_err("failed to read symlink target from zip archive entry")?; + debug_assert_eq!(symlink_target.len(), size.try_into().unwrap()); + Ok(Some(symlink_target)) +} + +fn process_entry<'a, 'w, 'c, 'it>( + mut entry: ZipFile<'a>, + err: &Rc>, + compiled_specs: impl Iterator>, + copy_buf: &mut [u8], + symlink_target: &mut Vec, + deduped_concat_writers: &mut Vec<&'c Rc>>, + matching_handles: &mut Vec>, +) -> Result<(), CommandError> +where + 'w: 'it, + 'it: 'c, +{ + deduped_concat_writers.clear(); + matching_handles.clear(); + + let symlink_target = maybe_process_symlink(&mut entry, err, symlink_target)?; + /* We dropped any mutable handles to the entry, so now we can access its metadata again. */ + let data = EntryData::from_entry(&entry); + + let mut deduped_matching_extracts: Vec<(&'c Rc, Vec>)> = + Vec::new(); + for matching_spec in compiled_specs.filter_map(|spec| spec.try_match_and_transform(&data)) { + if matching_spec.is_nested_duplicate(deduped_concat_writers, &mut deduped_matching_extracts) + { + writeln!(&mut err.borrow_mut(), "skipping repeated output").unwrap(); + } + } + + matching_handles.extend( + deduped_matching_extracts + .into_iter() + .flat_map(|(recv, names)| names.into_iter().map(move |n| (recv, n))) + .map(|(recv, name)| recv.generate_entry_handle(&data, symlink_target.as_deref(), name)) + .collect::, _>>()? 
+ .into_iter() + .flatten(), + ); + + let mut read_len: usize; + loop { + read_len = entry.read(copy_buf).wrap_err("read of entry failed")?; + if read_len == 0 { + break; + } + let cur_data: &[u8] = ©_buf[..read_len]; + for concat_writer in deduped_concat_writers.iter() { + concat_writer + .borrow_mut() + .write_all(cur_data) + .wrap_err("failed to write data to concat output")?; + } + for extract_writer in matching_handles.iter_mut() { + extract_writer + .write_all(cur_data) + .wrap_err("failed to write data to extract output")?; + } + } + + Ok(()) +} + +pub fn execute_extract(err: impl Write, extract: Extract) -> Result<(), CommandError> { + let Extract { + output_specs, + entry_specs, + input_spec: InputSpec { + stdin_stream, + zip_paths, + }, + } = extract; + let err = Rc::new(RefCell::new(err)); + + writeln!(&mut err.borrow_mut(), "entry specs: {entry_specs:?}").unwrap(); + let compiled_specs = + named_outputs::process_entry_and_output_specs(err.clone(), entry_specs, output_specs)?; + writeln!(&mut err.borrow_mut(), "compiled specs: {compiled_specs:?}").unwrap(); + + let mut copy_buf: Vec = vec![0u8; 1024 * 16]; + let mut symlink_target: Vec = Vec::new(); + + let mut deduped_concat_writers: Vec<&Rc>> = Vec::new(); + let mut matching_handles: Vec> = Vec::new(); + + if stdin_stream { + writeln!(&mut err.borrow_mut(), "extracting from stdin").unwrap(); + let mut stdin = StreamInput::new(io::stdin().lock()); + + while let Some(entry) = stdin.next_entry()? 
{ + process_entry( + entry, + &err, + compiled_specs.iter(), + &mut copy_buf, + &mut symlink_target, + &mut deduped_concat_writers, + &mut matching_handles, + )?; + } + } + + for p in zip_paths.into_iter() { + writeln!( + &mut err.borrow_mut(), + "extracting from zip input file {p:?}", + ) + .unwrap(); + let zip = fs::File::open(&p) + .wrap_err_with(|| format!("failed to open zip input file path {p:?}")) + .and_then(|f| { + ZipArchive::new(f) + .wrap_err_with(|| format!("failed to create zip archive for file {p:?}")) + })?; + let mut zip_entries = ZipFileInput::new(Box::new(zip)); + + while let Some(entry) = zip_entries.next_entry()? { + process_entry( + entry, + &err, + compiled_specs.iter(), + &mut copy_buf, + &mut symlink_target, + &mut deduped_concat_writers, + &mut matching_handles, + )?; + } + } + + /* Finalize all extract entries. */ + for spec in compiled_specs.into_iter() { + match spec { + CompiledEntrySpec::Concat(_) => (), + CompiledEntrySpec::Extract(ExtractEntry { recv, .. 
}) => { + recv.finalize_entries()?; + } + } + } + + Ok(()) +} diff --git a/cli/src/extract/entries.rs b/cli/src/extract/entries.rs new file mode 100644 index 000000000..bb46fb79b --- /dev/null +++ b/cli/src/extract/entries.rs @@ -0,0 +1,132 @@ +use std::{fs, io, ops}; + +use zip::{ + read::{read_zipfile_from_stream, ZipFile}, + ZipArchive, +}; + +use crate::{CommandError, WrapCommandErr}; + +pub trait IterateEntries { + fn next_entry(&mut self) -> Result, CommandError>; +} + +pub struct ReadChecker { + inner: R, + bytes_read: u64, +} + +impl ReadChecker { + pub const fn current_bytes_read(&self) -> u64 { + self.bytes_read + } +} + +impl ReadChecker +where + R: io::Read, +{ + pub fn exhaust(mut self) -> io::Result<(R, u64)> { + io::copy(&mut self, &mut io::sink())?; + let Self { inner, bytes_read } = self; + Ok((inner, bytes_read)) + } +} + +impl io::Read for ReadChecker +where + R: io::Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let n = self.inner.read(buf)?; + let num_read: u64 = n.try_into().unwrap(); + self.bytes_read += num_read; + Ok(n) + } +} + +pub struct StreamInput { + inner: ReadChecker, + entries_read: usize, +} + +impl StreamInput { + pub fn new(inner: R) -> Self { + Self { + inner: ReadChecker { + inner, + bytes_read: 0, + }, + entries_read: 0, + } + } + + pub fn into_inner(self) -> (ReadChecker, usize) { + let Self { + inner, + entries_read, + } = self; + (inner, entries_read) + } +} + +impl IterateEntries for StreamInput +where + R: io::Read, +{ + fn next_entry(&mut self) -> Result, CommandError> { + if let Some(entry) = read_zipfile_from_stream(&mut self.inner) + .wrap_err("failed to read zip entries from stdin")? 
+ { + self.entries_read += 1; + Ok(Some(entry)) + } else { + Ok(None) + } + } +} + +#[derive(Debug)] +pub struct ZipFileInput { + inner: A, + file_counter: usize, +} + +impl ZipFileInput { + pub fn new(inner: A) -> Self { + Self { + inner, + file_counter: 0, + } + } +} + +impl ZipFileInput +where + A: ops::Deref>, +{ + pub fn remaining(&self) -> usize { + self.inner.len() - self.file_counter + } + + pub fn none_left(&self) -> bool { + self.remaining() == 0 + } +} + +impl IterateEntries for ZipFileInput +where + A: ops::DerefMut>, +{ + fn next_entry(&mut self) -> Result, CommandError> { + if self.none_left() { + return Ok(None); + } + let prev_counter = self.file_counter; + self.file_counter += 1; + self.inner + .by_index(prev_counter) + .map(Some) + .wrap_err_with(|| format!("failed to read entry #{prev_counter} from zip",)) + } +} diff --git a/cli/src/extract/matcher.rs b/cli/src/extract/matcher.rs new file mode 100644 index 000000000..9e3eb463f --- /dev/null +++ b/cli/src/extract/matcher.rs @@ -0,0 +1,528 @@ +use std::{borrow::Cow, fmt}; + +#[cfg(feature = "glob")] +use glob; +#[cfg(feature = "rx")] +use regex; + +use zip::CompressionMethod; + +use super::receiver::{EntryData, EntryKind}; +use super::transform::ComponentSplit; +use crate::{args::extract::*, CommandError}; + +#[inline(always)] +fn process_component_selector<'s>(sel: ComponentSelector, name: &'s str) -> Option<&'s str> { + ComponentSplit::split_by_component_selector(sel, name).map(|split| match split { + ComponentSplit::LeftAnchored { selected_left, .. } => selected_left, + ComponentSplit::RightAnchored { selected_right, .. 
} => selected_right,
+        ComponentSplit::Whole(s) => s,
+    })
+}
+
+/// How a pattern is pinned to the ends of the string it is matched against.
+#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum SearchAnchoring {
+    #[default]
+    Unanchored,
+    LeftAnchored,
+    RightAnchored,
+    DoublyAnchored,
+}
+
+impl SearchAnchoring {
+    /// Combine the :p (prefix-anchored) and :s (suffix-anchored) modifier flags.
+    pub const fn from_prefix_suffix_flags(prefix_anchored: bool, suffix_anchored: bool) -> Self {
+        match (prefix_anchored, suffix_anchored) {
+            (true, true) => Self::DoublyAnchored,
+            (true, false) => Self::LeftAnchored,
+            (false, true) => Self::RightAnchored,
+            (false, false) => Self::Unanchored,
+        }
+    }
+
+    /// Wrap `pattern` in `^(?:…)` / `(?:…)$` anchors as required; borrows when unanchored
+    /// so the common case allocates nothing.
+    pub fn wrap_regex_pattern<'s>(self, pattern: &'s str) -> Cow<'s, str> {
+        match self {
+            Self::Unanchored => Cow::Borrowed(pattern),
+            Self::LeftAnchored => Cow::Owned(format!("^(?:{pattern})")),
+            Self::RightAnchored => Cow::Owned(format!("(?:{pattern})$")),
+            Self::DoublyAnchored => Cow::Owned(format!("^(?:{pattern})$")),
+        }
+    }
+}
+
+/// Whether string comparison folds ASCII case (non-ASCII case is never folded).
+#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum CaseSensitivity {
+    #[default]
+    Sensitive,
+    Insensitive,
+}
+
+impl CaseSensitivity {
+    pub const fn from_case_insensitive_flag(case_insensitive: bool) -> Self {
+        match case_insensitive {
+            true => Self::Insensitive,
+            false => Self::Sensitive,
+        }
+    }
+
+    /// Compare two strings under this sensitivity (ASCII-only folding when insensitive).
+    pub fn string_equal(self, a: &str, b: &str) -> bool {
+        match self {
+            Self::Insensitive => a.eq_ignore_ascii_case(b),
+            Self::Sensitive => a == b,
+        }
+    }
+}
+
+/// Anchoring + case options shared by all name matchers, parsed from pattern modifier flags.
+#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct MatchModifiers {
+    pub anchoring: SearchAnchoring,
+    pub case: CaseSensitivity,
+}
+
+impl MatchModifiers {
+    /// Validate and convert raw modifier flags; :g (multimatch) is rejected here because it
+    /// only makes sense for name *transforms*, not match expressions.
+    pub fn from_flags(flags: PatternModifierFlags) -> Result<Self, CommandError> {
+        let PatternModifierFlags {
+            case_insensitive,
+            multiple_matches,
+            prefix_anchored,
+            suffix_anchored,
+        } = flags;
+        if multiple_matches {
+            return Err(CommandError::InvalidArg(format!(
+                "multimatch modifier :g is unused in match expressions: {flags:?}"
+            )));
+        }
+        let case = CaseSensitivity::from_case_insensitive_flag(case_insensitive);
+        let anchoring = SearchAnchoring::from_prefix_suffix_flags(prefix_anchored, suffix_anchored);
+        Ok(Self { anchoring, case })
+    }
+}
+
+/// A compiled predicate over entry-name strings (literal, glob, or regex backends).
+trait NameMatcher: fmt::Debug {
+    fn create(pattern: String, opts: MatchModifiers) -> Result<Self, CommandError>
+    where
+        Self: Sized;
+    fn matches(&self, input: &str) -> bool;
+}
+
+/// Matches a literal substring, with optional ASCII case folding and end anchoring.
+#[derive(Debug)]
+struct LiteralMatcher {
+    lit: String,
+    case: CaseSensitivity,
+    anchoring: SearchAnchoring,
+}
+
+impl NameMatcher for LiteralMatcher {
+    fn create(pattern: String, opts: MatchModifiers) -> Result<Self, CommandError>
+    where
+        Self: Sized,
+    {
+        let MatchModifiers { case, anchoring } = opts;
+        Ok(Self {
+            /* For insensitive matching the needle is pre-uppercased once here so the
+             * unanchored path below only has to uppercase the haystack. */
+            lit: match case {
+                CaseSensitivity::Sensitive => pattern,
+                CaseSensitivity::Insensitive => pattern.to_ascii_uppercase(),
+            },
+            case,
+            anchoring,
+        })
+    }
+
+    fn matches(&self, input: &str) -> bool {
+        if input.len() < self.lit.len() {
+            return false;
+        }
+        match self.anchoring {
+            SearchAnchoring::Unanchored => match self.case {
+                CaseSensitivity::Insensitive => input.to_ascii_uppercase().contains(&self.lit),
+                CaseSensitivity::Sensitive => input.contains(&self.lit),
+            },
+            SearchAnchoring::DoublyAnchored => self.case.string_equal(&self.lit, input),
+            /* FIX: direct byte slicing (&input[..len], &input[len..]) panics when the cut
+             * lands inside a multi-byte UTF-8 character; str::get() returns None there, and
+             * a cut that is not on a char boundary can never equal the literal anyway. */
+            SearchAnchoring::LeftAnchored => input
+                .get(..self.lit.len())
+                .map_or(false, |prefix| self.case.string_equal(&self.lit, prefix)),
+            SearchAnchoring::RightAnchored => input
+                .get((input.len() - self.lit.len())..)
+                .map_or(false, |suffix| self.case.string_equal(&self.lit, suffix)),
+        }
+    }
+}
+
+#[derive(Debug)]
+#[cfg(feature = "glob")]
+struct GlobMatcher {
+    pat: glob::Pattern,
+    glob_opts: glob::MatchOptions,
+}
+
+#[cfg(feature = "glob")]
+impl NameMatcher for GlobMatcher {
+    fn create(pattern: String, opts: MatchModifiers) -> Result<Self, CommandError>
+    where
+        Self: Sized,
+    {
+        let MatchModifiers { anchoring, case } = opts;
+        if !matches!(anchoring, SearchAnchoring::Unanchored) {
+            return Err(CommandError::InvalidArg(format!(
+                "anchored search with :p or :s is incompatible with glob 
patterns: {opts:?}" + ))); + } + let glob_opts = glob::MatchOptions { + case_sensitive: match case { + CaseSensitivity::Sensitive => true, + CaseSensitivity::Insensitive => false, + }, + ..Default::default() + }; + let pat = glob::Pattern::new(&pattern).map_err(|e| { + CommandError::InvalidArg(format!( + "failed to construct glob matcher from pattern {pattern:?}: {e}" + )) + })?; + Ok(Self { pat, glob_opts }) + } + + fn matches(&self, input: &str) -> bool { + self.pat.matches_with(input, self.glob_opts) + } +} + +#[derive(Debug)] +#[cfg(feature = "rx")] +struct RegexMatcher { + pat: regex::Regex, +} + +#[cfg(feature = "rx")] +impl NameMatcher for RegexMatcher { + fn create(pattern: String, opts: MatchModifiers) -> Result + where + Self: Sized, + { + let MatchModifiers { case, anchoring } = opts; + + let pattern = anchoring.wrap_regex_pattern(&pattern); + + let pat = regex::RegexBuilder::new(&pattern) + .case_insensitive(match case { + CaseSensitivity::Sensitive => false, + CaseSensitivity::Insensitive => true, + }) + .build() + .map_err(|e| { + CommandError::InvalidArg(format!( + "failed to construct regex matcher from pattern {pattern:?}: {e}" + )) + })?; + Ok(Self { pat }) + } + + fn matches(&self, input: &str) -> bool { + self.pat.is_match(input) + } +} + +pub trait EntryMatcher: fmt::Debug { + type Arg + where + Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized; + fn matches(&self, entry: &EntryData) -> bool; +} + +#[derive(Debug, Copy, Clone)] +enum TrivialMatcher { + True, + False, +} + +impl EntryMatcher for TrivialMatcher { + type Arg = TrivialPredicate where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + TrivialPredicate::True => Self::True, + TrivialPredicate::False => Self::False, + }) + } + + fn matches(&self, _entry: &EntryData) -> bool { + match self { + Self::True => true, + Self::False => false, + } + } +} + +#[derive(Debug, Copy, Clone)] +enum EntryTypeMatcher { + File, 
+ Dir, + Symlink, +} + +impl EntryMatcher for EntryTypeMatcher { + type Arg = EntryType where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + EntryType::File => Self::File, + EntryType::Dir => Self::Dir, + EntryType::Symlink => Self::Symlink, + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match (self, entry.kind) { + (Self::File, EntryKind::File) => true, + (Self::Dir, EntryKind::Dir) => true, + (Self::Symlink, EntryKind::Symlink) => true, + _ => false, + } + } +} + +#[derive(Debug, Copy, Clone)] +enum NonSpecificMethods { + Any, + Known, +} + +impl EntryMatcher for NonSpecificMethods { + type Arg = NonSpecificCompressionMethodArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + NonSpecificCompressionMethodArg::Any => Self::Any, + NonSpecificCompressionMethodArg::Known => Self::Known, + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match self { + Self::Any => true, + Self::Known => { + SpecificCompressionMethodArg::KNOWN_COMPRESSION_METHODS.contains(&entry.compression) + } + } + } +} + +#[derive(Debug)] +struct SpecificMethods { + specific_method: CompressionMethod, +} + +impl EntryMatcher for SpecificMethods { + type Arg = SpecificCompressionMethodArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(Self { + specific_method: arg.translate_to_zip(), + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + self.specific_method == entry.compression + } +} + +#[derive(Debug, Copy, Clone)] +enum DepthLimit { + Max(usize), + Min(usize), +} + +impl EntryMatcher for DepthLimit { + type Arg = DepthLimitArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + DepthLimitArg::Max(max) => Self::Max(max.into()), + DepthLimitArg::Min(min) => Self::Min(min.into()), + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + let 
num_components = entry.name.split('/').count(); + match self { + Self::Max(max) => num_components <= *max, + Self::Min(min) => num_components >= *min, + } + } +} + +#[derive(Debug, Copy, Clone)] +enum Size { + Max(u64), + Min(u64), +} + +impl EntryMatcher for Size { + type Arg = SizeArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + SizeArg::Max(max) => Self::Max(max), + SizeArg::Min(min) => Self::Min(min), + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match self { + Self::Max(max) => entry.uncompressed_size <= *max, + Self::Min(min) => entry.uncompressed_size >= *min, + } + } +} + +#[derive(Debug)] +struct PatternMatcher { + matcher: Box, + comp_sel: ComponentSelector, +} + +impl EntryMatcher for PatternMatcher { + type Arg = MatchArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + let MatchArg { + comp_sel, + pat_sel: PatternSelector { pat_sel, modifiers }, + pattern, + } = arg; + + let opts = MatchModifiers::from_flags(modifiers)?; + let matcher: Box = match pat_sel { + PatternSelectorType::Glob => { + #[cfg(feature = "glob")] + { + Box::new(GlobMatcher::create(pattern, opts)?) + } + #[cfg(not(feature = "glob"))] + { + return Err(CommandError::InvalidArg(format!( + "glob patterns were requested, but this binary was built without the \"glob\" feature: {pattern:?}" + ))); + } + } + + PatternSelectorType::Literal => Box::new(LiteralMatcher::create(pattern, opts)?), + PatternSelectorType::Regexp => { + #[cfg(feature = "rx")] + { + Box::new(RegexMatcher::create(pattern, opts)?) 
+ } + #[cfg(not(feature = "rx"))] + { + return Err(CommandError::InvalidArg(format!( + "regexp patterns were requested, but this binary was built without the \"rx\" feature: {pattern:?}" + ))); + } + } + }; + + Ok(Self { matcher, comp_sel }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match process_component_selector(self.comp_sel, entry.name) { + None => false, + Some(s) => self.matcher.matches(s), + } + } +} + +#[derive(Debug)] +pub enum CompiledMatcher { + Primitive(Box), + Negated(Box), + And { + left: Box, + right: Box, + }, + Or { + left: Box, + right: Box, + }, +} + +impl CompiledMatcher { + fn create_primitive(arg: Predicate) -> Result { + Ok(Self::Primitive(match arg { + Predicate::Trivial(arg) => Box::new(TrivialMatcher::from_arg(arg)?), + Predicate::EntryType(arg) => Box::new(EntryTypeMatcher::from_arg(arg)?), + Predicate::CompressionMethod(method_arg) => match method_arg { + CompressionMethodArg::NonSpecific(arg) => { + Box::new(NonSpecificMethods::from_arg(arg)?) 
+                }
+                CompressionMethodArg::Specific(arg) => Box::new(SpecificMethods::from_arg(arg)?),
+            },
+            Predicate::DepthLimit(arg) => Box::new(DepthLimit::from_arg(arg)?),
+            Predicate::Size(arg) => Box::new(Size::from_arg(arg)?),
+            Predicate::Match(arg) => Box::new(PatternMatcher::from_arg(arg)?),
+        }))
+    }
+}
+
+impl EntryMatcher for CompiledMatcher {
+    type Arg = MatchExpression where Self: Sized;
+
+    /// Recursively lower the parsed match-expression AST into a tree of compiled matchers.
+    /// Compilation errors from either side of a binary operator propagate left-first.
+    fn from_arg(arg: Self::Arg) -> Result<Self, CommandError>
+    where
+        Self: Sized,
+    {
+        match arg {
+            MatchExpression::PrimitivePredicate(pred) => Self::create_primitive(pred),
+            MatchExpression::Negated(inner) => {
+                Self::from_arg(*inner).map(|m| Self::Negated(Box::new(m)))
+            }
+            MatchExpression::And {
+                explicit: _,
+                left,
+                right,
+            } => Ok(Self::And {
+                left: Self::from_arg(*left).map(Box::new)?,
+                right: Self::from_arg(*right).map(Box::new)?,
+            }),
+            MatchExpression::Or { left, right } => Ok(Self::Or {
+                left: Self::from_arg(*left).map(Box::new)?,
+                right: Self::from_arg(*right).map(Box::new)?,
+            }),
+            /* Grouping only affects parse precedence; it compiles away entirely. */
+            MatchExpression::Grouped(inner) => Self::from_arg(*inner),
+        }
+    }
+
+    /// Evaluate the compiled tree against one entry. `&&`/`||` short-circuit, so the right
+    /// subtree is only consulted when the left subtree does not decide the result.
+    fn matches(&self, entry: &EntryData) -> bool {
+        match self {
+            Self::Primitive(inner) => inner.matches(entry),
+            Self::Negated(inner) => !inner.matches(entry),
+            Self::And { left, right } => left.matches(entry) && right.matches(entry),
+            Self::Or { left, right } => left.matches(entry) || right.matches(entry),
+        }
+    }
+}
diff --git a/cli/src/extract/named_outputs.rs b/cli/src/extract/named_outputs.rs
new file mode 100644
index 000000000..535cde155
--- /dev/null
+++ b/cli/src/extract/named_outputs.rs
@@ -0,0 +1,347 @@
+use std::{
+    cell::RefCell,
+    collections::{HashMap, HashSet},
+    fs,
+    io::{self, Seek, Write},
+    path::PathBuf,
+    rc::Rc,
+};
+
+use super::matcher::{CompiledMatcher, EntryMatcher};
+use super::receiver::{
+    CompiledEntrySpec, ConcatEntry, EntryReceiver, ExtractEntry, FilesystemReceiver,
+};
+use super::transform::{CompiledTransformer, NameTransformer};
+use crate::{args::extract::*, CommandError, 
WrapCommandErr}; + +pub fn process_entry_and_output_specs<'w>( + err: Rc>, + entry_specs: impl IntoIterator, + output_specs: OutputSpecs, +) -> Result>, CommandError> { + let mut entry_specs: Vec = entry_specs + .into_iter() + .map(ParsedEntrySpecArg::from_entry_spec) + .collect::>()?; + if entry_specs.is_empty() { + entry_specs.push(ParsedEntrySpecArg { + matcher: None, + transforms: None, + output_name: OutputName::default_name(), + }); + } + let parsed_outputs = ParsedNamedOutputs::from_output_specs(err, output_specs)?; + parsed_outputs.process_entry_specs_for_outputs(entry_specs) +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct OutputName(pub String); + +impl OutputName { + pub fn default_name() -> Self { + Self("default".to_string()) + } +} + +struct ParsedEntrySpecArg { + pub matcher: Option, + pub transforms: Option, + pub output_name: OutputName, +} + +impl ParsedEntrySpecArg { + pub fn from_entry_spec(spec: EntrySpec) -> Result { + let EntrySpec { + match_expr, + name_transforms, + content_transform, + } = spec; + let matcher = match match_expr { + None => None, + Some(expr) => Some(CompiledMatcher::from_arg(expr)?), + }; + let transforms = if name_transforms.is_empty() { + None + } else { + Some(CompiledTransformer::from_arg(name_transforms)?) 
+ }; + let output_name = match content_transform { + ContentTransform::Extract { name } => name + .map(OutputName) + .unwrap_or_else(OutputName::default_name), + }; + Ok(Self { + matcher, + transforms, + output_name, + }) + } +} + +struct NamedOutputsBuilder<'w, W> { + err: Rc>, + concats: HashMap>>, + extracts: HashMap>, + seen_stdout: bool, + seen_files: HashSet, + seen_dirs: HashSet, + seen_names: HashSet, +} + +impl<'w, W> NamedOutputsBuilder<'w, W> { + pub fn new(err: Rc>) -> Self { + Self { + err, + concats: HashMap::new(), + extracts: HashMap::new(), + seen_stdout: false, + seen_files: HashSet::new(), + seen_dirs: HashSet::new(), + seen_names: HashSet::new(), + } + } + + pub fn into_tables( + self, + ) -> ( + HashMap>>, + HashMap>, + ) { + let Self { + concats, extracts, .. + } = self; + (concats, extracts) + } + + fn add_name( + &mut self, + name: OutputName, + f: impl FnOnce() -> Result, + ) -> Result { + if self.seen_names.contains(&name) { + return Err(CommandError::InvalidArg(format!( + "output name {name:?} provided more than once" + ))); + } + + let ret = f()?; + + assert!(self.seen_names.insert(name)); + + Ok(ret) + } + + fn add_concat( + &mut self, + name: OutputName, + handle: impl Write + 'w, + ) -> Result<(), CommandError> { + /* This should be assured by the check against self.seen_names. 
*/ + assert!(!self.concats.contains_key(&name)); + + let handle = Rc::new(RefCell::new(handle)); + + assert!(self.concats.insert(name, handle).is_none()); + + Ok(()) + } + + pub fn add_stdout(&mut self, name: OutputName) -> Result<(), CommandError> { + if self.seen_stdout { + return Err(CommandError::InvalidArg( + "--stdout output provided for more than one receiver".to_string(), + )); + } + + let handle = self.add_name(name.clone(), || Ok(io::stdout()))?; + self.add_concat(name, handle)?; + + self.seen_stdout = true; + Ok(()) + } + + fn add_seen_file(&mut self, path: PathBuf) -> Result<(), CommandError> { + let canon_path = path + .canonicalize() + .wrap_err_with(|| format!("canonicalizing path {path:?} failed"))?; + + if self.seen_files.contains(&canon_path) { + return Err(CommandError::InvalidArg(format!( + "canonical output file path {canon_path:?} provided more than once" + ))); + } + + assert!(self.seen_files.insert(canon_path)); + + Ok(()) + } + + pub fn add_file( + &mut self, + path: PathBuf, + append: bool, + name: OutputName, + ) -> Result<(), CommandError> { + let handle = self.add_name(name.clone(), || { + let mut f: fs::File = if append { + fs::OpenOptions::new() + .write(true) + .create(true) + .open(&path) + .wrap_err_with(|| format!("failed to open file for append at {path:?}"))? + } else { + fs::File::create(&path) + .wrap_err_with(|| format!("failed to open file with truncation at {path:?}"))? 
+ }; + f.seek(io::SeekFrom::End(0)) + .wrap_err_with(|| format!("failed to seek to end of opened file {f:?}"))?; + Ok(f) + })?; + self.add_seen_file(path)?; + self.add_concat(name, handle)?; + Ok(()) + } + + fn add_seen_dir(&mut self, path: PathBuf) -> Result<(), CommandError> { + let canon_path = path + .canonicalize() + .wrap_err_with(|| format!("canonicalizing dir path {path:?} failed"))?; + if self.seen_dirs.contains(&canon_path) { + return Err(CommandError::InvalidArg(format!( + "canonical output dir path {canon_path:?} provided more than once" + ))); + } + + assert!(self.seen_dirs.insert(canon_path)); + + Ok(()) + } + + fn add_extract( + &mut self, + name: OutputName, + handle: impl EntryReceiver + 'w, + ) -> Result<(), CommandError> { + assert!(!self.extracts.contains_key(&name)); + + let handle = Rc::new(handle); + + assert!(self.extracts.insert(name, handle).is_none()); + + Ok(()) + } +} + +impl<'w, W> NamedOutputsBuilder<'w, W> +where + W: Write + 'w, +{ + pub fn add_dir( + &mut self, + output_dir: PathBuf, + mkdir: bool, + name: OutputName, + ) -> Result<(), CommandError> { + let err = self.err.clone(); + let handle = self.add_name(name.clone(), || { + if mkdir { + fs::create_dir_all(&output_dir).wrap_err_with(|| { + format!("failed to create output directory {output_dir:?}") + })?; + }; + Ok(FilesystemReceiver::new(err, output_dir.clone())) + })?; + self.add_seen_dir(output_dir.clone())?; + self.add_extract(name, handle)?; + Ok(()) + } +} + +struct ParsedNamedOutputs<'w> { + concats: HashMap>>, + extracts: HashMap>, +} + +impl<'w> ParsedNamedOutputs<'w> { + pub fn process_entry_specs_for_outputs( + self, + args: impl IntoIterator, + ) -> Result>, CommandError> { + args.into_iter() + .map(|arg| self.lookup_entry_spec_arg(arg)) + .collect() + } + + fn lookup_entry_spec_arg( + &self, + arg: ParsedEntrySpecArg, + ) -> Result, CommandError> { + let ParsedEntrySpecArg { + matcher, + transforms, + output_name, + } = arg; + if let Some(stream) = 
self.concats.get(&output_name) { + if transforms.is_some() { + return Err(CommandError::InvalidArg(format!( + "entry name transforms do not apply to concat output {output_name:?}" + ))); + } + return Ok(CompiledEntrySpec::Concat(ConcatEntry { + matcher, + stream: stream.clone(), + })); + } + let Some(recv) = self.extracts.get(&output_name) else { + return Err(CommandError::InvalidArg(format!( + "output name {output_name:?} was not found" + ))); + }; + Ok(CompiledEntrySpec::Extract(ExtractEntry { + matcher, + transforms, + recv: recv.clone(), + })) + } + + pub fn from_output_specs( + err: Rc>, + spec: OutputSpecs, + ) -> Result { + let OutputSpecs { default, named } = spec; + + let mut builder = NamedOutputsBuilder::new(err); + + if let Some(default) = default { + let name = OutputName::default_name(); + match default { + OutputCollation::ConcatenateStdout => { + builder.add_stdout(name)?; + } + OutputCollation::ConcatenateFile { path, append } => { + builder.add_file(path, append, name)?; + } + OutputCollation::Filesystem { output_dir, mkdir } => { + builder.add_dir(output_dir, mkdir, name)?; + } + } + } + for NamedOutput { name, output } in named.into_iter() { + let name = OutputName(name); + match output { + OutputCollation::ConcatenateStdout => { + builder.add_stdout(name)?; + } + OutputCollation::ConcatenateFile { path, append } => { + builder.add_file(path, append, name)?; + } + OutputCollation::Filesystem { output_dir, mkdir } => { + builder.add_dir(output_dir, mkdir, name)?; + } + } + } + + let (concats, extracts) = builder.into_tables(); + Ok(Self { concats, extracts }) + } +} diff --git a/cli/src/extract/receiver.rs b/cli/src/extract/receiver.rs new file mode 100644 index 000000000..6495ccd60 --- /dev/null +++ b/cli/src/extract/receiver.rs @@ -0,0 +1,386 @@ +use std::{ + borrow::Cow, + cell::RefCell, + fmt, fs, + io::{self, Write}, + mem, + path::{Path, PathBuf}, + rc::Rc, +}; + +use zip::{ + extra_fields::{ExtendedTimestamp, ExtraField}, + read::ZipFile, 
+ CompressionMethod, DateTime, +}; + +use super::matcher::{CompiledMatcher, EntryMatcher}; +use super::transform::{CompiledTransformer, NameTransformer}; +use crate::{CommandError, WrapCommandErr}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum EntryKind { + File, + Dir, + Symlink, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct EntryData<'a> { + pub name: &'a str, + pub kind: EntryKind, + pub compression: CompressionMethod, + pub unix_mode: Option, + pub comment: &'a str, + pub uncompressed_size: u64, + pub compressed_size: u64, + pub local_header_start: u64, + pub content_start: u64, + pub central_header_start: u64, + pub crc32: u32, + pub last_modified_time: Option, + pub extended_timestamp: Option, +} + +impl<'a> EntryData<'a> { + #[inline(always)] + pub fn from_entry<'b>(entry: &'a ZipFile<'b>) -> Self { + Self { + name: entry.name(), + kind: if entry.is_dir() { + EntryKind::Dir + } else if entry.is_symlink() { + EntryKind::Symlink + } else { + EntryKind::File + }, + compression: entry.compression(), + unix_mode: entry.unix_mode(), + comment: entry.comment(), + uncompressed_size: entry.size(), + compressed_size: entry.compressed_size(), + local_header_start: entry.header_start(), + content_start: entry.data_start(), + central_header_start: entry.central_header_start(), + crc32: entry.crc32(), + last_modified_time: entry.last_modified(), + extended_timestamp: entry + .extra_data_fields() + .find_map(|f| match f { + ExtraField::ExtendedTimestamp(ts) => Some(ts), + }) + .cloned(), + } + } + + #[inline(always)] + pub const fn content_end(&self) -> u64 { + self.content_start + self.compressed_size + } +} + +pub struct ConcatEntry<'w> { + pub matcher: Option, + pub stream: Rc>, +} + +impl<'w> fmt::Debug for ConcatEntry<'w> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ConcatEntry {{ matcher: {:?}, stream: {:p} }}", + &self.matcher, &self.stream + ) + } +} + +impl<'w> ConcatEntry<'w> { + 
pub fn do_match<'a>(&self, data: &EntryData<'a>) -> Option<&Rc>> { + if self + .matcher + .as_ref() + .map(|m| m.matches(data)) + .unwrap_or(true) + { + Some(&self.stream) + } else { + None + } + } +} + +#[derive(Debug)] +pub struct ExtractEntry<'w> { + pub matcher: Option, + pub transforms: Option, + pub recv: Rc, +} + +impl<'w> ExtractEntry<'w> { + pub fn do_match_and_transform<'a>( + &self, + data: &EntryData<'a>, + ) -> Option<(Cow<'a, str>, &Rc)> { + if self + .matcher + .as_ref() + .map(|m| m.matches(data)) + .unwrap_or(true) + { + let new_name = self + .transforms + .as_ref() + .map(|t| t.transform_name(data.name)) + .unwrap_or_else(|| Cow::Borrowed(data.name)); + Some((new_name, &self.recv)) + } else { + None + } + } +} + +#[derive(Debug)] +pub enum CompiledEntrySpec<'w> { + Concat(ConcatEntry<'w>), + Extract(ExtractEntry<'w>), +} + +impl<'w> CompiledEntrySpec<'w> { + pub fn try_match_and_transform<'a>( + &self, + data: &EntryData<'a>, + ) -> Option> { + match self { + Self::Concat(c) => c.do_match(data).map(MatchingEntrySpec::Concat), + Self::Extract(e) => e + .do_match_and_transform(data) + .map(|(n, p)| MatchingEntrySpec::Extract(n, p)), + } + } +} + +pub enum MatchingEntrySpec<'a, 'c, 'w> { + Concat(&'c Rc>), + Extract(Cow<'a, str>, &'c Rc), +} + +impl<'a, 'c, 'w> MatchingEntrySpec<'a, 'c, 'w> { + /* Split output handles for concat, and split generated handles by extract source and + * name. use Rc::ptr_eq() to split, and Cow::<'s, str>::eq() with str AsRef. 
*/ + pub fn is_nested_duplicate( + self, + deduped_concat_writers: &mut Vec<&'c Rc>>, + deduped_matching_extracts: &mut Vec<(&'c Rc, Vec>)>, + ) -> bool { + match self { + MatchingEntrySpec::Concat(concat_writer) => { + if deduped_concat_writers + .iter() + .any(|p| Rc::ptr_eq(p, &concat_writer)) + { + true + } else { + deduped_concat_writers.push(concat_writer); + false + } + } + MatchingEntrySpec::Extract(name, extract_receiver) => { + if let Some((_, names)) = deduped_matching_extracts + .iter_mut() + .find(|(p, _)| Rc::ptr_eq(p, &extract_receiver)) + { + if names.iter().any(|n| n.as_ref() == name.as_ref()) { + true + } else { + names.push(name); + false + } + } else { + deduped_matching_extracts.push((extract_receiver, vec![name])); + false + } + } + } + } +} + +pub trait EntryReceiver: fmt::Debug { + fn generate_entry_handle<'s>( + &self, + data: &EntryData<'s>, + symlink_target: Option<&[u8]>, + name: Cow<'s, str>, + ) -> Result>, CommandError>; + + fn finalize_entries(&self) -> Result<(), CommandError>; +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg(unix)] +struct PermsEntry { + path: PathBuf, + mode: u32, +} + +pub struct FilesystemReceiver { + err: Rc>, + output_dir: PathBuf, + #[cfg(unix)] + perms_to_set: RefCell>, +} + +impl FilesystemReceiver { + pub fn new(err: Rc>, output_dir: PathBuf) -> Self { + Self { + err, + output_dir, + #[cfg(unix)] + perms_to_set: RefCell::new(Vec::new()), + } + } +} + +impl fmt::Debug for FilesystemReceiver { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "FilesystemReceiver {{ output_dir: {:?} }}", + &self.output_dir + ) + } +} + +impl FilesystemReceiver +where + W: Write, +{ + #[cfg(unix)] + fn create_or_overwrite_symlink( + err: &mut impl Write, + target: &[u8], + full_output_path: &Path, + ) -> Result<(), CommandError> { + use std::{ + ffi::OsStr, + os::unix::{ffi::OsStrExt, fs::symlink}, + }; + let target = OsStr::from_bytes(target); + writeln!(err, "entry is symlink 
to {target:?}, creating").unwrap(); + /* The stdlib symlink function has no functionality like OpenOptions to + * truncate a symlink if it already exists, so we have to do that ourselves + * here. */ + if let Err(e) = symlink(target, full_output_path) { + let e = match e.kind() { + io::ErrorKind::AlreadyExists => { + writeln!(err, "a file already existed at the symlink target {full_output_path:?}, removing") + .unwrap(); + fs::remove_file(full_output_path).wrap_err_with(|| { + format!("failed to remove file at symlink target {full_output_path:?}") + })?; + writeln!( + err, + "successfully removed file entry, creating symlink again" + ) + .unwrap(); + symlink(target, full_output_path).err() + } + _ => Some(e), + }; + if let Some(e) = e { + return Err(e).wrap_err_with(|| { + format!( + "failed to create symlink at {full_output_path:?} with target {target:?}" + ) + }); + } + } + Ok(()) + } +} + +impl EntryReceiver for FilesystemReceiver +where + W: Write, +{ + fn generate_entry_handle<'s>( + &self, + data: &EntryData<'s>, + symlink_target: Option<&[u8]>, + name: Cow<'s, str>, + ) -> Result>, CommandError> { + let mut err = self.err.borrow_mut(); + let full_output_path = self.output_dir.join(name.as_ref()); + writeln!( + err, + "receiving entry {} with name {name} and writing to path {full_output_path:?}", + data.name + ) + .unwrap(); + + match data.kind { + EntryKind::Dir => { + writeln!(err, "entry is directory, creating").unwrap(); + fs::create_dir_all(&full_output_path).wrap_err_with(|| { + format!("failed to create directory entry at {full_output_path:?}") + })?; + } + EntryKind::Symlink => { + let target = symlink_target.expect("we should have generated this"); + + #[cfg(unix)] + Self::create_or_overwrite_symlink(&mut *err, target, &full_output_path)?; + #[cfg(not(unix))] + todo!("TODO: cannot create symlink for entry {name} on non-unix yet!"); + } + EntryKind::File => { + writeln!(err, "entry is file, creating").unwrap(); + if let Some(containing_dir) = 
full_output_path.parent() { + fs::create_dir_all(containing_dir).wrap_err_with(|| { + format!("failed to create parent dirs for file at {full_output_path:?}") + })?; + } else { + writeln!(err, "entry had no parent dir (in root dir?)").unwrap(); + } + let outfile = fs::File::create(&full_output_path) + .wrap_err_with(|| format!("failed to create file at {full_output_path:?}"))?; + return Ok(Some(Box::new(outfile))); + } + } + + #[cfg(unix)] + if let Some(mode) = data.unix_mode { + writeln!( + err, + "storing unix mode {mode} for path {full_output_path:?}" + ) + .unwrap(); + self.perms_to_set.borrow_mut().push(PermsEntry { + path: full_output_path, + mode, + }); + } + + Ok(None) + } + + fn finalize_entries(&self) -> Result<(), CommandError> { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + + let mut perms_to_set = mem::take(&mut *self.perms_to_set.borrow_mut()); + perms_to_set.sort_unstable(); + writeln!( + &mut self.err.borrow_mut(), + "perms to set (these are done in reverse order): {perms_to_set:?}" + ) + .unwrap(); + for PermsEntry { path, mode } in perms_to_set.into_iter().rev() { + let perms = fs::Permissions::from_mode(mode); + fs::set_permissions(&path, perms.clone()) + .wrap_err_with(|| format!("error setting perms {perms:?} for path {path:?}"))?; + } + } + Ok(()) + } +} diff --git a/cli/src/extract/transform.rs b/cli/src/extract/transform.rs new file mode 100644 index 000000000..9494da36d --- /dev/null +++ b/cli/src/extract/transform.rs @@ -0,0 +1,707 @@ +use std::{borrow::Cow, collections::VecDeque, fmt, ops, path::Path, str}; + +#[cfg(feature = "rx")] +use regex; + +use super::matcher::{CaseSensitivity, SearchAnchoring}; +use crate::{args::extract::*, CommandError}; + +pub trait NameTransformer: fmt::Debug { + type Arg + where + Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized; + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str>; +} + +#[derive(Debug, Copy, Clone)] +enum Trivial { + Identity, +} + +impl 
NameTransformer for Trivial { + type Arg = TrivialTransform where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + TrivialTransform::Identity => Self::Identity, + }) + } + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + match self { + Self::Identity => Cow::Borrowed(name), + } + } +} + +#[derive(Debug)] +struct StripComponents { + num_components_to_strip: usize, +} + +impl NameTransformer for StripComponents { + type Arg = u8 where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(Self { + num_components_to_strip: arg.into(), + }) + } + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + /* If no directory components, then nothing to strip. */ + if !name.contains('/') { + return Cow::Borrowed(name); + } + /* We allow stripping 0 components, which does nothing. */ + if self.num_components_to_strip == 0 { + return Cow::Borrowed(name); + } + /* Pop off prefix components until only one is left or we have stripped all the + * requested prefix components. */ + let mut remaining_to_strip = self.num_components_to_strip; + let mut separator_indices: VecDeque = + name.match_indices('/').map(|(i, _)| i).collect(); + debug_assert!(separator_indices.len() > 0); + /* Always keep the final separator, as regardless of how many we strip, we want + * to keep the basename in all cases. 
*/ + while separator_indices.len() > 1 && remaining_to_strip > 0 { + let _ = separator_indices.pop_front().unwrap(); + remaining_to_strip -= 1; + } + debug_assert!(separator_indices.len() > 0); + let leftmost_remaining_separator_index: usize = separator_indices.pop_front().unwrap(); + Cow::Borrowed(&name[(leftmost_remaining_separator_index + 1)..]) + } +} + +#[derive(Debug)] +struct AddPrefix { + prefix_to_add: String, +} + +impl NameTransformer for AddPrefix { + type Arg = String where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(Self { prefix_to_add: arg }) + } + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + /* We allow an empty prefix, which means to do nothing. */ + if self.prefix_to_add.is_empty() { + return Cow::Borrowed(name); + } + Cow::Owned(format!("{}/{}", self.prefix_to_add, name)) + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Multiplicity { + #[default] + Single, + All, +} + +impl Multiplicity { + pub const fn from_multiple_matches_flag(multiple_matches: bool) -> Self { + match multiple_matches { + true => Self::All, + false => Self::Single, + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ReplaceModifiers { + pub anchoring: SearchAnchoring, + pub case: CaseSensitivity, + pub multi: Multiplicity, +} + +impl ReplaceModifiers { + pub const fn from_flags(flags: PatternModifierFlags) -> Self { + let PatternModifierFlags { + case_insensitive, + multiple_matches, + prefix_anchored, + suffix_anchored, + } = flags; + let multi = Multiplicity::from_multiple_matches_flag(multiple_matches); + let case = CaseSensitivity::from_case_insensitive_flag(case_insensitive); + let anchoring = SearchAnchoring::from_prefix_suffix_flags(prefix_anchored, suffix_anchored); + Self { + anchoring, + case, + multi, + } + } +} + +trait PatternTransformer: fmt::Debug { + type Replacement + where + Self: Sized; + fn create( 
+ pattern: String, + opts: ReplaceModifiers, + rep: Self::Replacement, + ) -> Result + where + Self: Sized; + + fn replace<'s>(&self, input: &'s str) -> Cow<'s, str>; +} + +#[derive(Debug)] +struct LiteralTransformer { + lit: String, + case: CaseSensitivity, + anchoring: SearchAnchoring, + multi: Multiplicity, + rep: String, +} + +impl LiteralTransformer { + fn format_single_replacement<'s>( + input: &'s str, + lit_len: usize, + rep: &str, + match_index: usize, + ) -> Cow<'s, str> { + /* If the replacement is empty, we have the opportunity to return a borrowed Cow. */ + if rep.is_empty() { + /* Remove the prefix alone! */ + if match_index == 0 { + return Cow::Borrowed(&input[lit_len..]); + } + /* Remove the suffix alone! */ + if match_index == input.len() - lit_len { + return Cow::Borrowed(&input[..match_index]); + } + } + /* Otherwise, we allocate a new string. */ + Cow::Owned(format!( + "{}{}{}", + &input[..match_index], + rep, + &input[(match_index + lit_len)..] + )) + } + + fn replace_single_anchored<'s>( + input: &'s str, + lit: &str, + rep: &str, + range: ops::Range, + case: CaseSensitivity, + ) -> Cow<'s, str> { + let sub = &input[range.clone()]; + if case.string_equal(lit, sub) { + Self::format_single_replacement(input, lit.len(), rep, range.start) + } else { + Cow::Borrowed(input) + } + } + + fn replace_single_exact<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> { + match input.find(lit) { + None => Cow::Borrowed(input), + Some(i) => Self::format_single_replacement(input, lit.len(), rep, i), + } + } + + fn replace_single_icase<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> { + /* NB: literal was already changed to uppercase upon construction in Self::create()! 
*/
+        match input.to_ascii_uppercase().find(&lit) {
+            None => Cow::Borrowed(input),
+            Some(i) => Self::format_single_replacement(input, lit.len(), rep, i),
+        }
+    }
+
+    fn format_multiple_replacements<'s>(
+        input: &'s str,
+        lit_len: usize,
+        rep: &str,
+        match_indices: Vec<usize>,
+    ) -> Cow<'s, str> {
+        if match_indices.is_empty() {
+            return Cow::Borrowed(input);
+        }
+        if match_indices.len() == 1 {
+            return Self::format_single_replacement(input, lit_len, rep, match_indices[0]);
+        }
+        let expected_len: usize =
+            input.len() - (lit_len * match_indices.len()) + (rep.len() * match_indices.len());
+        let mut ret = String::with_capacity(expected_len);
+        let mut last_source_position: usize = 0;
+        for i in match_indices.into_iter() {
+            ret.push_str(&input[last_source_position..i]);
+            ret.push_str(rep);
+            last_source_position = i + lit_len;
+        }
+        /* NB: without this final push, any text after the last match was dropped and
+         * the length assertion below panicked; the tail must be carried over. */
+        ret.push_str(&input[last_source_position..]);
+        assert_eq!(ret.len(), expected_len);
+        Cow::Owned(ret)
+    }
+
+    fn replace_multiple_exact<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> {
+        let match_indices: Vec<usize> = input.match_indices(lit).map(|(i, _)| i).collect();
+        Self::format_multiple_replacements(input, lit.len(), rep, match_indices)
+    }
+
+    fn replace_multiple_icase<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> {
+        let match_indices: Vec<usize> = input
+            .to_ascii_uppercase()
+            /* NB: literal was already changed to uppercase upon construction in Self::create()!
*/
+            .match_indices(&lit)
+            .map(|(i, _)| i)
+            .collect();
+        Self::format_multiple_replacements(input, lit.len(), rep, match_indices)
+    }
+}
+
+impl PatternTransformer for LiteralTransformer {
+    type Replacement = String where Self: Sized;
+    fn create(
+        pattern: String,
+        opts: ReplaceModifiers,
+        rep: Self::Replacement,
+    ) -> Result<Self, CommandError>
+    where
+        Self: Sized,
+    {
+        let ReplaceModifiers {
+            case,
+            anchoring,
+            multi,
+        } = opts;
+
+        if matches!(multi, Multiplicity::All) && !matches!(anchoring, SearchAnchoring::Unanchored) {
+            return Err(CommandError::InvalidArg(format!(
+                "multimatch replacement with :g is not supported with anchoring flags :p or :s for literal transforms: {opts:?} {pattern:?}"
+            )));
+        }
+
+        Ok(Self {
+            lit: match case {
+                CaseSensitivity::Sensitive => pattern,
+                CaseSensitivity::Insensitive => pattern.to_ascii_uppercase(),
+            },
+            case,
+            anchoring,
+            multi,
+            rep,
+        })
+    }
+
+    fn replace<'s>(&self, input: &'s str) -> Cow<'s, str> {
+        /* An empty literal or an empty input is allowed; it just does nothing. */
+        if self.lit.is_empty() || input.is_empty() {
+            return Cow::Borrowed(input);
+        }
+        /* A literal longer than the input can never match.
*/ + if self.lit.len() > input.len() { + return Cow::Borrowed(input); + } + + match self.multi { + Multiplicity::Single => match self.anchoring { + SearchAnchoring::DoublyAnchored => Self::replace_single_anchored( + input, + &self.lit, + &self.rep, + 0..input.len(), + self.case, + ), + SearchAnchoring::LeftAnchored => Self::replace_single_anchored( + input, + &self.lit, + &self.rep, + 0..self.lit.len(), + self.case, + ), + SearchAnchoring::RightAnchored => Self::replace_single_anchored( + input, + &self.lit, + &self.rep, + (input.len() - self.lit.len())..input.len(), + self.case, + ), + SearchAnchoring::Unanchored => match self.case { + CaseSensitivity::Sensitive => { + Self::replace_single_exact(input, &self.lit, &self.rep) + } + CaseSensitivity::Insensitive => { + Self::replace_single_icase(input, &self.lit, &self.rep) + } + }, + }, + Multiplicity::All => match self.anchoring { + SearchAnchoring::Unanchored => match self.case { + CaseSensitivity::Sensitive => { + Self::replace_multiple_exact(input, &self.lit, &self.rep) + } + CaseSensitivity::Insensitive => { + Self::replace_multiple_icase(input, &self.lit, &self.rep) + } + }, + _ => unreachable!("checked during construction"), + }, + } + } +} + +#[derive(Debug)] +#[cfg(feature = "rx")] +struct RegexpTransformer { + pat: regex::Regex, + multi: Multiplicity, + rep: String, +} + +#[cfg(feature = "rx")] +impl PatternTransformer for RegexpTransformer { + type Replacement = String where Self: Sized; + fn create( + pattern: String, + opts: ReplaceModifiers, + rep: Self::Replacement, + ) -> Result + where + Self: Sized, + { + let ReplaceModifiers { + case, + anchoring, + multi, + } = opts; + let pattern = anchoring.wrap_regex_pattern(&pattern); + + let pat = regex::RegexBuilder::new(&pattern) + .case_insensitive(match case { + CaseSensitivity::Insensitive => true, + CaseSensitivity::Sensitive => false, + }) + .build() + .map_err(|e| { + CommandError::InvalidArg(format!( + "failed to construct regex replacer from search 
pattern {pattern:?}: {e}" + )) + })?; + Ok(Self { pat, multi, rep }) + } + + fn replace<'s>(&self, input: &'s str) -> Cow<'s, str> { + match self.multi { + Multiplicity::Single => self.pat.replace(input, &self.rep), + Multiplicity::All => self.pat.replace_all(input, &self.rep), + } + } +} + +pub enum ComponentSplit<'s> { + LeftAnchored { + selected_left: &'s str, + right: &'s str, + }, + RightAnchored { + left: &'s str, + selected_right: &'s str, + }, + Whole(&'s str), +} + +impl<'s> ComponentSplit<'s> { + #[inline(always)] + pub fn split_by_component_selector(sel: ComponentSelector, name: &'s str) -> Option { + let path = Path::new(name); + match sel { + ComponentSelector::Path => Some(ComponentSplit::Whole(name)), + ComponentSelector::Basename => path + .file_name() + .map(|bname| bname.to_str().unwrap()) + .map(|bname| name.split_at(name.len() - bname.len())) + .map(|(pfx, bname)| ComponentSplit::RightAnchored { + left: pfx, + selected_right: bname, + }), + ComponentSelector::Dirname => path + .parent() + .map(|p| p.to_str().unwrap()) + /* "a".parent() becomes Some(""), which we want to treat as no parent */ + .filter(|s| !s.is_empty()) + .map(|dirname| name.split_at(dirname.len())) + .map(|(dirname, sfx)| ComponentSplit::LeftAnchored { + selected_left: dirname, + right: sfx, + }), + ComponentSelector::FileExtension => path + .extension() + .map(|ext| ext.to_str().unwrap()) + .map(|ext| name.split_at(name.len() - ext.len())) + .map(|(pfx, ext)| ComponentSplit::RightAnchored { + left: pfx, + selected_right: ext, + }), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum SubstringAnchoring { + RetainsLeftAnchor, + RetainsRightAnchor, + RetainsBothAnchors, + LosesBothAnchors, +} + +impl SubstringAnchoring { + #[inline(always)] + pub fn analyze<'s, 't>(parent: &'s str, sub: &'t str) -> Self + where + 't: 's, + { + let p = parent.as_bytes().as_ptr_range(); + let s = sub.as_bytes().as_ptr_range(); + assert!(s.start >= p.start); + 
assert!(s.end <= p.end); + if p.start == s.start { + if p.end == s.end { + debug_assert_eq!(parent, sub); + Self::RetainsBothAnchors + } else { + Self::RetainsLeftAnchor + } + } else { + if p.end == s.end { + Self::RetainsRightAnchor + } else { + Self::LosesBothAnchors + } + } + } + + #[inline(always)] + pub fn split_then_transform_then_reformulate<'s>( + input: &'s str, + split: impl FnOnce(&'s str) -> Option>, + transform: impl FnOnce(&'s str) -> Cow<'s, str>, + ) -> Cow<'s, str> { + let components = match split(input) { + /* If the given name doesn't have the specified component, return it unchanged. */ + None => return Cow::Borrowed(input), + Some(s) => s, + }; + match components { + /* If there was no splitting (the whole path was selected), then we don't need to do + * any work to hook things back up! */ + ComponentSplit::Whole(s) => transform(s), + /* If there was splitting, we need to do more work. */ + ComponentSplit::LeftAnchored { + selected_left, + right, + } => match transform(selected_left) { + /* If we reallocated, then we have to reallocate the whole thing, so reuse the + * returned String. */ + Cow::Owned(mut new_left) => { + new_left.push_str(right); + Cow::Owned(new_left) + } + /* If no reallocation, we now have to figure out whether the result is still + * contiguous. 
*/ + Cow::Borrowed(left_sub) => match Self::analyze(selected_left, left_sub) { + Self::RetainsBothAnchors => Cow::Borrowed(input), + Self::RetainsRightAnchor => { + Cow::Borrowed(Self::join_adjacent_strings(input, left_sub, right)) + } + _ => Cow::Owned(format!("{}{}", left_sub, right)), + }, + }, + ComponentSplit::RightAnchored { + left, + selected_right, + } => match transform(selected_right) { + Cow::Owned(mut new_right) => { + new_right.insert_str(0, left); + Cow::Owned(new_right) + } + Cow::Borrowed(right_sub) => match Self::analyze(selected_right, right_sub) { + Self::RetainsBothAnchors => Cow::Borrowed(input), + Self::RetainsLeftAnchor => { + Cow::Borrowed(Self::join_adjacent_strings(input, left, right_sub)) + } + _ => Cow::Owned(format!("{}{}", left, right_sub)), + }, + }, + } + } + + #[inline(always)] + fn join_adjacent_strings<'s, 't>(parent: &'s str, left: &'t str, right: &'t str) -> &'s str + where + 't: 's, + { + let parent_range = parent.as_bytes().as_ptr_range(); + let left = left.as_bytes().as_ptr_range(); + debug_assert!(left.start >= parent_range.start && left.end <= parent_range.end); + let right = right.as_bytes().as_ptr_range(); + debug_assert!(right.start >= parent_range.start && right.end <= parent_range.end); + debug_assert_eq!(left.end, right.start); + let start_offset = (left.start as usize) - (parent_range.start as usize); + let end_offset = (parent_range.end as usize) - (right.end as usize); + &parent[start_offset..(parent.len() - end_offset)] + } +} + +#[derive(Debug)] +struct ComponentTransformer { + pattern_trans: Box, + comp_sel: ComponentSelector, +} + +impl NameTransformer for ComponentTransformer { + type Arg = TransformArg where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + let TransformArg { + comp_sel, + pat_sel: PatternSelector { pat_sel, modifiers }, + pattern, + replacement_spec, + } = arg; + + let opts = ReplaceModifiers::from_flags(modifiers); + let pattern_trans: Box = match pat_sel { + 
PatternSelectorType::Glob => { + return Err(CommandError::InvalidArg(format!( + "glob patterns are not supported for name transformations: {pattern:?}" + ))); + } + PatternSelectorType::Literal => { + Box::new(LiteralTransformer::create(pattern, opts, replacement_spec)?) + } + PatternSelectorType::Regexp => { + #[cfg(feature = "rx")] + { + Box::new(RegexpTransformer::create(pattern, opts, replacement_spec)?) + } + #[cfg(not(feature = "rx"))] + { + return Err(CommandError::InvalidArg(format!( + "regexp patterns were requested, but this binary was built without the \"rx\" feature: {pattern:?}" + ))); + } + } + }; + + Ok(Self { + pattern_trans, + comp_sel, + }) + } + + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + SubstringAnchoring::split_then_transform_then_reformulate( + name, + move |name| ComponentSplit::split_by_component_selector(self.comp_sel, name), + |name| self.pattern_trans.replace(name), + ) + } +} + +#[derive(Debug)] +pub struct CompiledTransformer { + transformers: Vec>, +} + +impl CompiledTransformer { + fn make_single(trans: NameTransform) -> Result, CommandError> { + Ok(match trans { + NameTransform::Trivial(arg) => Box::new(Trivial::from_arg(arg)?), + NameTransform::Basic(basic_trans) => match basic_trans { + BasicTransform::StripComponents(arg) => Box::new(StripComponents::from_arg(arg)?), + BasicTransform::AddPrefix(arg) => Box::new(AddPrefix::from_arg(arg)?), + }, + NameTransform::Complex(complex_trans) => match complex_trans { + ComplexTransform::Transform(arg) => Box::new(ComponentTransformer::from_arg(arg)?), + }, + }) + } +} + +impl NameTransformer for CompiledTransformer { + type Arg = Vec where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + assert!(!arg.is_empty()); + Ok(Self { + transformers: arg + .into_iter() + .map(Self::make_single) + .collect::>()?, + }) + } + + /// Transform the name from the zip entry, maintaining a few invariants: + /// 1. 
If the transformations all return substrings (no prefixing, non-empty replacements, or + /// empty replacements that lead to non-contiguous input chunks), return a slice of the + /// original input, pointing back to the ZipFile's memory location with associated lifetime. + /// 2. If some intermediate transformation requires an allocation (e.g. adding a prefix), do + /// not perform intermediate reallocations for subsequent substring-only transformations. + /// - TODO: The returned string may be reallocated from the initial allocation exactly once + /// at the end, if substring-only transformations reduced its length. This is because Cow + /// can only describe a substring of the original input or an entirely new allocated + /// string, as opposed to a more general sort of string view wrapper. + fn transform_name<'s>(&self, mut original_name: &'s str) -> Cow<'s, str> { + let mut newly_allocated_name: Option = None; + let mut newly_allocated_str: Option<&str> = None; + for transformer in self.transformers.iter() { + match newly_allocated_str { + Some(s) => match transformer.transform_name(s) { + Cow::Borrowed(t) => { + let _ = newly_allocated_str.replace(t); + } + Cow::Owned(t) => { + assert!(newly_allocated_name.replace(t).is_some()); + newly_allocated_str = Some(newly_allocated_name.as_ref().unwrap().as_str()); + } + }, + None => match transformer.transform_name(original_name) { + Cow::Borrowed(t) => { + original_name = t; + } + Cow::Owned(t) => { + assert!(newly_allocated_name.replace(t).is_none()); + newly_allocated_str = Some(newly_allocated_name.as_ref().unwrap().as_str()); + } + }, + } + } + + if newly_allocated_name.is_none() { + /* If we have never allocated anything new, just return the substring of the original + * name! 
*/ + Cow::Borrowed(original_name) + } else { + let subref = newly_allocated_str.unwrap(); + /* If the active substring is the same length as the backing string, assume it's + * unchanged, so we can return the backing string without reallocating. */ + if subref.len() == newly_allocated_name.as_ref().unwrap().len() { + Cow::Owned(newly_allocated_name.unwrap()) + } else { + let reallocated_string = subref.to_string(); + Cow::Owned(reallocated_string) + } + } + } +} diff --git a/cli/src/info.rs b/cli/src/info.rs new file mode 100644 index 000000000..4a206bdce --- /dev/null +++ b/cli/src/info.rs @@ -0,0 +1,167 @@ +use std::{ + fs, + io::{self, Write}, + path::PathBuf, +}; + +use zip::read::ZipArchive; + +use crate::{ + args::{extract::InputSpec, info::*}, + extract::{ + entries::{IterateEntries, StreamInput, ZipFileInput}, + matcher::{CompiledMatcher, EntryMatcher}, + receiver::EntryData, + }, + CommandError, WrapCommandErr, +}; + +mod directives; +mod formats; +use directives::{ + archive::{ + compiled::{CompiledArchiveDirective, CompiledArchiveFormat}, + ArchiveData, + }, + compiled::CompiledFormatSpec, + entry::compiled::{CompiledEntryDirective, CompiledEntryFormat}, +}; + +pub struct ArchiveWithPath { + pub path: PathBuf, + pub len: u64, + pub archive: ZipArchive, +} + +impl ArchiveWithPath { + pub fn open(path: PathBuf) -> Result { + let f = fs::File::open(&path) + .wrap_err_with(|| format!("failed to open zip input file path {:?}", &path))?; + let len = f + .metadata() + .wrap_err("failed to extract file metadata")? 
+ .len(); + let archive = ZipArchive::new(f) + .wrap_err_with(|| format!("failed to create zip archive from file {:?}", &path))?; + Ok(Self { path, len, archive }) + } +} + +fn format_entry_info( + mut err: impl Write, + entry_formatter: &CompiledFormatSpec, + matcher: Option<&CompiledMatcher>, + mut output_stream: impl Write, + source: &mut impl IterateEntries, +) -> Result<(), CommandError> { + if entry_formatter.is_empty() { + writeln!( + &mut err, + "empty entry format, skipping reading from any entries" + ) + .unwrap(); + return Ok(()); + } + + while let Some(entry) = source.next_entry()? { + let data = EntryData::from_entry(&entry); + if matcher.as_ref().is_some_and(|m| !m.matches(&data)) { + writeln!(&mut err, "matcher ignored entry: {:?}", data.name).unwrap(); + continue; + } + entry_formatter.execute_format(data, &mut output_stream)?; + } + Ok(()) +} + +fn format_archive_info( + mut err: impl Write, + archive_formatter: &CompiledFormatSpec, + mut output_stream: impl Write, + zip: ArchiveData, +) -> Result<(), CommandError> { + if archive_formatter.is_empty() { + writeln!(&mut err, "empty archive format, skipping archive overview").unwrap(); + return Ok(()); + } + + archive_formatter.execute_format(zip, &mut output_stream)?; + Ok(()) +} + +pub fn execute_info(mut err: impl Write, args: Info) -> Result<(), CommandError> { + let Info { + format_spec, + match_expr, + input_spec: InputSpec { + stdin_stream, + zip_paths, + }, + } = args; + + let matcher = match match_expr { + None => None, + Some(expr) => Some(CompiledMatcher::from_arg(expr)?), + }; + let (archive_formatter, entry_formatter) = match format_spec { + FormatSpec::Compact => todo!(), + FormatSpec::Extended => todo!(), + FormatSpec::Custom { overview, entry } => ( + CompiledFormatSpec::from_spec::(overview)?, + CompiledFormatSpec::from_spec::(entry)?, + ), + }; + let mut output_stream = io::stdout().lock(); + + if stdin_stream { + let mut stdin = StreamInput::new(io::stdin().lock()); + + 
format_entry_info( + &mut err, + &entry_formatter, + matcher.as_ref(), + &mut output_stream, + &mut stdin, + )?; + + let (stdin, num_entries) = stdin.into_inner(); + /* NB: The read_zipfile_from_stream() method overruns the size of a single local header into + * the CDE after reading the last input. There are unstable APIs to address this, but for + * now just rely on that internal knowledge. See e.g. zip::read::stream on master or + * zip::unstable::read in https://github.com/zip-rs/zip2/pull/233. */ + let cde_start = stdin.current_bytes_read() - 30; + let (_stdin, stream_length) = stdin + .exhaust() + .wrap_err("failed to exhaust all of stdin after reading all zip entries")?; + + let data = ArchiveData { + path: None, + stream_length, + num_entries, + comment: None, + first_entry_start: Some(0), + central_directory_start: Some(cde_start), + }; + format_archive_info(&mut err, &archive_formatter, &mut output_stream, data)?; + } + + for p in zip_paths.into_iter() { + let mut zip = ArchiveWithPath::open(p)?; + + { + let mut zip_entry_counter = ZipFileInput::new(&mut zip.archive); + format_entry_info( + &mut err, + &entry_formatter, + matcher.as_ref(), + &mut output_stream, + &mut zip_entry_counter, + )?; + } + + let data = ArchiveData::from_archive_with_path(&zip); + format_archive_info(&mut err, &archive_formatter, &mut output_stream, data)?; + } + + Ok(()) +} diff --git a/cli/src/info/directives.rs b/cli/src/info/directives.rs new file mode 100644 index 000000000..e4e3e5bfd --- /dev/null +++ b/cli/src/info/directives.rs @@ -0,0 +1,703 @@ +use std::{ + fmt, + io::{self, Write}, +}; + +use super::formats::FormatValue; +use crate::{ + args::info::{ParseableDirective, ParseableFormatComponent, ParseableFormatSpec}, + CommandError, WrapCommandErr, +}; + +pub trait Writeable { + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error>; +} + +impl Writeable for S +where + S: fmt::Display, +{ + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error> { + 
write!(out, "{}", self) + } +} + +pub trait FormatDirective { + type Data<'a>; + type FieldType: FormatValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a>; + fn value_formatter(&self) -> Self::FieldType; + + fn format_field<'a>( + &self, + data: Self::Data<'a>, + ) -> Result<::Output<'a>, ::E> + { + self.value_formatter() + .format_value(self.extract_field(data)) + } +} + +/// Wrap a [`FormatDirective`] and write it to a stream. This isn't directly type-eraseable, but it +/// removes one layer of polymorphism to enable us to do that in a subsequent wrapper trait. +pub trait DirectiveFormatter { + type Data<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError>; +} + +impl DirectiveFormatter for FD +where + FD: FormatDirective, + for<'a> <::FieldType as FormatValue>::Output<'a>: Writeable + fmt::Debug, + <::FieldType as FormatValue>::E: fmt::Display, +{ + type Data<'a> = ::Data<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + let output = self + .format_field(data) + .map_err(|e| CommandError::InvalidData(format!("error formatting field: {e}")))?; + output + .write_to(out) + .wrap_err_with(|| format!("failed to write output to stream: {output:?}")) + } +} + +pub mod compiled { + use super::*; + + enum CompiledFormatComponent { + Directive(F), + ContiguousLiteral(String), + } + + impl CompiledFormatComponent + where + F: DirectiveFormatter, + { + pub fn write_component<'a>( + &self, + data: ::Data<'a>, + mut out: impl Write, + ) -> Result<(), CommandError> { + match self { + Self::Directive(d) => d.write_directive(data, &mut out), + Self::ContiguousLiteral(lit) => out + .write_all(lit.as_bytes()) + .wrap_err_with(|| format!("failed to write literal {lit:?} to output")), + } + } + } + + pub trait CompiledFormat { + type Spec: ParseableDirective; + type Fmt: DirectiveFormatter; + + fn 
from_directive_spec(spec: Self::Spec) -> Result; + } + + pub struct CompiledFormatSpec { + components: Vec>, + } + + impl CompiledFormatSpec { + pub fn is_empty(&self) -> bool { + self.components.is_empty() + } + } + + impl CompiledFormatSpec + where + F: DirectiveFormatter, + { + pub fn from_spec( + spec: ParseableFormatSpec<::Spec>, + ) -> Result + where + CF: CompiledFormat, + { + let ParseableFormatSpec { + components: spec_components, + } = spec; + + let mut components: Vec> = Vec::new(); + for c in spec_components.into_iter() { + match c { + ParseableFormatComponent::Directive(d) => { + let d = CF::from_directive_spec(d)?; + components.push(CompiledFormatComponent::Directive(d)); + } + ParseableFormatComponent::Escaped(s) => match components.last_mut() { + Some(CompiledFormatComponent::ContiguousLiteral(ref mut last_lit)) => { + last_lit.push_str(s); + } + _ => { + components + .push(CompiledFormatComponent::ContiguousLiteral(s.to_string())); + } + }, + ParseableFormatComponent::Literal(new_lit) => match components.last_mut() { + Some(CompiledFormatComponent::ContiguousLiteral(ref mut last_lit)) => { + last_lit.push_str(new_lit.as_str()); + } + _ => { + components.push(CompiledFormatComponent::ContiguousLiteral(new_lit)); + } + }, + } + } + + Ok(Self { components }) + } + + pub fn execute_format<'a>( + &self, + data: ::Data<'a>, + mut out: impl Write, + ) -> Result<(), CommandError> + where + ::Data<'a>: Clone, + { + for c in self.components.iter() { + c.write_component(data.clone(), &mut out)? 
+ } + Ok(()) + } + } +} + +pub mod entry { + use super::{ + super::formats::{ + BinaryNumericValue, BinaryStringValue, ByteSizeValue, CompressionMethodValue, + FileTypeValue, FormatValue, NameString, OffsetValue, TimestampValue, UnixModeValue, + }, + FormatDirective, + }; + use crate::extract::receiver::EntryData; + + pub struct EntryNameField(pub NameString); + + impl FormatDirective for EntryNameField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = NameString; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.name + } + fn value_formatter(&self) -> NameString { + self.0 + } + } + + pub struct FileTypeField(pub FileTypeValue); + + impl FormatDirective for FileTypeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = FileTypeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.kind + } + fn value_formatter(&self) -> FileTypeValue { + self.0 + } + } + + pub struct EntryCommentField(pub BinaryStringValue); + + impl FormatDirective for EntryCommentField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = BinaryStringValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.comment.as_bytes()) + } + fn value_formatter(&self) -> BinaryStringValue { + self.0 + } + } + + pub struct LocalHeaderStartField(pub OffsetValue); + + impl FormatDirective for LocalHeaderStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.local_header_start) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct ContentStartField(pub OffsetValue); + + impl FormatDirective for ContentStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.content_start) + } + fn value_formatter(&self) -> OffsetValue { + 
self.0 + } + } + + pub struct UncompressedSizeField(pub ByteSizeValue); + + impl FormatDirective for UncompressedSizeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.uncompressed_size + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct CompressedSizeField(pub ByteSizeValue); + + impl FormatDirective for CompressedSizeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.compressed_size + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct ContentEndField(pub OffsetValue); + + impl FormatDirective for ContentEndField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.content_end()) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct CentralHeaderStartField(pub OffsetValue); + + impl FormatDirective for CentralHeaderStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.central_header_start) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct CompressionMethodField(pub CompressionMethodValue); + + impl FormatDirective for CompressionMethodField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = CompressionMethodValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.compression + } + fn value_formatter(&self) -> CompressionMethodValue { + self.0 + } + } + + pub struct UnixModeField(pub UnixModeValue); + + impl FormatDirective for UnixModeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = UnixModeValue; + fn extract_field<'a>( + &self, + data: 
Self::Data<'a>, + ) -> ::Input<'a> { + data.unix_mode + } + fn value_formatter(&self) -> UnixModeValue { + self.0 + } + } + + pub struct Crc32Field(pub BinaryNumericValue); + + impl FormatDirective for Crc32Field { + type Data<'a> = &'a EntryData<'a>; + type FieldType = BinaryNumericValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.crc32 + } + fn value_formatter(&self) -> BinaryNumericValue { + self.0 + } + } + + pub struct TimestampField(pub TimestampValue); + + impl FormatDirective for TimestampField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = TimestampValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.last_modified_time + } + fn value_formatter(&self) -> TimestampValue { + self.0 + } + } + + pub mod compiled { + use super::{ + super::{compiled::CompiledFormat, DirectiveFormatter}, + *, + }; + use crate::{args::info::EntryFormatDirective, CommandError}; + + use std::io::Write; + + /// Used for type erasure by removing the lifetime-bounded associated type. + trait EntryDirectiveFormatter { + fn write_entry_directive<'a>( + &self, + data: &EntryData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError>; + } + + impl EntryDirectiveFormatter for CF + where + CF: for<'a> DirectiveFormatter = &'a EntryData<'a>>, + { + fn write_entry_directive<'a>( + &self, + data: &EntryData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.write_directive(data, out) + } + } + + /// This re-implements the generic trait using the type-erased boxed vtable. 
+ pub struct CompiledEntryDirective(Box); + + impl DirectiveFormatter for CompiledEntryDirective { + type Data<'a> = EntryData<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.0.write_entry_directive(&data, out) + } + } + + pub struct CompiledEntryFormat; + + impl CompiledFormat for CompiledEntryFormat { + type Spec = EntryFormatDirective; + type Fmt = CompiledEntryDirective; + + fn from_directive_spec( + spec: EntryFormatDirective, + ) -> Result { + Ok(CompiledEntryDirective(match spec { + EntryFormatDirective::Name => Box::new(EntryNameField(NameString)), + EntryFormatDirective::FileType(f) => Box::new(FileTypeField(FileTypeValue(f))), + EntryFormatDirective::CompressedSize(f) => { + Box::new(CompressedSizeField(ByteSizeValue(f))) + } + EntryFormatDirective::UncompressedSize(f) => { + Box::new(UncompressedSizeField(ByteSizeValue(f))) + } + EntryFormatDirective::UnixMode(f) => Box::new(UnixModeField(UnixModeValue(f))), + EntryFormatDirective::CompressionMethod(f) => { + Box::new(CompressionMethodField(CompressionMethodValue(f))) + } + EntryFormatDirective::Comment(f) => { + Box::new(EntryCommentField(BinaryStringValue(f))) + } + EntryFormatDirective::LocalHeaderStart(f) => { + Box::new(LocalHeaderStartField(OffsetValue(f))) + } + EntryFormatDirective::ContentStart(f) => { + Box::new(ContentStartField(OffsetValue(f))) + } + EntryFormatDirective::ContentEnd(f) => { + Box::new(ContentEndField(OffsetValue(f))) + } + EntryFormatDirective::CentralHeaderStart(f) => { + Box::new(CentralHeaderStartField(OffsetValue(f))) + } + EntryFormatDirective::CrcValue(f) => { + Box::new(Crc32Field(BinaryNumericValue(f))) + } + EntryFormatDirective::Timestamp(f) => { + Box::new(TimestampField(TimestampValue(f))) + } + })) + } + } + } +} + +pub mod archive { + use super::{ + super::{ + formats::{ + BinaryStringValue, ByteSizeValue, DecimalNumberValue, FormatValue, OffsetValue, + PathString, + }, + 
ArchiveWithPath, + }, + FormatDirective, + }; + + use std::path::Path; + + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + pub struct ArchiveData<'a> { + pub path: Option<&'a Path>, + pub stream_length: u64, + pub num_entries: usize, + pub comment: Option<&'a [u8]>, + pub first_entry_start: Option, + pub central_directory_start: Option, + } + + impl<'a> ArchiveData<'a> { + pub fn from_archive_with_path(zip: &'a ArchiveWithPath) -> Self { + Self { + path: Some(zip.path.as_path()), + stream_length: zip.len, + num_entries: zip.archive.len(), + comment: Some(zip.archive.comment()), + first_entry_start: Some(zip.archive.offset()), + central_directory_start: Some(zip.archive.central_directory_start()), + } + } + } + + pub struct ArchiveNameField(pub PathString); + + impl FormatDirective for ArchiveNameField { + type Data<'a> = ArchiveData<'a>; + type FieldType = PathString; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.path + } + fn value_formatter(&self) -> PathString { + self.0 + } + } + + pub struct ArchiveSizeField(pub ByteSizeValue); + + impl FormatDirective for ArchiveSizeField { + type Data<'a> = ArchiveData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.stream_length + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct NumEntriesField(pub DecimalNumberValue); + + impl FormatDirective for NumEntriesField { + type Data<'a> = ArchiveData<'a>; + type FieldType = DecimalNumberValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.num_entries.try_into().unwrap() + } + fn value_formatter(&self) -> DecimalNumberValue { + self.0 + } + } + + pub struct ArchiveCommentField(pub BinaryStringValue); + + impl FormatDirective for ArchiveCommentField { + type Data<'a> = ArchiveData<'a>; + type FieldType = BinaryStringValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { 
+ data.comment + } + fn value_formatter(&self) -> BinaryStringValue { + self.0 + } + } + + pub struct FirstEntryStartField(pub OffsetValue); + + impl FormatDirective for FirstEntryStartField { + type Data<'a> = ArchiveData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.first_entry_start + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct CentralDirectoryStartField(pub OffsetValue); + + impl FormatDirective for CentralDirectoryStartField { + type Data<'a> = ArchiveData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.central_directory_start + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub mod compiled { + use super::{ + super::{compiled::CompiledFormat, DirectiveFormatter}, + *, + }; + use crate::{args::info::ArchiveOverviewFormatDirective, CommandError}; + + use std::io::Write; + + trait ArchiveDirectiveFormatter { + fn write_archive_directive<'a>( + &self, + data: ArchiveData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError>; + } + + impl ArchiveDirectiveFormatter for CF + where + CF: for<'a> DirectiveFormatter = ArchiveData<'a>>, + { + fn write_archive_directive<'a>( + &self, + data: ArchiveData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.write_directive(data, out) + } + } + + pub struct CompiledArchiveDirective(Box); + + impl DirectiveFormatter for CompiledArchiveDirective { + type Data<'a> = ArchiveData<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.0.write_archive_directive(data, out) + } + } + + pub struct CompiledArchiveFormat; + + impl CompiledFormat for CompiledArchiveFormat { + type Spec = ArchiveOverviewFormatDirective; + type Fmt = CompiledArchiveDirective; + + fn from_directive_spec( + spec: ArchiveOverviewFormatDirective, + ) -> Result { 
+ Ok(CompiledArchiveDirective(match spec { + ArchiveOverviewFormatDirective::ArchiveName => { + Box::new(ArchiveNameField(PathString)) + } + ArchiveOverviewFormatDirective::TotalSize(f) => { + Box::new(ArchiveSizeField(ByteSizeValue(f))) + } + ArchiveOverviewFormatDirective::NumEntries => { + Box::new(NumEntriesField(DecimalNumberValue)) + } + ArchiveOverviewFormatDirective::ArchiveComment(f) => { + Box::new(ArchiveCommentField(BinaryStringValue(f))) + } + ArchiveOverviewFormatDirective::FirstEntryStart(f) => { + Box::new(FirstEntryStartField(OffsetValue(f))) + } + ArchiveOverviewFormatDirective::CentralDirectoryStart(f) => { + Box::new(CentralDirectoryStartField(OffsetValue(f))) + } + })) + } + } + } +} diff --git a/cli/src/info/formats.rs b/cli/src/info/formats.rs new file mode 100644 index 000000000..a320fb122 --- /dev/null +++ b/cli/src/info/formats.rs @@ -0,0 +1,425 @@ +use std::{ + convert::Infallible, + fmt, + io::{self, Write}, + path, +}; + +use zip::{CompressionMethod, DateTime}; + +use super::directives::Writeable; +use crate::{args::info::*, extract::receiver::EntryKind}; + +pub trait FormatValue { + type Input<'a>; + type Output<'a>; + type E; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E>; +} + +#[derive(Copy, Clone)] +pub struct NameString; + +impl FormatValue for NameString { + type Input<'a> = &'a str; + type Output<'a> = &'a str; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(input) + } +} + +#[derive(Copy, Clone)] +pub struct PathString; + +#[derive(Debug)] +pub enum PathWriter<'a> { + Path(path::Display<'a>), + None, +} + +impl<'a> fmt::Display for PathWriter<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Path(p) => path::Display::fmt(p, f), + Self::None => write!(f, ""), + } + } +} + +impl FormatValue for PathString { + type Input<'a> = Option<&'a path::Path>; + type Output<'a> = PathWriter<'a>; + type E = Infallible; + fn 
format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match input { + Some(p) => PathWriter::Path(p.display()), + None => PathWriter::None, + }) + } +} + +#[derive(Copy, Clone)] +pub struct FileTypeValue(pub FileTypeFormat); + +impl FormatValue for FileTypeValue { + type Input<'a> = EntryKind; + type Output<'a> = &'static str; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + FileTypeFormat::Full => match input { + EntryKind::File => "file", + EntryKind::Dir => "directory", + EntryKind::Symlink => "symlink", + }, + FileTypeFormat::Abbreviated => match input { + EntryKind::File => "-", + EntryKind::Dir => "d", + EntryKind::Symlink => "l", + }, + }) + } +} + +#[derive(Copy, Clone)] +pub struct CompressionMethodValue(pub CompressionMethodFormat); + +impl FormatValue for CompressionMethodValue { + type Input<'a> = CompressionMethod; + type Output<'a> = &'static str; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + CompressionMethodFormat::Full => match input { + CompressionMethod::Stored => "stored", + CompressionMethod::Deflated => "deflate", + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64 => "deflate64", + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2 => "bzip2", + #[cfg(feature = "zstd")] + CompressionMethod::Zstd => "zstd", + #[cfg(feature = "lzma")] + CompressionMethod::Lzma => "lzma", + #[cfg(feature = "xz")] + CompressionMethod::Xz => "xz", + _ => "unknown", + }, + CompressionMethodFormat::Abbreviated => match input { + CompressionMethod::Stored => "stor", + CompressionMethod::Deflated => "defl", + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64 => "df64", + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2 => "bz2", + #[cfg(feature = "zstd")] + CompressionMethod::Zstd => "zst", + #[cfg(feature = "lzma")] + CompressionMethod::Lzma => "lz", + #[cfg(feature = "xz")] + 
CompressionMethod::Xz => "xz", + _ => "?", + }, + }) + } +} + +#[derive(Copy, Clone)] +pub struct UnixModeValue(pub UnixModeFormat); + +impl UnixModeValue { + const S_IRUSR: u32 = 256; + const S_IWUSR: u32 = 128; + const S_IXUSR: u32 = 64; + + const S_IRGRP: u32 = 32; + const S_IWGRP: u32 = 16; + const S_IXGRP: u32 = 8; + + const S_IROTH: u32 = 4; + const S_IWOTH: u32 = 2; + const S_IXOTH: u32 = 1; + + const UNKNOWN_MODE_BITS: [u8; 9] = [b'?'; 9]; + + fn pretty_format_mode_bits(mode: u32) -> [u8; 9] { + let mut ret = [b'-'; 9]; + + if mode & Self::S_IRUSR == Self::S_IRUSR { + ret[0] = b'r'; + } + if mode & Self::S_IWUSR == Self::S_IWUSR { + ret[1] = b'w'; + } + if mode & Self::S_IXUSR == Self::S_IXUSR { + ret[2] = b'x'; + } + + if mode & Self::S_IRGRP == Self::S_IRGRP { + ret[3] = b'r'; + } + if mode & Self::S_IWGRP == Self::S_IWGRP { + ret[4] = b'w'; + } + if mode & Self::S_IXGRP == Self::S_IXGRP { + ret[5] = b'x'; + } + + if mode & Self::S_IROTH == Self::S_IROTH { + ret[6] = b'r'; + } + if mode & Self::S_IWOTH == Self::S_IWOTH { + ret[7] = b'w'; + } + if mode & Self::S_IXOTH == Self::S_IXOTH { + ret[8] = b'x'; + } + + ret + } +} + +#[derive(Debug)] +pub enum ModeValueWriter { + Octal(Option), + Pretty([u8; 9]), +} + +impl Writeable for ModeValueWriter { + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error> { + match self { + Self::Octal(mode) => match mode { + Some(bits) => write!(out, "{:o}", bits), + None => write!(out, "?"), + }, + Self::Pretty(bits) => out.write_all(bits.as_ref()), + } + } +} + +impl FormatValue for UnixModeValue { + type Input<'a> = Option; + type Output<'a> = ModeValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + UnixModeFormat::Octal => ModeValueWriter::Octal(input), + UnixModeFormat::Pretty => ModeValueWriter::Pretty(match input { + Some(bits) => Self::pretty_format_mode_bits(bits), + None => Self::UNKNOWN_MODE_BITS, + }), + }) + } +} + 
+#[derive(Copy, Clone)] +pub struct ByteSizeValue(pub ByteSizeFormat); + +#[derive(Debug)] +pub enum ByteSizeWriter { + FullDecimal(u64), +} + +impl fmt::Display for ByteSizeWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::FullDecimal(n) => write!(f, "{}", n), + } + } +} + +impl FormatValue for ByteSizeValue { + type Input<'a> = u64; + type Output<'a> = ByteSizeWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + ByteSizeFormat::FullDecimal => ByteSizeWriter::FullDecimal(input), + ByteSizeFormat::HumanAbbreviated => todo!("human abbreviated byte sizes"), + }) + } +} + +#[derive(Copy, Clone)] +pub struct DecimalNumberValue; + +impl FormatValue for DecimalNumberValue { + type Input<'a> = u64; + type Output<'a> = u64; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(input) + } +} + +#[derive(Copy, Clone)] +pub struct OffsetValue(pub OffsetFormat); + +#[derive(Debug)] +pub enum OffsetWriter { + Unknown, + Decimal(u64), + Hexadecimal(u64), +} + +impl fmt::Display for OffsetWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Unknown => write!(f, "?"), + Self::Decimal(x) => write!(f, "{}", x), + Self::Hexadecimal(x) => write!(f, "{:x}", x), + } + } +} + +impl FormatValue for OffsetValue { + type Input<'a> = Option; + type Output<'a> = OffsetWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + let input = match input { + None => return Ok(OffsetWriter::Unknown), + Some(input) => input, + }; + Ok(match self.0 { + OffsetFormat::Decimal => OffsetWriter::Decimal(input), + OffsetFormat::Hexadecimal => OffsetWriter::Hexadecimal(input), + }) + } +} + +#[derive(Copy, Clone)] +pub struct BinaryNumericValue(pub BinaryNumericValueFormat); + +#[derive(Debug)] +pub enum BinaryNumericValueWriter { + Decimal(u32), + 
Hexadecimal(u32), +} + +impl fmt::Display for BinaryNumericValueWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Decimal(x) => write!(f, "{}", x), + Self::Hexadecimal(x) => write!(f, "{:x}", x), + } + } +} + +impl FormatValue for BinaryNumericValue { + type Input<'a> = u32; + type Output<'a> = BinaryNumericValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + BinaryNumericValueFormat::Decimal => BinaryNumericValueWriter::Decimal(input), + BinaryNumericValueFormat::Hexadecimal => BinaryNumericValueWriter::Hexadecimal(input), + }) + } +} + +#[derive(Copy, Clone)] +pub struct BinaryStringValue(pub BinaryStringFormat); + +#[derive(Debug)] +pub enum BinaryStringWriter<'a> { + ReplaceNonUnicode(&'a [u8]), + EscapeAscii(&'a [u8]), + WriteExactly(&'a [u8]), +} + +impl<'a> BinaryStringWriter<'a> { + const INVALID_CHUNK_BUFS: [&'static str; 4] = ["", "�", "��", "���"]; +} + +impl<'a> Writeable for BinaryStringWriter<'a> { + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error> { + match self { + Self::ReplaceNonUnicode(s) => { + for chunk in s.utf8_chunks() { + write!(out, "{}", chunk.valid())?; + /* The length of invalid bytes is never longer than 3. 
*/ + write!(out, "{}", Self::INVALID_CHUNK_BUFS[chunk.invalid().len()])?; + } + Ok(()) + } + Self::EscapeAscii(s) => { + if s.is_empty() { + return write!(out, "\"\""); + } + write!(out, "\" ")?; + for b in s.iter().copied() { + write!(out, "{} ", b.escape_ascii())?; + } + write!(out, "\"")?; + Ok(()) + } + Self::WriteExactly(s) => out.write_all(s), + } + } +} + +impl FormatValue for BinaryStringValue { + type Input<'a> = Option<&'a [u8]>; + type Output<'a> = BinaryStringWriter<'a>; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + let input = input.unwrap_or(&[]); + Ok(match self.0 { + BinaryStringFormat::PrintAsString => BinaryStringWriter::ReplaceNonUnicode(input), + BinaryStringFormat::EscapeAscii => BinaryStringWriter::EscapeAscii(input), + BinaryStringFormat::WriteBinaryContents => BinaryStringWriter::WriteExactly(input), + }) + } +} + +#[derive(Copy, Clone)] +pub struct TimestampValue(pub TimestampFormat); + +#[derive(Debug)] +pub enum TimestampValueWriter { + None, + DateOnly(DateTime), + TimeOnly(DateTime), + DateAndTime(DateTime), +} + +impl fmt::Display for TimestampValueWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::None => write!(f, "?"), + Self::DateOnly(d) => write!(f, "{}-{}-{}", d.year(), d.month(), d.day()), + Self::TimeOnly(t) => write!(f, "{}:{}:{}", t.hour(), t.minute(), t.second()), + Self::DateAndTime(dt) => { + write!( + f, + "{}-{}-{} {}:{}:{}", + dt.year(), + dt.month(), + dt.day(), + dt.hour(), + dt.minute(), + dt.second() + ) + } + } + } +} + +impl FormatValue for TimestampValue { + type Input<'a> = Option; + type Output<'a> = TimestampValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + let input = match input { + None => return Ok(TimestampValueWriter::None), + Some(input) => input, + }; + Ok(match self.0 { + TimestampFormat::DateOnly => TimestampValueWriter::DateOnly(input), + 
TimestampFormat::TimeOnly => TimestampValueWriter::TimeOnly(input), + TimestampFormat::DateAndTime => TimestampValueWriter::DateAndTime(input), + }) + } +} diff --git a/cli/src/lib.rs b/cli/src/lib.rs new file mode 100644 index 000000000..24db1aaae --- /dev/null +++ b/cli/src/lib.rs @@ -0,0 +1,175 @@ +//! ??? + +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +use std::{fs, io}; + +pub mod args; +pub mod compress; +pub mod extract; +pub mod info; + +pub enum ErrHandle { + Output(W), + NoOutput, +} + +impl io::Write for ErrHandle +where + W: io::Write, +{ + fn write(&mut self, buf: &[u8]) -> io::Result { + match self { + Self::Output(w) => w.write(buf), + Self::NoOutput => Ok(buf.len()), + } + } + + fn flush(&mut self) -> io::Result<()> { + match self { + Self::Output(w) => w.flush(), + Self::NoOutput => Ok(()), + } + } +} + +pub enum OutputHandle { + File(fs::File), + InMem(io::Cursor>), +} + +impl io::Read for OutputHandle { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + Self::File(f) => f.read(buf), + Self::InMem(c) => c.read(buf), + } + } +} + +impl io::Write for OutputHandle { + fn write(&mut self, buf: &[u8]) -> io::Result { + match self { + Self::File(f) => f.write(buf), + Self::InMem(c) => c.write(buf), + } + } + + fn flush(&mut self) -> io::Result<()> { + match self { + Self::File(f) => f.flush(), + Self::InMem(c) => c.flush(), + } + } +} + +impl io::Seek for OutputHandle { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + match self { + Self::File(f) => f.seek(pos), + Self::InMem(c) => c.seek(pos), + } + } +} + +#[derive(Debug)] +pub enum CommandError { + InvalidArg(String), + InvalidData(String), + Io(String, io::Error), + Zip(String, zip::result::ZipError), +} + +pub trait WrapCommandErr: Sized { + fn wrap_err(self, context: &str) -> Result { + self.wrap_err_with(|| context.to_string()) + } + fn wrap_err_with(self, f: impl FnOnce() -> String) -> Result; +} + +impl WrapCommandErr for Result { + fn wrap_err_with(self, f: impl 
FnOnce() -> String) -> Result { + self.map_err(|e| CommandError::Io(f(), e)) + } +} + +impl WrapCommandErr for Result { + fn wrap_err_with(self, f: impl FnOnce() -> String) -> Result { + self.map_err(|e| CommandError::Zip(f(), e)) + } +} + +pub mod driver { + use std::env; + use std::io::{self, Write}; + use std::process; + + use super::args::{ArgParseError, CommandFormat, ZipCli, ZipCommand}; + use super::{CommandError, ErrHandle}; + + pub trait ExecuteCommand: CommandFormat { + fn execute(self, err: impl Write) -> Result<(), CommandError>; + + fn do_main(self, mut err: impl Write) -> ! + where + Self: Sized, + { + writeln!(&mut err, "{} args: {:?}", Self::COMMAND_NAME, &self).unwrap(); + match self.execute(err) { + Ok(()) => process::exit(ZipCli::NON_FAILURE_EXIT_CODE), + Err(e) => match e { + CommandError::InvalidArg(msg) => { + let msg = Self::generate_brief_help_text(&msg); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::ARGV_PARSE_FAILED_EXIT_CODE); + } + CommandError::InvalidData(msg) => { + let msg = format!("error processing zip data: {msg}\n"); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::ARGV_PARSE_FAILED_EXIT_CODE); + } + CommandError::Io(context, e) => { + let msg = format!("i/o error: {context}: {e}\n"); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::INTERNAL_ERROR_EXIT_CODE); + } + CommandError::Zip(context, e) => { + let msg = format!("zip error: {context}: {e}\n"); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::INTERNAL_ERROR_EXIT_CODE); + } + }, + } + } + } + + pub fn main() { + let ZipCli { verbose, command } = match ZipCli::parse_argv(env::args_os()) { + Ok(cli) => cli, + Err(e) => match e { + ArgParseError::StdoutMessage(msg) => { + io::stdout() + .write_all(msg.as_bytes()) + .expect("couldn't write message to stdout"); + process::exit(ZipCli::NON_FAILURE_EXIT_CODE); + } + ArgParseError::StderrMessage(msg) => { + /* If we can't write 
anything to stderr, no use aborting, so just exit. */ + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::ARGV_PARSE_FAILED_EXIT_CODE); + } + }, + }; + let err = if verbose { + ErrHandle::Output(io::stderr()) + } else { + ErrHandle::NoOutput + }; + + match command { + ZipCommand::Info(info) => info.do_main(err), + ZipCommand::Extract(extract) => extract.do_main(err), + ZipCommand::Compress(compress) => compress.do_main(err), + } + } +} diff --git a/cli/src/main.rs b/cli/src/main.rs new file mode 100644 index 000000000..95fae2ac9 --- /dev/null +++ b/cli/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + zip_cli::driver::main(); +} diff --git a/fuzz/fuzz_targets/fuzz_write.rs b/fuzz/fuzz_targets/fuzz_write.rs index 53653a60b..c3cc9089a 100755 --- a/fuzz/fuzz_targets/fuzz_write.rs +++ b/fuzz/fuzz_targets/fuzz_write.rs @@ -1,12 +1,12 @@ #![no_main] use arbitrary::Arbitrary; -use core::fmt::{Debug}; +use core::fmt::Debug; use libfuzzer_sys::fuzz_target; use replace_with::replace_with_or_abort; use std::fmt::{Arguments, Formatter, Write}; -use std::io::{Cursor, Seek, SeekFrom}; use std::io::Write as IoWrite; +use std::io::{Cursor, Seek, SeekFrom}; use std::path::PathBuf; use tikv_jemallocator::Jemalloc; use zip::result::{ZipError, ZipResult}; @@ -93,22 +93,36 @@ fn do_operation<'k>( flush_on_finish_file: bool, files_added: &mut usize, stringifier: &mut impl Write, - panic_on_error: bool + panic_on_error: bool, ) -> Result<(), Box> { writer.set_flush_on_finish_file(flush_on_finish_file); - let FileOperation { basic, mut path, reopen} = operation; + let FileOperation { + basic, + mut path, + reopen, + } = operation; match basic { BasicFileOperation::WriteNormalFile { - contents, mut options, .. + contents, + mut options, + .. 
} => { let uncompressed_size = contents.iter().map(|chunk| chunk.len()).sum::<usize>(); if uncompressed_size >= u32::MAX as usize { options = options.large_file(true); } if options == FullFileOptions::default() { - writeln!(stringifier, "writer.start_file_from_path({:?}, Default::default())?;", path)?; + writeln!( + stringifier, + "writer.start_file_from_path({:?}, Default::default())?;", + path + )?; } else { - writeln!(stringifier, "writer.start_file_from_path({:?}, {:?})?;", path, options)?; + writeln!( + stringifier, + "writer.start_file_from_path({:?}, {:?})?;", + path, options + )?; } writer.start_file_from_path(&*path, options)?; for chunk in contents.iter() { @@ -118,12 +132,20 @@ fn do_operation<'k>( *files_added += 1; } BasicFileOperation::WriteDirectory(options) => { - writeln!(stringifier, "writer.add_directory_from_path(&{:?}, {:?})?;", path, options)?; + writeln!( + stringifier, + "writer.add_directory_from_path(&{:?}, {:?})?;", + path, options + )?; writer.add_directory_from_path(&*path, options.to_owned())?; *files_added += 1; } BasicFileOperation::WriteSymlinkWithTarget { target, options } => { - writeln!(stringifier, "writer.add_symlink_from_path(&{:?}, {:?}, {:?});", path, target, options)?; + writeln!( + stringifier, + "writer.add_symlink_from_path(&{:?}, {:?}, {:?});", + path, target, options + )?; writer.add_symlink_from_path(&*path, target, options.to_owned())?; *files_added += 1; } @@ -132,8 +154,20 @@ fn do_operation<'k>( return Ok(()); }; deduplicate_paths(&mut path, &base_path); - do_operation(writer, *base, false, flush_on_finish_file, files_added, stringifier, panic_on_error)?; - writeln!(stringifier, "writer.shallow_copy_file_from_path({:?}, {:?});", base_path, path)?; + do_operation( + writer, + *base, + false, + flush_on_finish_file, + files_added, + stringifier, + panic_on_error, + )?; + writeln!( + stringifier, + "writer.shallow_copy_file_from_path({:?}, {:?});", + base_path, path + )?; writer.shallow_copy_file_from_path(&*base_path, 
&*path)?; *files_added += 1; } @@ -142,38 +176,65 @@ fn do_operation<'k>( return Ok(()); }; deduplicate_paths(&mut path, &base_path); - do_operation(writer, *base, false, flush_on_finish_file, files_added, stringifier, panic_on_error)?; - writeln!(stringifier, "writer.deep_copy_file_from_path({:?}, {:?});", base_path, path)?; + do_operation( + writer, + *base, + false, + flush_on_finish_file, + files_added, + stringifier, + panic_on_error, + )?; + writeln!( + stringifier, + "writer.deep_copy_file_from_path({:?}, {:?});", + base_path, path + )?; writer.deep_copy_file_from_path(&*base_path, path)?; *files_added += 1; } - BasicFileOperation::MergeWithOtherFile { operations, initial_junk } => { + BasicFileOperation::MergeWithOtherFile { + operations, + initial_junk, + } => { if initial_junk.is_empty() { - writeln!(stringifier, "let sub_writer = {{\n\ - let mut writer = ZipWriter::new(Cursor::new(Vec::new()));")?; + writeln!( + stringifier, + "let sub_writer = {{\n\ + let mut writer = ZipWriter::new(Cursor::new(Vec::new()));" + )?; } else { - writeln!(stringifier, - "let sub_writer = {{\n\ + writeln!( + stringifier, + "let sub_writer = {{\n\ let mut initial_junk = Cursor::new(vec!{:?});\n\ initial_junk.seek(SeekFrom::End(0))?; - let mut writer = ZipWriter::new(initial_junk);", initial_junk)?; + let mut writer = ZipWriter::new(initial_junk);", + initial_junk + )?; } let mut initial_junk = Cursor::new(initial_junk.into_vec()); initial_junk.seek(SeekFrom::End(0))?; let mut other_writer = zip::ZipWriter::new(initial_junk); let mut inner_files_added = 0; - operations.into_vec().into_iter().for_each(|(operation, abort)| { - let _ = do_operation( - &mut other_writer, - operation, - abort, - false, - &mut inner_files_added, - stringifier, - panic_on_error - ); - }); - writeln!(stringifier, "writer\n}};\nwriter.merge_archive(sub_writer.finish_into_readable()?)?;")?; + operations + .into_vec() + .into_iter() + .for_each(|(operation, abort)| { + let _ = do_operation( + &mut 
other_writer, + operation, + abort, + false, + &mut inner_files_added, + stringifier, + panic_on_error, + ); + }); + writeln!( + stringifier, + "writer\n}};\nwriter.merge_archive(sub_writer.finish_into_readable()?)?;" + )?; writer.merge_archive(other_writer.finish_into_readable()?)?; *files_added += inner_files_added; } @@ -193,15 +254,19 @@ fn do_operation<'k>( match reopen { ReopenOption::DoNotReopen => { writeln!(stringifier, "writer")?; - return Ok(()) - }, + return Ok(()); + } ReopenOption::ViaFinish => { let old_comment = writer.get_raw_comment().to_owned(); - writeln!(stringifier, "let mut writer = ZipWriter::new_append(writer.finish()?)?;")?; + writeln!( + stringifier, + "let mut writer = ZipWriter::new_append(writer.finish()?)?;" + )?; replace_with_or_abort(writer, |old_writer: zip::ZipWriter<Cursor<Vec<u8>>>| { (|| -> ZipResult<zip::ZipWriter<Cursor<Vec<u8>>>> { zip::ZipWriter::new_append(old_writer.finish()?) - })().unwrap_or_else(|_| { + })() + .unwrap_or_else(|_| { if panic_on_error { panic!("Failed to create new ZipWriter") } @@ -214,11 +279,15 @@ fn do_operation<'k>( } ReopenOption::ViaFinishIntoReadable => { let old_comment = writer.get_raw_comment().to_owned(); - writeln!(stringifier, "let mut writer = ZipWriter::new_append(writer.finish()?)?;")?; + writeln!( + stringifier, + "let mut writer = ZipWriter::new_append(writer.finish()?)?;" + )?; replace_with_or_abort(writer, |old_writer| { (|| -> ZipResult<zip::ZipWriter<Cursor<Vec<u8>>>> { zip::ZipWriter::new_append(old_writer.finish()?) 
- })().unwrap_or_else(|_| { + })() + .unwrap_or_else(|_| { if panic_on_error { panic!("Failed to create new ZipWriter") } @@ -231,7 +300,7 @@ fn do_operation<'k>( Ok(()) } -impl <'k> FuzzTestCase<'k> { +impl<'k> FuzzTestCase<'k> { fn execute(self, stringifier: &mut impl Write, panic_on_error: bool) -> ZipResult<()> { let mut initial_junk = Cursor::new(self.initial_junk.into_vec()); initial_junk.seek(SeekFrom::End(0))?; @@ -253,7 +322,7 @@ impl <'k> FuzzTestCase<'k> { self.flush_on_finish_file, &mut files_added, stringifier, - panic_on_error + panic_on_error, ); } if final_reopen { @@ -265,14 +334,21 @@ impl <'k> FuzzTestCase<'k> { } } -impl <'k> Debug for FuzzTestCase<'k> { +impl<'k> Debug for FuzzTestCase<'k> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { if self.initial_junk.is_empty() { - writeln!(f, "let mut writer = ZipWriter::new(Cursor::new(Vec::new()));")?; + writeln!( + f, + "let mut writer = ZipWriter::new(Cursor::new(Vec::new()));" + )?; } else { - writeln!(f, "let mut initial_junk = Cursor::new(vec!{:?});\n\ + writeln!( + f, + "let mut initial_junk = Cursor::new(vec!{:?});\n\ initial_junk.seek(SeekFrom::End(0))?;\n\ - let mut writer = ZipWriter::new(initial_junk);", &self.initial_junk)?; + let mut writer = ZipWriter::new(initial_junk);", + &self.initial_junk + )?; } let _ = self.clone().execute(f, false); Ok(()) diff --git a/src/compression.rs b/src/compression.rs index 83a7669bd..02c264641 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -10,7 +10,7 @@ use std::{fmt, io}; /// /// When creating ZIP files, you may choose the method to use with /// [`crate::write::FileOptions::compression_method`] -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)] #[cfg_attr(fuzzing, derive(arbitrary::Arbitrary))] #[non_exhaustive] pub enum CompressionMethod { diff --git a/src/extra_fields/extended_timestamp.rs b/src/extra_fields/extended_timestamp.rs index 1cc0f1de4..0cf794c3c 100644 
--- a/src/extra_fields/extended_timestamp.rs +++ b/src/extra_fields/extended_timestamp.rs @@ -4,7 +4,7 @@ use std::io::Read; /// extended timestamp, as described in <https://libzip.org/specifications/extrafld.txt> -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ExtendedTimestamp { mod_time: Option<u32>, ac_time: Option<u32>, diff --git a/src/write.rs b/src/write.rs index 48276cb9d..a8bd4ecd5 100644 --- a/src/write.rs +++ b/src/write.rs @@ -253,6 +253,7 @@ impl<'a> arbitrary::Arbitrary<'a> for EncryptWith<'a> { } /// Metadata for a file to be written +/* TODO: add accessors for this data as well so options can be introspected! */ #[derive(Clone, Debug, Copy, Eq, PartialEq)] pub struct FileOptions<'k, T: FileOptionExtension> { pub(crate) compression_method: CompressionMethod, @@ -780,6 +781,8 @@ impl<A: Read + Write + Seek> ZipWriter<A> { } } +/* TODO: consider a ZipWriter which works with just a Write bound to support streaming output? This + * would require some work, but is possible in the protocol. */ impl<W: Write + Seek> ZipWriter<W> { /// Initializes the archive. /// @@ -1396,6 +1399,7 @@ impl<W: Write + Seek> ZipWriter<W> { /// implementations may materialize a symlink as a regular file, possibly with the /// content incorrectly set to the symlink target. For maximum portability, consider /// storing a regular file instead. + /* TODO: support OsStr instead of just str, for non-unicode paths. */ pub fn add_symlink<N, T, E: FileOptionExtension>( &mut self, name: N,