From d441ebbf627bbbe62a95d14111ab096bb064e3ab Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 11 Aug 2022 13:28:47 -0500 Subject: [PATCH 1/3] refactor: Move off of IndexSet/HashSet This dropped `.text` by 14KB Anything in debug asserts or help/usage output doesn't matter for performance but I wouldn't be surprised if this was comparable since the container sizes we are talking about are relatively small. --- src/builder/debug_asserts.rs | 3 +- src/output/help.rs | 4 +- src/output/usage.rs | 15 +++--- src/parser/validator.rs | 3 +- src/util/flat_set.rs | 97 ++++++++++++++++++++++++++++++++++++ src/util/mod.rs | 2 + 6 files changed, 112 insertions(+), 12 deletions(-) create mode 100644 src/util/flat_set.rs diff --git a/src/builder/debug_asserts.rs b/src/builder/debug_asserts.rs index 520d206f35b..b70cc8f69eb 100644 --- a/src/builder/debug_asserts.rs +++ b/src/builder/debug_asserts.rs @@ -4,6 +4,7 @@ use clap_lex::RawOsStr; use crate::builder::ValueRange; use crate::mkeymap::KeyType; +use crate::util::FlatSet; use crate::util::Id; use crate::ArgAction; use crate::INTERNAL_ERROR_MSG; @@ -308,7 +309,7 @@ pub(crate) fn assert_app(cmd: &Command) { detect_duplicate_flags(&long_flags, "long"); detect_duplicate_flags(&short_flags, "short"); - let mut subs = indexmap::IndexSet::new(); + let mut subs = FlatSet::new(); for sc in cmd.get_subcommands() { assert!( subs.insert(sc.get_name()), diff --git a/src/output/help.rs b/src/output/help.rs index 1a2385a6283..6715d07539d 100644 --- a/src/output/help.rs +++ b/src/output/help.rs @@ -9,10 +9,10 @@ use std::usize; use crate::builder::PossibleValue; use crate::builder::{render_arg_val, Arg, Command}; use crate::output::{fmt::Colorizer, Usage}; +use crate::util::FlatSet; use crate::ArgAction; // Third party -use indexmap::IndexSet; use textwrap::core::display_width; /// `clap` Help Writer. @@ -821,7 +821,7 @@ impl<'help, 'cmd, 'writer> Help<'help, 'cmd, 'writer> { .cmd .get_arguments() .filter_map(|arg| arg.get_help_heading()) - .collect::>(); + .collect::>(); let mut first = if !pos.is_empty() { // Write positional args if any diff --git a/src/output/usage.rs b/src/output/usage.rs index ae7c3b4b5df..792ed53fc57 100644 --- a/src/output/usage.rs +++ b/src/output/usage.rs @@ -1,9 +1,8 @@ -use indexmap::IndexSet; - // Internal use crate::builder::{Arg, ArgPredicate, Command}; use crate::parser::ArgMatcher; use crate::util::ChildGraph; +use crate::util::FlatSet; use crate::util::Id; use crate::INTERNAL_ERROR_MSG; @@ -329,16 +328,16 @@ impl<'help, 'cmd> Usage<'help, 'cmd> { incls: &[Id], matcher: Option<&ArgMatcher>, incl_last: bool, - ) -> IndexSet { + ) -> FlatSet { debug!( "Usage::get_required_usage_from: incls={:?}, matcher={:?}, incl_last={:?}", incls, matcher.is_some(), incl_last ); - let mut ret_val = IndexSet::new(); + let mut ret_val = FlatSet::new(); - let mut unrolled_reqs = IndexSet::new(); + let mut unrolled_reqs = FlatSet::new(); let required_owned; let required = if let Some(required) = self.required { @@ -378,9 +377,9 @@ impl<'help, 'cmd> Usage<'help, 'cmd> { unrolled_reqs ); - let mut required_groups_members = IndexSet::new(); - let mut required_opts = IndexSet::new(); - let mut required_groups = IndexSet::new(); + let mut required_groups_members = FlatSet::new(); + let mut required_opts = FlatSet::new(); + let mut required_groups = FlatSet::new(); let mut required_positionals = Vec::new(); for req in unrolled_reqs.iter().chain(incls.iter()) { if let Some(arg) = self.cmd.find(req) { diff --git a/src/parser/validator.rs b/src/parser/validator.rs index 1470189d68a..cf16b391d00 100644 --- a/src/parser/validator.rs +++ b/src/parser/validator.rs @@ -5,6 +5,7 @@ use crate::output::fmt::Stream; use crate::output::Usage; use crate::parser::{ArgMatcher, ParseState}; use crate::util::ChildGraph; +use crate::util::FlatSet; use crate::util::Id; use crate::INTERNAL_ERROR_MSG; @@ -153,7 +154,7 @@ impl<'help, 'cmd> Validator<'help, 'cmd> { } debug!("Validator::build_conflict_err: name={:?}", name); - let mut seen = std::collections::HashSet::new(); + let mut seen = FlatSet::new(); let conflicts = conflict_ids .iter() .flat_map(|c_id| { diff --git a/src/util/flat_set.rs b/src/util/flat_set.rs new file mode 100644 index 00000000000..fe4eacdaf30 --- /dev/null +++ b/src/util/flat_set.rs @@ -0,0 +1,97 @@ +use std::borrow::Borrow; + +/// Flat (Vec) backed set +/// +/// This preserves insertion order +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct FlatSet { + inner: Vec, +} + +impl FlatSet { + pub(crate) fn new() -> Self { + Default::default() + } + + pub(crate) fn insert(&mut self, value: T) -> bool { + for existing in &self.inner { + if *existing == value { + return false; + } + } + self.inner.push(value); + true + } + + pub fn contains(&self, value: &Q) -> bool + where + T: Borrow, + Q: std::hash::Hash + Eq, + { + for existing in &self.inner { + if existing.borrow() == value { + return true; + } + } + false + } + + pub fn retain(&mut self, f: F) + where + F: FnMut(&T) -> bool, + { + self.inner.retain(f); + } + + pub(crate) fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + pub(crate) fn iter(&self) -> std::slice::Iter<'_, T> { + self.inner.iter() + } +} + +impl Default for FlatSet { + fn default() -> Self { + Self { + inner: Default::default(), + } + } +} + +impl IntoIterator for FlatSet { + type Item = T; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.inner.into_iter() + } +} + +impl<'s, T: PartialEq + Eq> IntoIterator for &'s FlatSet { + type Item = &'s T; + type IntoIter = std::slice::Iter<'s, T>; + + fn into_iter(self) -> Self::IntoIter { + self.inner.iter() + } +} + +impl Extend for FlatSet { + fn extend>(&mut self, iter: I) { + for value in iter { + self.insert(value); + } + } +} + +impl FromIterator for FlatSet { + fn from_iter>(iter: I) -> Self { + let mut set = Self::new(); + for value in iter { + set.insert(value); + } + set + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index 8adc8db17c4..7fb7f1e1bbb 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,5 +1,6 @@ #![allow(clippy::single_component_path_imports)] +mod flat_set; mod fnv; mod graph; mod id; @@ -7,6 +8,7 @@ mod str_to_bool; pub use self::fnv::Key; +pub(crate) use self::flat_set::FlatSet; pub(crate) use self::str_to_bool::str_to_bool; pub(crate) use self::str_to_bool::FALSE_LITERALS; pub(crate) use self::str_to_bool::TRUE_LITERALS; From 6e7fd6d4bce1d0460e8ad2df083090c5ed0f0157 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 11 Aug 2022 13:48:23 -0500 Subject: [PATCH 2/3] refactor: Move off of IndexMap/HashMap This dropped 17KB Again, performance shouldn't be too bad as the total number of argument id's passed in by the user shouldn't be huge, with the upper end being 5-15 except for in extreme cases like rustc accepting arguments from cargo via a file. --- Cargo.lock | 1 - Cargo.toml | 3 +- src/builder/command.rs | 4 +- src/parser/arg_matcher.rs | 12 +- src/parser/matches/arg_matches.rs | 6 +- src/parser/validator.rs | 3 +- src/util/flat_map.rs | 190 ++++++++++++++++++++++++++++++ src/util/mod.rs | 3 + 8 files changed, 206 insertions(+), 16 deletions(-) create mode 100644 src/util/flat_map.rs diff --git a/Cargo.lock b/Cargo.lock index b04e2f8503a..d64bdaca16f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -127,7 +127,6 @@ dependencies = [ "clap_derive", "clap_lex", "humantime", - "indexmap", "once_cell", "rustversion", "shlex", diff --git a/Cargo.toml b/Cargo.toml index 35565ebc2de..404f85f28f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,7 +63,7 @@ debug = ["clap_derive/debug", "dep:backtrace"] # Enables debug messages unstable-doc = ["derive", "cargo", "wrap_help", "env", "unicode", "unstable-replace", "unstable-grouped"] # for docs.rs # Used in default -std = ["indexmap/std"] # support for no_std in a backwards-compatible way +std = [] # support for no_std in a backwards-compatible way color = ["dep:atty", "dep:termcolor"] suggestions = ["dep:strsim"] @@ -89,7 +89,6 @@ clap_lex = { path = "./clap_lex", version = "0.2.2" } bitflags = "1.2" textwrap = { version = "0.15.0", default-features = false, features = [] } unicase = { version = "2.6", optional = true } -indexmap = "1.0" strsim = { version = "0.10", optional = true } atty = { version = "0.2", optional = true } termcolor = { version = "1.1.1", optional = true } diff --git a/src/builder/command.rs b/src/builder/command.rs index f3d70c7a0da..593c37ac301 100644 --- a/src/builder/command.rs +++ b/src/builder/command.rs @@ -1,5 +1,4 @@ // Std -use std::collections::HashMap; use std::env; use std::ffi::OsString; use std::fmt; @@ -20,6 +19,7 @@ use crate::output::fmt::Stream; use crate::output::{fmt::Colorizer, Help, HelpWriter, Usage}; use crate::parser::{ArgMatcher, ArgMatches, Parser}; use crate::util::ChildGraph; +use crate::util::FlatMap; use crate::util::{color::ColorChoice, Id, Key}; use crate::{Error, INTERNAL_ERROR_MSG}; @@ -93,7 +93,7 @@ pub struct Command<'help> { g_settings: AppFlags, args: MKeyMap<'help>, subcommands: Vec>, - replacers: HashMap<&'help str, &'help [&'help str]>, + replacers: FlatMap<&'help str, &'help [&'help str]>, groups: Vec>, current_help_heading: Option<&'help str>, current_disp_ord: Option, diff --git a/src/parser/arg_matcher.rs b/src/parser/arg_matcher.rs index 30459145f62..4fb763fe476 100644 --- a/src/parser/arg_matcher.rs +++ b/src/parser/arg_matcher.rs @@ -1,5 +1,4 @@ // Std -use std::collections::HashMap; use std::ffi::OsString; use std::mem; use std::ops::Deref; @@ -10,6 +9,7 @@ use crate::parser::AnyValue; use crate::parser::Identifier; use crate::parser::PendingArg; use crate::parser::{ArgMatches, MatchedArg, SubCommand, ValueSource}; +use crate::util::FlatMap; use crate::util::Id; use crate::INTERNAL_ERROR_MSG; @@ -46,14 +46,14 @@ impl ArgMatcher { "ArgMatcher::get_global_values: global_arg_vec={:?}", global_arg_vec ); - let mut vals_map = HashMap::new(); + let mut vals_map = FlatMap::new(); self.fill_in_global_values(global_arg_vec, &mut vals_map); } fn fill_in_global_values( &mut self, global_arg_vec: &[Id], - vals_map: &mut HashMap, + vals_map: &mut FlatMap, ) { for global_arg in global_arg_vec { if let Some(ma) = self.get(global_arg) { @@ -99,18 +99,18 @@ impl ArgMatcher { } pub(crate) fn remove(&mut self, arg: &Id) { - self.matches.args.swap_remove(arg); + self.matches.args.remove(arg); } pub(crate) fn contains(&self, arg: &Id) -> bool { self.matches.args.contains_key(arg) } - pub(crate) fn arg_ids(&self) -> indexmap::map::Keys { + pub(crate) fn arg_ids(&self) -> std::slice::Iter<'_, Id> { self.matches.args.keys() } - pub(crate) fn entry(&mut self, arg: &Id) -> indexmap::map::Entry { + pub(crate) fn entry(&mut self, arg: &Id) -> crate::util::Entry { self.matches.args.entry(arg.clone()) } diff --git a/src/parser/matches/arg_matches.rs b/src/parser/matches/arg_matches.rs index 79249c85259..1f991c659a6 100644 --- a/src/parser/matches/arg_matches.rs +++ b/src/parser/matches/arg_matches.rs @@ -5,15 +5,13 @@ use std::fmt::Debug; use std::iter::{Cloned, Flatten, Map}; use std::slice::Iter; -// Third Party -use indexmap::IndexMap; - // Internal use crate::parser::AnyValue; use crate::parser::AnyValueId; use crate::parser::MatchedArg; use crate::parser::MatchesError; use crate::parser::ValueSource; +use crate::util::FlatMap; use crate::util::{Id, Key}; use crate::INTERNAL_ERROR_MSG; @@ -68,7 +66,7 @@ pub struct ArgMatches { pub(crate) valid_args: Vec, #[cfg(debug_assertions)] pub(crate) valid_subcommands: Vec, - pub(crate) args: IndexMap, + pub(crate) args: FlatMap, pub(crate) subcommand: Option>, } diff --git a/src/parser/validator.rs b/src/parser/validator.rs index cf16b391d00..545a61758f1 100644 --- a/src/parser/validator.rs +++ b/src/parser/validator.rs @@ -5,6 +5,7 @@ use crate::output::fmt::Stream; use crate::output::Usage; use crate::parser::{ArgMatcher, ParseState}; use crate::util::ChildGraph; +use crate::util::FlatMap; use crate::util::FlatSet; use crate::util::Id; use crate::INTERNAL_ERROR_MSG; @@ -383,7 +384,7 @@ impl<'help, 'cmd> Validator<'help, 'cmd> { #[derive(Default, Clone, Debug)] struct Conflicts { - potential: std::collections::HashMap>, + potential: FlatMap>, } impl Conflicts { diff --git a/src/util/flat_map.rs b/src/util/flat_map.rs new file mode 100644 index 00000000000..9b709d29c48 --- /dev/null +++ b/src/util/flat_map.rs @@ -0,0 +1,190 @@ +use std::borrow::Borrow; + +/// Flat (Vec) backed map +/// +/// This preserves insertion order +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct FlatMap { + keys: Vec, + values: Vec, +} + +impl FlatMap { + pub(crate) fn new() -> Self { + Default::default() + } + + pub(crate) fn insert(&mut self, key: K, mut value: V) -> Option { + for (index, existing) in self.keys.iter().enumerate() { + if *existing == key { + std::mem::swap(&mut self.values[index], &mut value); + return Some(value); + } + } + + self.keys.push(key); + self.values.push(value); + None + } + + pub fn contains_key(&self, key: &Q) -> bool + where + K: Borrow, + Q: std::hash::Hash + Eq, + { + for existing in &self.keys { + if existing.borrow() == key { + return true; + } + } + false + } + + pub fn remove(&mut self, key: &Q) -> Option + where + K: Borrow, + Q: std::hash::Hash + Eq, + { + let index = self + .keys + .iter() + .enumerate() + .find_map(|(i, k)| (k.borrow() == key).then(|| i))?; + self.keys.remove(index); + Some(self.values.remove(index)) + } + + pub(crate) fn is_empty(&self) -> bool { + self.keys.is_empty() + } + + pub fn entry(&mut self, key: K) -> Entry { + for (index, existing) in self.keys.iter().enumerate() { + if *existing == key { + return Entry::Occupied(OccupiedEntry { v: self, index }); + } + } + Entry::Vacant(VacantEntry { v: self, key }) + } + + pub fn get(&self, k: &Q) -> Option<&V> + where + K: Borrow, + Q: std::hash::Hash + Eq, + { + for (index, existing) in self.keys.iter().enumerate() { + if existing.borrow() == k { + return Some(&self.values[index]); + } + } + None + } + + pub fn get_mut(&mut self, k: &Q) -> Option<&mut V> + where + K: Borrow, + Q: std::hash::Hash + Eq, + { + for (index, existing) in self.keys.iter().enumerate() { + if existing.borrow() == k { + return Some(&mut self.values[index]); + } + } + None + } + + pub fn keys(&self) -> std::slice::Iter<'_, K> { + self.keys.iter() + } + + pub fn iter_mut(&mut self) -> IterMut { + IterMut { + keys: self.keys.iter_mut(), + values: self.values.iter_mut(), + } + } +} + +impl Default for FlatMap { + fn default() -> Self { + Self { + keys: Default::default(), + values: Default::default(), + } + } +} + +pub enum Entry<'a, K: 'a, V: 'a> { + Vacant(VacantEntry<'a, K, V>), + Occupied(OccupiedEntry<'a, K, V>), +} + +impl<'a, K: 'a, V: 'a> Entry<'a, K, V> { + pub fn or_insert(self, default: V) -> &'a mut V { + match self { + Entry::Occupied(entry) => &mut entry.v.values[entry.index], + Entry::Vacant(entry) => { + entry.v.keys.push(entry.key); + entry.v.values.push(default); + entry.v.values.last_mut().unwrap() + } + } + } + + pub fn or_insert_with V>(self, default: F) -> &'a mut V { + match self { + Entry::Occupied(entry) => &mut entry.v.values[entry.index], + Entry::Vacant(entry) => { + entry.v.keys.push(entry.key); + entry.v.values.push(default()); + entry.v.values.last_mut().unwrap() + } + } + } +} + +pub struct VacantEntry<'a, K: 'a, V: 'a> { + v: &'a mut FlatMap, + key: K, +} + +pub struct OccupiedEntry<'a, K: 'a, V: 'a> { + v: &'a mut FlatMap, + index: usize, +} + +pub struct IterMut<'a, K: 'a, V: 'a> { + keys: std::slice::IterMut<'a, K>, + values: std::slice::IterMut<'a, V>, +} + +impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + match self.keys.next() { + Some(k) => { + let v = self.values.next().unwrap(); + Some((k, v)) + } + None => None, + } + } + fn size_hint(&self) -> (usize, Option) { + self.keys.size_hint() + } +} + +impl<'a, K, V> DoubleEndedIterator for IterMut<'a, K, V> { + fn next_back(&mut self) -> Option<(&'a K, &'a mut V)> { + match self.keys.next_back() { + Some(k) => { + let v = self.values.next_back().unwrap(); + Some((k, v)) + } + None => None, + } + } +} + +impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> {} diff --git a/src/util/mod.rs b/src/util/mod.rs index 7fb7f1e1bbb..73b7ad81a97 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,5 +1,6 @@ #![allow(clippy::single_component_path_imports)] +mod flat_map; mod flat_set; mod fnv; mod graph; @@ -8,6 +9,8 @@ mod str_to_bool; pub use self::fnv::Key; +pub(crate) use self::flat_map::Entry; +pub(crate) use self::flat_map::FlatMap; pub(crate) use self::flat_set::FlatSet; pub(crate) use self::str_to_bool::str_to_bool; pub(crate) use self::str_to_bool::FALSE_LITERALS; From e7ced880e22b6b8368e619a7ee6271773c9b938f Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 11 Aug 2022 14:31:31 -0500 Subject: [PATCH 3/3] docs: Fix for 1.63 --- src/builder/value_parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/builder/value_parser.rs b/src/builder/value_parser.rs index bfe03fa419c..322b68d6770 100644 --- a/src/builder/value_parser.rs +++ b/src/builder/value_parser.rs @@ -12,7 +12,7 @@ use crate::parser::AnyValueId; /// use within an application. /// /// See -/// - [`value_parser!`] for automatically selecting an implementation for a given type +/// - [`value_parser!`][crate::value_parser] for automatically selecting an implementation for a given type /// - [`ValueParser::new`] for additional [`TypedValueParser`] that can be used /// /// # Example