From 184689fcfcc072a71eaa1759d46bac1fe70ab3ec Mon Sep 17 00:00:00 2001 From: Josh Kodroff Date: Sat, 18 Apr 2015 22:02:27 -0400 Subject: [PATCH 1/4] Break out Vocabulary/DefaultVocabulary to make Inflector more exensible. fixes #402 --- src/Humanizer.Tests/Humanizer.Tests.csproj | 1 + .../Inflections/DefaultVocabularyTests.cs | 171 ++++++++++++++++ src/Humanizer.Tests/InflectorTests.cs | 150 ++------------ src/Humanizer/Humanizer.csproj | 3 + src/Humanizer/Inflections/Plurality.cs | 21 ++ src/Humanizer/Inflections/Vocabularies.cs | 101 ++++++++++ src/Humanizer/Inflections/Vocabulary.cs | 172 ++++++++++++++++ src/Humanizer/InflectorExtensions.cs | 189 +----------------- 8 files changed, 486 insertions(+), 322 deletions(-) create mode 100644 src/Humanizer.Tests/Inflections/DefaultVocabularyTests.cs create mode 100644 src/Humanizer/Inflections/Plurality.cs create mode 100644 src/Humanizer/Inflections/Vocabularies.cs create mode 100644 src/Humanizer/Inflections/Vocabulary.cs diff --git a/src/Humanizer.Tests/Humanizer.Tests.csproj b/src/Humanizer.Tests/Humanizer.Tests.csproj index 2e9aa81c0..31a469395 100644 --- a/src/Humanizer.Tests/Humanizer.Tests.csproj +++ b/src/Humanizer.Tests/Humanizer.Tests.csproj @@ -69,6 +69,7 @@ + diff --git a/src/Humanizer.Tests/Inflections/DefaultVocabularyTests.cs b/src/Humanizer.Tests/Inflections/DefaultVocabularyTests.cs new file mode 100644 index 000000000..f40b53084 --- /dev/null +++ b/src/Humanizer.Tests/Inflections/DefaultVocabularyTests.cs @@ -0,0 +1,171 @@ +using System.Collections; +using System.Collections.Generic; +using Humanizer.Inflections; +using Xunit; +using Xunit.Extensions; + +namespace Humanizer.Tests.Inflections +{ + public class DefaultVocabularyTests + { + [Theory] + [ClassData(typeof(DefaultVocabularyTestSource))] + public void Pluralize(string singular, string plural) + { + Assert.Equal(plural, Vocabularies.Default.Pluralize(singular)); + } + + [Theory] + [ClassData(typeof(DefaultVocabularyTestSource))] + public 
void PluralizeWordsWithUnknownPlurality(string singular, string plural) + { + Assert.Equal(plural, Vocabularies.Default.Pluralize(plural, false)); + Assert.Equal(plural, Vocabularies.Default.Pluralize(singular, false)); + } + + [Theory] + [ClassData(typeof(DefaultVocabularyTestSource))] + public void Singularize(string singular, string plural) + { + Assert.Equal(singular, Vocabularies.Default.Singularize(plural)); + } + + [Theory] + [ClassData(typeof(DefaultVocabularyTestSource))] + public void SingularizeWordsWithUnknownSingularity(string singular, string plural) + { + Assert.Equal(singular, Vocabularies.Default.Singularize(singular, false)); + Assert.Equal(singular, Vocabularies.Default.Singularize(plural, false)); + } + } + + class DefaultVocabularyTestSource : IEnumerable + { + public IEnumerator GetEnumerator() + { + yield return new object[] { "search", "searches" }; + yield return new object[] { "switch", "switches" }; + yield return new object[] { "fix", "fixes" }; + yield return new object[] { "box", "boxes" }; + yield return new object[] { "process", "processes" }; + yield return new object[] { "address", "addresses" }; + yield return new object[] { "case", "cases" }; + yield return new object[] { "stack", "stacks" }; + yield return new object[] { "wish", "wishes" }; + yield return new object[] { "fish", "fish" }; + + yield return new object[] { "category", "categories" }; + yield return new object[] { "query", "queries" }; + yield return new object[] { "ability", "abilities" }; + yield return new object[] { "agency", "agencies" }; + yield return new object[] { "movie", "movies" }; + + yield return new object[] { "archive", "archives" }; + + yield return new object[] { "index", "indices" }; + + yield return new object[] { "wife", "wives" }; + yield return new object[] { "safe", "saves" }; + yield return new object[] { "half", "halves" }; + + yield return new object[] { "move", "moves" }; + + yield return new object[] { "salesperson", "salespeople" }; + 
yield return new object[] { "person", "people" }; + + yield return new object[] { "spokesman", "spokesmen" }; + yield return new object[] { "man", "men" }; + yield return new object[] { "woman", "women" }; + + yield return new object[] { "basis", "bases" }; + yield return new object[] { "diagnosis", "diagnoses" }; + + yield return new object[] { "datum", "data" }; + yield return new object[] { "medium", "media" }; + yield return new object[] { "analysis", "analyses" }; + + yield return new object[] { "node_child", "node_children" }; + yield return new object[] { "child", "children" }; + + yield return new object[] { "experience", "experiences" }; + yield return new object[] { "day", "days" }; + + yield return new object[] { "comment", "comments" }; + yield return new object[] { "foobar", "foobars" }; + yield return new object[] { "newsletter", "newsletters" }; + + yield return new object[] { "old_news", "old_news" }; + yield return new object[] { "news", "news" }; + + yield return new object[] { "series", "series" }; + yield return new object[] { "species", "species" }; + + yield return new object[] { "quiz", "quizzes" }; + + yield return new object[] { "perspective", "perspectives" }; + + yield return new object[] { "ox", "oxen" }; + yield return new object[] { "photo", "photos" }; + yield return new object[] { "buffalo", "buffaloes" }; + yield return new object[] { "tomato", "tomatoes" }; + yield return new object[] { "dwarf", "dwarves" }; + yield return new object[] { "elf", "elves" }; + yield return new object[] { "information", "information" }; + yield return new object[] { "equipment", "equipment" }; + yield return new object[] { "bus", "buses" }; + yield return new object[] { "status", "statuses" }; + yield return new object[] { "status_code", "status_codes" }; + yield return new object[] { "mouse", "mice" }; + + yield return new object[] { "louse", "lice" }; + yield return new object[] { "house", "houses" }; + yield return new object[] { "octopus", "octopi" 
}; + yield return new object[] { "virus", "viri" }; + yield return new object[] { "alias", "aliases" }; + yield return new object[] { "portfolio", "portfolios" }; + yield return new object[] { "criterion", "criteria" }; + + yield return new object[] { "vertex", "vertices" }; + yield return new object[] { "matrix", "matrices" }; + + yield return new object[] { "axis", "axes" }; + yield return new object[] { "testis", "testes" }; + yield return new object[] { "crisis", "crises" }; + + yield return new object[] { "rice", "rice" }; + yield return new object[] { "shoe", "shoes" }; + + yield return new object[] { "horse", "horses" }; + yield return new object[] { "prize", "prizes" }; + yield return new object[] { "edge", "edges" }; + + /* Tests added by Bas Jansen */ + yield return new object[] { "goose", "geese" }; + yield return new object[] { "deer", "deer" }; + yield return new object[] { "sheep", "sheep" }; + yield return new object[] { "wolf", "wolves" }; + yield return new object[] { "volcano", "volcanoes" }; + yield return new object[] { "aircraft", "aircraft" }; + yield return new object[] { "alumna", "alumnae" }; + yield return new object[] { "alumnus", "alumni" }; + yield return new object[] { "fungus", "fungi" }; + + yield return new object[] { "wave", "waves" }; + + yield return new object[] { "campus", "campuses" }; + + yield return new object[] { "is", "are" }; + + // Units of measurement: + yield return new object[] { "oz", "oz" }; + yield return new object[] { "tsp", "tsp" }; + yield return new object[] { "ml", "ml" }; + yield return new object[] { "l", "l" }; + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + } +} \ No newline at end of file diff --git a/src/Humanizer.Tests/InflectorTests.cs b/src/Humanizer.Tests/InflectorTests.cs index 071a13fa7..be4592412 100644 --- a/src/Humanizer.Tests/InflectorTests.cs +++ b/src/Humanizer.Tests/InflectorTests.cs @@ -21,45 +21,47 @@ //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
ARISING FROM, OUT OF OR IN //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -using System.Collections; using System.Collections.Generic; +using Humanizer.Inflections; using Xunit; using Xunit.Extensions; namespace Humanizer.Tests { - public class InflectorTests + public class InflectorTests { public readonly IList PluralTestData = new List(); + // All singular/pluralize tests just need to demonstrate that they're + // delegating to the default vocab. [Theory] - [ClassData(typeof(PluralTestSource))] + [InlineData("mouse", "mice")] public void Pluralize(string singular, string plural) { - Assert.Equal(plural, singular.Pluralize()); + Assert.Equal(plural, Vocabularies.Default.Pluralize(singular)); } [Theory] - [ClassData(typeof(PluralTestSource))] + [InlineData("mouse", "mice")] public void PluralizeWordsWithUnknownPlurality(string singular, string plural) { - Assert.Equal(plural, plural.Pluralize(false)); - Assert.Equal(plural, singular.Pluralize(false)); + Assert.Equal(plural, Vocabularies.Default.Pluralize(singular, false)); + Assert.Equal(plural, Vocabularies.Default.Pluralize(plural, false)); } [Theory] - [ClassData(typeof(PluralTestSource))] + [InlineData("mouse", "mice")] public void Singularize(string singular, string plural) { - Assert.Equal(singular, plural.Singularize()); + Assert.Equal(singular, Vocabularies.Default.Singularize(plural)); } [Theory] - [ClassData(typeof(PluralTestSource))] + [InlineData("mouse", "mice")] public void SingularizeWordsWithUnknownSingularity(string singular, string plural) { - Assert.Equal(singular, singular.Singularize(false)); - Assert.Equal(singular, plural.Singularize(false)); + Assert.Equal(singular, Vocabularies.Default.Singularize(singular, false)); + Assert.Equal(singular, Vocabularies.Default.Singularize(plural, false)); } //Uppercases individual words and removes some characters @@ -135,128 +137,4 @@ public void Underscore(string input, string expectedOuput) Assert.Equal(expectedOuput, 
input.Underscore()); } } - - class PluralTestSource : IEnumerable - { - public IEnumerator GetEnumerator() - { - yield return new object[] {"search", "searches"}; - yield return new object[] {"switch", "switches"}; - yield return new object[] {"fix", "fixes"}; - yield return new object[] {"box", "boxes"}; - yield return new object[] {"process", "processes"}; - yield return new object[] {"address", "addresses"}; - yield return new object[] {"case", "cases"}; - yield return new object[] {"stack", "stacks"}; - yield return new object[] {"wish", "wishes"}; - yield return new object[] {"fish", "fish"}; - - yield return new object[] {"category", "categories"}; - yield return new object[] {"query", "queries"}; - yield return new object[] {"ability", "abilities"}; - yield return new object[] {"agency", "agencies"}; - yield return new object[] {"movie", "movies"}; - - yield return new object[] {"archive", "archives"}; - - yield return new object[] {"index", "indices"}; - - yield return new object[] {"wife", "wives"}; - yield return new object[] {"safe", "saves"}; - yield return new object[] {"half", "halves"}; - - yield return new object[] {"move", "moves"}; - - yield return new object[] {"salesperson", "salespeople"}; - yield return new object[] {"person", "people"}; - - yield return new object[] {"spokesman", "spokesmen"}; - yield return new object[] {"man", "men"}; - yield return new object[] {"woman", "women"}; - - yield return new object[] {"basis", "bases"}; - yield return new object[] {"diagnosis", "diagnoses"}; - - yield return new object[] {"datum", "data"}; - yield return new object[] {"medium", "media"}; - yield return new object[] {"analysis", "analyses"}; - - yield return new object[] {"node_child", "node_children"}; - yield return new object[] {"child", "children"}; - - yield return new object[] {"experience", "experiences"}; - yield return new object[] {"day", "days"}; - - yield return new object[] {"comment", "comments"}; - yield return new object[] 
{"foobar", "foobars"}; - yield return new object[] {"newsletter", "newsletters"}; - - yield return new object[] {"old_news", "old_news"}; - yield return new object[] {"news", "news"}; - - yield return new object[] {"series", "series"}; - yield return new object[] {"species", "species"}; - - yield return new object[] {"quiz", "quizzes"}; - - yield return new object[] {"perspective", "perspectives"}; - - yield return new object[] {"ox", "oxen"}; - yield return new object[] {"photo", "photos"}; - yield return new object[] {"buffalo", "buffaloes"}; - yield return new object[] {"tomato", "tomatoes"}; - yield return new object[] {"dwarf", "dwarves"}; - yield return new object[] {"elf", "elves"}; - yield return new object[] {"information", "information"}; - yield return new object[] {"equipment", "equipment"}; - yield return new object[] {"bus", "buses"}; - yield return new object[] {"status", "statuses"}; - yield return new object[] {"status_code", "status_codes"}; - yield return new object[] {"mouse", "mice"}; - - yield return new object[] {"louse", "lice"}; - yield return new object[] {"house", "houses"}; - yield return new object[] {"octopus", "octopi"}; - yield return new object[] {"virus", "viri"}; - yield return new object[] {"alias", "aliases"}; - yield return new object[] {"portfolio", "portfolios"}; - yield return new object[] {"criterion", "criteria"}; - - yield return new object[] {"vertex", "vertices"}; - yield return new object[] {"matrix", "matrices"}; - - yield return new object[] {"axis", "axes"}; - yield return new object[] {"testis", "testes"}; - yield return new object[] {"crisis", "crises"}; - - yield return new object[] {"rice", "rice"}; - yield return new object[] {"shoe", "shoes"}; - - yield return new object[] {"horse", "horses"}; - yield return new object[] {"prize", "prizes"}; - yield return new object[] {"edge", "edges"}; - - /* Tests added by Bas Jansen */ - yield return new object[] {"goose", "geese"}; - yield return new object[] {"deer", 
"deer"}; - yield return new object[] {"sheep", "sheep"}; - yield return new object[] {"wolf", "wolves"}; - yield return new object[] {"volcano", "volcanoes"}; - yield return new object[] {"aircraft", "aircraft"}; - yield return new object[] {"alumna", "alumnae"}; - yield return new object[] {"alumnus", "alumni"}; - yield return new object[] {"fungus", "fungi"}; - - yield return new object[] {"wave","waves"}; - - yield return new object[] {"campus", "campuses"}; - - yield return new object[] { "is", "are" }; - } - - IEnumerator IEnumerable.GetEnumerator() - { - return GetEnumerator(); - } - } } diff --git a/src/Humanizer/Humanizer.csproj b/src/Humanizer/Humanizer.csproj index a9501157f..b6d878c46 100644 --- a/src/Humanizer/Humanizer.csproj +++ b/src/Humanizer/Humanizer.csproj @@ -57,6 +57,9 @@ + + + diff --git a/src/Humanizer/Inflections/Plurality.cs b/src/Humanizer/Inflections/Plurality.cs new file mode 100644 index 000000000..426b50abd --- /dev/null +++ b/src/Humanizer/Inflections/Plurality.cs @@ -0,0 +1,21 @@ +namespace Humanizer.Inflections +{ + /// + /// Provides hint for Humanizer as to whether a word is singular, plural or with unknown plurality + /// + public enum Plurality + { + /// + /// The word is singular + /// + Singular, + /// + /// The word is plural + /// + Plural, + /// + /// I am unsure of the plurality + /// + CouldBeEither + } +} \ No newline at end of file diff --git a/src/Humanizer/Inflections/Vocabularies.cs b/src/Humanizer/Inflections/Vocabularies.cs new file mode 100644 index 000000000..b2c63f02b --- /dev/null +++ b/src/Humanizer/Inflections/Vocabularies.cs @@ -0,0 +1,101 @@ +namespace Humanizer.Inflections +{ + public class Vocabularies + { + private static Vocabulary _default; + + /// + /// The default vocabulary used for singular/plural irregularities. + /// Rules can be added to this vocabulary and will be picked up by called to Singularize() and Pluralize(). 
+ /// At this time, multiple vocabularies and removing existing rules are not supported. + /// + public static Vocabulary Default + { + get + { + if (_default == null) + BuildDefault(); + + return _default; + } + } + + private static void BuildDefault() + { + _default = new Vocabulary(); + + _default.AddPlural("$", "s"); + _default.AddPlural("s$", "s"); + _default.AddPlural("(ax|test)is$", "$1es"); + _default.AddPlural("(octop|vir|alumn|fung)us$", "$1i"); + _default.AddPlural("(alias|status)$", "$1es"); + _default.AddPlural("(bu)s$", "$1ses"); + _default.AddPlural("(buffal|tomat|volcan)o$", "$1oes"); + _default.AddPlural("([ti])um$", "$1a"); + _default.AddPlural("sis$", "ses"); + _default.AddPlural("(?:([^f])fe|([lr])f)$", "$1$2ves"); + _default.AddPlural("(hive)$", "$1s"); + _default.AddPlural("([^aeiouy]|qu)y$", "$1ies"); + _default.AddPlural("(x|ch|ss|sh)$", "$1es"); + _default.AddPlural("(matr|vert|ind)ix|ex$", "$1ices"); + _default.AddPlural("([m|l])ouse$", "$1ice"); + _default.AddPlural("^(ox)$", "$1en"); + _default.AddPlural("(quiz)$", "$1zes"); + _default.AddPlural("(campus)$", "$1es"); + _default.AddPlural("^is$", "are"); + + _default.AddSingular("s$", ""); + _default.AddSingular("(n)ews$", "$1ews"); + _default.AddSingular("([ti])a$", "$1um"); + _default.AddSingular("((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1$2sis"); + _default.AddSingular("(^analy)ses$", "$1sis"); + _default.AddSingular("([^f])ves$", "$1fe"); + _default.AddSingular("(hive)s$", "$1"); + _default.AddSingular("(tive)s$", "$1"); + _default.AddSingular("([lr])ves$", "$1f"); + _default.AddSingular("([^aeiouy]|qu)ies$", "$1y"); + _default.AddSingular("(s)eries$", "$1eries"); + _default.AddSingular("(m)ovies$", "$1ovie"); + _default.AddSingular("(x|ch|ss|sh)es$", "$1"); + _default.AddSingular("([m|l])ice$", "$1ouse"); + _default.AddSingular("(bus)es$", "$1"); + _default.AddSingular("(o)es$", "$1"); + _default.AddSingular("(shoe)s$", "$1"); + 
_default.AddSingular("(cris|ax|test)es$", "$1is"); + _default.AddSingular("(octop|vir|alumn|fung)i$", "$1us"); + _default.AddSingular("(alias|status)es$", "$1"); + _default.AddSingular("^(ox)en", "$1"); + _default.AddSingular("(vert|ind)ices$", "$1ex"); + _default.AddSingular("(matr)ices$", "$1ix"); + _default.AddSingular("(quiz)zes$", "$1"); + _default.AddSingular("(campus)es$", "$1"); + _default.AddSingular("^are$", "is"); + + _default.AddIrregular("person", "people"); + _default.AddIrregular("man", "men"); + _default.AddIrregular("child", "children"); + _default.AddIrregular("sex", "sexes"); + _default.AddIrregular("move", "moves"); + _default.AddIrregular("goose", "geese"); + _default.AddIrregular("alumna", "alumnae"); + _default.AddIrregular("criterion", "criteria"); + _default.AddIrregular("wave", "waves"); + + _default.AddUncountable("equipment"); + _default.AddUncountable("information"); + _default.AddUncountable("rice"); + _default.AddUncountable("money"); + _default.AddUncountable("species"); + _default.AddUncountable("series"); + _default.AddUncountable("fish"); + _default.AddUncountable("sheep"); + _default.AddUncountable("deer"); + _default.AddUncountable("aircraft"); + _default.AddUncountable("oz"); + _default.AddUncountable("tsp"); + _default.AddUncountable("tbsp"); + _default.AddUncountable("ml"); + _default.AddUncountable("l"); + } + } +} \ No newline at end of file diff --git a/src/Humanizer/Inflections/Vocabulary.cs b/src/Humanizer/Inflections/Vocabulary.cs new file mode 100644 index 000000000..a2ec0fbbc --- /dev/null +++ b/src/Humanizer/Inflections/Vocabulary.cs @@ -0,0 +1,172 @@ +using System; +using System.Collections.Generic; +using System.Text.RegularExpressions; + +namespace Humanizer.Inflections +{ + /// + /// A container for exceptions to simple pluralization/singularization rules. + /// Vocabularies.Default contains an extensive list of rules for US English. 
+ /// At this time, multiple vocabularies and removing existing rules are not supported. + /// + public class Vocabulary + { + private readonly List _plurals = new List(); + private readonly List _singulars = new List(); + private readonly List _uncountables = new List(); + + /// + /// Adds a word to the vocabulary which cannot easily be pluralized/singularized by RegEx, e.g. "person" and "people". + /// + /// The singular form of the irregular word, e.g. "person". + /// The plural form of the irregular word, e.g. "people". + public void AddIrregular(string singular, string plural) + { + AddPlural("(" + singular[0] + ")" + singular.Substring(1) + "$", "$1" + plural.Substring(1)); + AddSingular("(" + plural[0] + ")" + plural.Substring(1) + "$", "$1" + singular.Substring(1)); + } + + /// + /// Adds an uncountable word to the vocabulary, e.g. "fish". Will be ignored when plurality is changed. + /// + /// Word to be added to the list of uncountables. + public void AddUncountable(string word) + { + _uncountables.Add(word.ToLower()); + } + + /// + /// Adds a rule to the vocabulary that does not follow trivial rules for pluralization, e.g. "bus" -> "buses" + /// + /// RegEx to be matched, case insensitive, e.g. "(bus)es$" + /// RegEx replacement e.g. "$1" + public void AddPlural(string rule, string replacement) + { + _plurals.Add(new Rule(rule, replacement)); + } + + /// + /// Adds a word to the vocabulary that does not follow trivial rules for singularization, e.g. "vertices/indices -> "vertex/index" + /// + /// RegEx to be matched, case insensitive, e.g. ""(vert|ind)ices$"" + /// RegEx replacement e.g. 
"$1ex" + public void AddSingular(string rule, string replacement) + { + _singulars.Add(new Rule(rule, replacement)); + } + + /// + /// Pluralizes the provided input considering irregular words + /// + /// Word to be pluralized + /// Normally you call Pluralize on singular words; but if you're unsure call it with Plurality.CouldBeEither + /// + [Obsolete("Use string.Pluralize(bool) instead. This method will be removed in next major release.")] + public string Pluralize(string word, Plurality plurality) + { + return plurality == Plurality.Plural ? word : Pluralize(word, inputIsKnownToBeSingular: false); + } + + /// + /// Pluralizes the provided input considering irregular words + /// + /// Word to be pluralized + /// Normally you call Pluralize on singular words; but if you're unsure call it with false + /// + public string Pluralize(string word, bool inputIsKnownToBeSingular = true) + { + var result = ApplyRules(_plurals, word); + + if (inputIsKnownToBeSingular) + return result; + + var asSingular = ApplyRules(_singulars, word); + var asSingularAsPlural = ApplyRules(_plurals, asSingular); + if (asSingular != null && asSingular != word && asSingular + "s" != word && asSingularAsPlural == word && result != word) + return word; + + return result; + } + + /// + /// Singularizes the provided input considering irregular words + /// + /// Word to be singularized + /// Normally you call Singularize on plural words; but if you're unsure call it with Plurality.CouldBeEither + /// + [Obsolete("Use string.Singularize(bool) instead. This method will be removed in next major release.")] + public string Singularize(string word, Plurality plurality) + { + return plurality == Plurality.Singular ? 
word : Singularize(word, inputIsKnownToBePlural: false); + } + + /// + /// Singularizes the provided input considering irregular words + /// + /// Word to be singularized + /// Normally you call Singularize on plural words; but if you're unsure call it with false + /// + public string Singularize(string word, bool inputIsKnownToBePlural = true) + { + + var result = ApplyRules(_singulars, word); + + if (inputIsKnownToBePlural) + return result; + + // the Plurality is unknown so we should check all possibilities + var asPlural = ApplyRules(_plurals, word); + var asPluralAsSingular = ApplyRules(_singulars, asPlural); + if (asPlural != word && word + "s" != asPlural && asPluralAsSingular == word && result != word) + return word; + + return result ?? word; + } + + private string ApplyRules(IList rules, string word) + { + if (word == null) + return null; + + if (IsUncountable(word)) + return word; + + var result = word; + for (int i = rules.Count - 1; i >= 0; i--) + { + if ((result = rules[i].Apply(word)) != null) + { + break; + } + } + return result; + } + + private bool IsUncountable(string word) + { + return _uncountables.Contains(word.ToLower()); + } + + private class Rule + { + private readonly Regex _regex; + private readonly string _replacement; + + public Rule(string pattern, string replacement) + { + _regex = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptionsUtil.Compiled); + _replacement = replacement; + } + + public string Apply(string word) + { + if (!_regex.IsMatch(word)) + { + return null; + } + + return _regex.Replace(word, _replacement); + } + } + } +} \ No newline at end of file diff --git a/src/Humanizer/InflectorExtensions.cs b/src/Humanizer/InflectorExtensions.cs index d31e5d712..31058aadf 100644 --- a/src/Humanizer/InflectorExtensions.cs +++ b/src/Humanizer/InflectorExtensions.cs @@ -22,153 +22,16 @@ //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
using System; -using System.Collections.Generic; using System.Text.RegularExpressions; +using Humanizer.Inflections; namespace Humanizer { - /// - /// Provides hint for Humanizer as to whether a word is singular, plural or with unknown plurality - /// - public enum Plurality - { - /// - /// The word is singular - /// - Singular, - /// - /// The word is plural - /// - Plural, - /// - /// I am unsure of the plurality - /// - CouldBeEither - } - /// /// Inflector extensions /// public static class InflectorExtensions { - static InflectorExtensions() - { - AddPlural("$", "s"); - AddPlural("s$", "s"); - AddPlural("(ax|test)is$", "$1es"); - AddPlural("(octop|vir|alumn|fung)us$", "$1i"); - AddPlural("(alias|status)$", "$1es"); - AddPlural("(bu)s$", "$1ses"); - AddPlural("(buffal|tomat|volcan)o$", "$1oes"); - AddPlural("([ti])um$", "$1a"); - AddPlural("sis$", "ses"); - AddPlural("(?:([^f])fe|([lr])f)$", "$1$2ves"); - AddPlural("(hive)$", "$1s"); - AddPlural("([^aeiouy]|qu)y$", "$1ies"); - AddPlural("(x|ch|ss|sh)$", "$1es"); - AddPlural("(matr|vert|ind)ix|ex$", "$1ices"); - AddPlural("([m|l])ouse$", "$1ice"); - AddPlural("^(ox)$", "$1en"); - AddPlural("(quiz)$", "$1zes"); - AddPlural("(campus)$", "$1es"); - AddPlural("^is$", "are"); - - AddSingular("s$", ""); - AddSingular("(n)ews$", "$1ews"); - AddSingular("([ti])a$", "$1um"); - AddSingular("((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1$2sis"); - AddSingular("(^analy)ses$", "$1sis"); - AddSingular("([^f])ves$", "$1fe"); - AddSingular("(hive)s$", "$1"); - AddSingular("(tive)s$", "$1"); - AddSingular("([lr])ves$", "$1f"); - AddSingular("([^aeiouy]|qu)ies$", "$1y"); - AddSingular("(s)eries$", "$1eries"); - AddSingular("(m)ovies$", "$1ovie"); - AddSingular("(x|ch|ss|sh)es$", "$1"); - AddSingular("([m|l])ice$", "$1ouse"); - AddSingular("(bus)es$", "$1"); - AddSingular("(o)es$", "$1"); - AddSingular("(shoe)s$", "$1"); - AddSingular("(cris|ax|test)es$", "$1is"); - AddSingular("(octop|vir|alumn|fung)i$", 
"$1us"); - AddSingular("(alias|status)es$", "$1"); - AddSingular("^(ox)en", "$1"); - AddSingular("(vert|ind)ices$", "$1ex"); - AddSingular("(matr)ices$", "$1ix"); - AddSingular("(quiz)zes$", "$1"); - AddSingular("(campus)es$", "$1"); - AddSingular("^are$", "is"); - - AddIrregular("person", "people"); - AddIrregular("man", "men"); - AddIrregular("child", "children"); - AddIrregular("sex", "sexes"); - AddIrregular("move", "moves"); - AddIrregular("goose", "geese"); - AddIrregular("alumna", "alumnae"); - AddIrregular("criterion", "criteria"); - AddIrregular("wave","waves"); - - AddUncountable("equipment"); - AddUncountable("information"); - AddUncountable("rice"); - AddUncountable("money"); - AddUncountable("species"); - AddUncountable("series"); - AddUncountable("fish"); - AddUncountable("sheep"); - AddUncountable("deer"); - AddUncountable("aircraft"); - } - - private class Rule - { - private readonly Regex _regex; - private readonly string _replacement; - - public Rule(string pattern, string replacement) - { - _regex = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptionsUtil.Compiled); - _replacement = replacement; - } - - public string Apply(string word) - { - if (!_regex.IsMatch(word)) - { - return null; - } - - return _regex.Replace(word, _replacement); - } - } - - private static void AddIrregular(string singular, string plural) - { - AddPlural("(" + singular[0] + ")" + singular.Substring(1) + "$", "$1" + plural.Substring(1)); - AddSingular("(" + plural[0] + ")" + plural.Substring(1) + "$", "$1" + singular.Substring(1)); - } - - private static void AddUncountable(string word) - { - Uncountables.Add(word.ToLower()); - } - - private static void AddPlural(string rule, string replacement) - { - Plurals.Add(new Rule(rule, replacement)); - } - - private static void AddSingular(string rule, string replacement) - { - Singulars.Add(new Rule(rule, replacement)); - } - - private static readonly List Plurals = new List(); - private static readonly List Singulars = new 
List(); - private static readonly List Uncountables = new List(); - /// /// Pluralizes the provided input considering irregular words /// @@ -189,17 +52,7 @@ public static string Pluralize(this string word, Plurality plurality) /// public static string Pluralize(this string word, bool inputIsKnownToBeSingular = true) { - var result = ApplyRules(Plurals, word); - - if (inputIsKnownToBeSingular) - return result; - - var asSingular = ApplyRules(Singulars, word); - var asSingularAsPlural = ApplyRules(Plurals, asSingular); - if (asSingular != null && asSingular != word && asSingular + "s" != word && asSingularAsPlural == word && result != word) - return word; - - return result; + return Vocabularies.Default.Pluralize(word, inputIsKnownToBeSingular); } /// @@ -222,43 +75,7 @@ public static string Singularize(this string word, Plurality plurality) /// public static string Singularize(this string word, bool inputIsKnownToBePlural = true) { - - var result = ApplyRules(Singulars, word); - - if (inputIsKnownToBePlural) - return result; - - // the Plurality is unknown so we should check all possibilities - var asPlural = ApplyRules(Plurals, word); - var asPluralAsSingular = ApplyRules(Singulars, asPlural); - if (asPlural != word && word + "s" != asPlural && asPluralAsSingular == word && result != word) - return word; - - return result ?? 
word; - } - - private static string ApplyRules(IList rules, string word) - { - if (word == null) - return null; - - if (IsUncountable(word)) - return word; - - var result = word; - for (int i = rules.Count - 1; i >= 0; i--) - { - if ((result = rules[i].Apply(word)) != null) - { - break; - } - } - return result; - } - - private static bool IsUncountable(string word) - { - return Uncountables.Contains(word.ToLower()); + return Vocabularies.Default.Singularize(word, inputIsKnownToBePlural); } /// From ea4f018b5021289ffc226e194f119774f3bbeee5 Mon Sep 17 00:00:00 2001 From: Josh Kodroff Date: Sat, 18 Apr 2015 22:12:14 -0400 Subject: [PATCH 2/4] Add missing XML comment. --- src/Humanizer/Inflections/Vocabularies.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Humanizer/Inflections/Vocabularies.cs b/src/Humanizer/Inflections/Vocabularies.cs index b2c63f02b..d5d8f2488 100644 --- a/src/Humanizer/Inflections/Vocabularies.cs +++ b/src/Humanizer/Inflections/Vocabularies.cs @@ -1,5 +1,8 @@ namespace Humanizer.Inflections { + /// + /// Container for registered Vocabularies. At present, only a single vocabulary is supported: Default. + /// public class Vocabularies { private static Vocabulary _default; From 23f762311a8735418f21eaddcc7794c8869c46e4 Mon Sep 17 00:00:00 2001 From: Josh Kodroff Date: Fri, 8 May 2015 10:52:37 -0400 Subject: [PATCH 3/4] Add documentation, fix broken tests, move default vocab tests to inflector tests. 
--- readme.md | 20 ++ release_notes.md | 1 + ...provalTest.approve_public_api.approved.txt | 17 ++ src/Humanizer.Tests/Humanizer.Tests.csproj | 5 +- .../Inflections/DefaultVocabularyTests.cs | 171 ------------------ src/Humanizer.Tests/InflectorTests.cs | 143 ++++++++++++++- src/Humanizer.sln.DotSettings | 4 +- src/Humanizer/Humanizer.csproj | 2 +- src/Humanizer/Inflections/Vocabulary.cs | 27 +-- src/Humanizer/{Inflections => }/Plurality.cs | 2 +- 10 files changed, 183 insertions(+), 209 deletions(-) delete mode 100644 src/Humanizer.Tests/Inflections/DefaultVocabularyTests.cs rename src/Humanizer/{Inflections => }/Plurality.cs (93%) diff --git a/readme.md b/readme.md index 1839ab43a..3d4b7972d 100644 --- a/readme.md +++ b/readme.md @@ -18,6 +18,7 @@ Humanizer meets all your .NET needs for manipulating and displaying strings, enu - [Inflector methods](#inflector-methods) - [Pluralize](#pluralize) - [Singularize](#singularize) + - [Adding Words](#adding-words) - [ToQuantity](#toquantity) - [Ordinalize](#ordinalize) - [Titleize](#titleize) @@ -459,6 +460,25 @@ Normally you would call `Singularize` on a plural word but if you're unsure abou The overload of `Singularize` with `plurality` argument is obsolete and will be removed in next major release. +####Adding Words +Sometimes, you may need to add a rule to the singularization/pluralization vocabulary (the examples below are already in the `Vocabularies.Default` vocabulary used by `Inflector`): + +```C# +// Adds a word to the vocabulary which cannot easily be pluralized/singularized by RegEx: +Vocabularies.Default.AddIrregular("person", "people"); + +// Adds an uncountable word to the vocabulary. Will be ignored when plurality is changed: +Vocabularies.Default.AddUncountable("fish"); + +// Adds a rule to the vocabulary that does not follow trivial rules for pluralization, e.g.
"bus" -> "buses" +Vocabularies.Default.AddPlural("(bu)s$", "$1ses"); + +// Adds a rule to the vocabulary that does not follow trivial rules for singularization +// (will match both "vertices" -> "vertex" and "indices" -> "index"): +Vocabularies.Default.AddSingular("(vert|ind)ices$", "$1ex"); + +``` + ####ToQuantity Many times you want to call `Singularize` and `Pluralize` to prefix a word with a number; e.g. "2 requests", "3 men". `ToQuantity` prefixes the provided word with the number and accordingly pluralizes or singularizes the word: diff --git a/release_notes.md b/release_notes.md index 18274410a..9b99aa563 100644 --- a/release_notes.md +++ b/release_notes.md @@ -1,6 +1,7 @@ ###In Development [Commits](https://github.com/MehdiK/Humanizer/compare/v1.35.0...master) +- [#408](https://github.com/MehdiK/Humanizer/pull/408): Added support for adding rules to singularization/pluralization by adding `Vocabulary` class and `Vocabularies.Default`. ###v1.35.0 - 2015-03-29 - [#399](https://github.com/MehdiK/Humanizer/pull/399): Added support for humanizing DateTimeOffset diff --git a/src/Humanizer.Tests/ApiApprover/PublicApiApprovalTest.approve_public_api.approved.txt b/src/Humanizer.Tests/ApiApprover/PublicApiApprovalTest.approve_public_api.approved.txt index 46ef0744c..1536c1e6b 100644 --- a/src/Humanizer.Tests/ApiApprover/PublicApiApprovalTest.approve_public_api.approved.txt +++ b/src/Humanizer.Tests/ApiApprover/PublicApiApprovalTest.approve_public_api.approved.txt @@ -243,6 +243,23 @@ public class In public System.DateTime TheYear(int year) { } } +public class Vocabularies +{ + public Vocabularies() { } + public Humanizer.Inflections.Vocabulary Default { get; } +} + +public class Vocabulary +{ + public Vocabulary() { } + public void AddIrregular(string singular, string plural) { } + public void AddPlural(string rule, string replacement) { } + public void AddSingular(string rule, string replacement) { } + public void AddUncountable(string word) { } + public string
Pluralize(string word, bool inputIsKnownToBeSingular) { } + public string Singularize(string word, bool inputIsKnownToBePlural) { } +} + public class InflectorExtensions { public string Camelize(string input) { } diff --git a/src/Humanizer.Tests/Humanizer.Tests.csproj b/src/Humanizer.Tests/Humanizer.Tests.csproj index 31a469395..3820b4293 100644 --- a/src/Humanizer.Tests/Humanizer.Tests.csproj +++ b/src/Humanizer.Tests/Humanizer.Tests.csproj @@ -69,7 +69,6 @@ - @@ -207,7 +206,9 @@ - + + +