MycroftAI · filips123 · Dec 11, 2020 · Dec 15, 2020 · Dec 16, 2020 · Dec 18, 2020
diff --git a/lingua_franca/format.py b/lingua_franca/format.py
@@ -18,31 +18,63 @@
 import os
 import re
 from collections import namedtuple
-from warnings import warn
 from os.path import join
-
+from warnings import warn
 
 from lingua_franca.bracket_expansion import SentenceTreeParser
 from lingua_franca.internal import localized_function, \
     populate_localized_function_dict, get_active_langs, \
-    get_full_lang_code, get_default_lang, get_default_loc, \
-    is_supported_full_lang, _raise_unsupported_language, \
-    UnsupportedLanguageError, NoneLangWarning, InvalidLangWarning, \
+    get_full_lang_code, get_default_loc, \
+    is_supported_full_lang, UnsupportedLanguageError, NoneLangWarning, InvalidLangWarning, \
     FunctionNotLocalizedError
 
-
 _REGISTERED_FUNCTIONS = ("nice_number",
                          "nice_time",
                          "pronounce_number",
                          "nice_response",
-                         "nice_duration")
+                         "nice_duration",
+                         "get_plural_category",
+                         "get_plural_form")
 
 populate_localized_function_dict("format", langs=get_active_langs())
 
 
-def _translate_word(name, lang=''):
+def _translate_word(name, amount=1, lang=''):
     """ Helper to get word translations
 
+    Args:
+        name (str): Word name. Returned as the default value if not translated
+        amount (int or float): Amount of that word. Used for pluralization
+        lang (str): Language code, e.g. "en-us"
+
+    Returns:
+        str: translated version of resource name
+    """
+    from lingua_franca.internal import resolve_resource_file
+    if not lang:
+        if lang is None:
+            warn(NoneLangWarning)
+        lang = get_default_loc()
+
+    lang_code = lang if is_supported_full_lang(lang) else get_full_lang_code(lang)
+    filename = resolve_resource_file(join("text", lang_code, "pluralizations.json"))
+    if filename:
+        try:
+            with open(filename, 'r', encoding='utf8') as file:
+                translations = json.load(file)
+            return translations[name][get_plural_category(amount, lang=lang)]
+        except Exception:
+            pass  # best effort: unreadable file or missing entry -> legacy lookup below
+    # Legacy lookup uses name or name + 's' (e.g. "day"/"days"); plural is anything but exactly 1.
+    return _translate_word_legacy(name + ('s' if amount != 1 else ''), lang)
+
+
+def _translate_word_legacy(name, lang=''):
+    """ Legacy helper to get word translations.
+
+    Do not use this function directly. Remove it once
+    all languages are migrated to the new format.
+
     Args:
         name (str): Word name. Returned as the default value if not translated
         lang (str): Language code, e.g. "en-us"
@@ -96,7 +128,7 @@ def cache(self, lang):
             except FileNotFoundError:
                 # Fallback to English formatting
                 with open(self.config_path + '/en-us/date_time.json',
-                          'r') as lang_config_file:
+                          'r', encoding='utf8') as lang_config_file:
                     self.lang_config[lang] = json.loads(
                         lang_config_file.read())
 
@@ -416,35 +448,23 @@ def nice_duration(duration, lang='', speech=True):
         out = ""
         if days > 0:
             out += pronounce_number(days, lang) + " "
-            if days == 1:
-                out += _translate_word("day", lang)
-            else:
-                out += _translate_word("days", lang)
+            out += _translate_word("day", amount=days, lang=lang)
             out += " "
         if hours > 0:
             if out:
                 out += " "
             out += pronounce_number(hours, lang) + " "
-            if hours == 1:
-                out += _translate_word("hour", lang)
-            else:
-                out += _translate_word("hours", lang)
+            out += _translate_word("hour", amount=hours, lang=lang)
         if minutes > 0:
             if out:
                 out += " "
             out += pronounce_number(minutes, lang) + " "
-            if minutes == 1:
-                out += _translate_word("minute", lang)
-            else:
-                out += _translate_word("minutes", lang)
+            out += _translate_word("minute", amount=minutes, lang=lang)
         if seconds > 0:
             if out:
                 out += " "
             out += pronounce_number(seconds, lang) + " "
-            if seconds == 1:
-                out += _translate_word("second", lang)
-            else:
-                out += _translate_word("seconds", lang)
+            out += _translate_word("second", amount=seconds, lang=lang)
     else:
         # M:SS, MM:SS, H:MM:SS, Dd H:MM:SS format
         out = ""
@@ -487,7 +507,7 @@ def join_list(items, connector, sep=None, lang=''):
     else:
         sep += " "
     return (sep.join(str(item) for item in items[:-1]) +
-            " " + _translate_word(connector, lang) +
+            " " + _translate_word(connector, lang=lang) +
             " " + items[-1])
 
 
@@ -544,3 +564,53 @@ def nice_response(text, lang=''):
         assertEqual(nice_response_de("10 ^ 2"),
                          "10 hoch 2")
     """
+
+
+@localized_function(run_own_code_on=[FunctionNotLocalizedError])
+def get_plural_category(amount, type="cardinal", lang=""):
+    """
+    Return the Unicode CLDR plural category that applies to an amount.
+
+    The body below only knows the cardinal "one"/"other" split and warns
+    whenever it runs (presumably as the fallback of localized_function).
+
+    References:
+    http://cldr.unicode.org/index/cldr-spec/plural-rules
+    https://unicode-org.github.io/cldr-staging/charts/37/supplemental/language_plural_rules.html
+
+    Args:
+        amount(int or float or pair or list): The amount to categorise.
+            For type "range" it holds the start and end numbers.
+        type(str): "cardinal" (default), "ordinal" or "range".
+        lang(str): The BCP-47 code for the language to use, None for default.
+    Returns:
+        (str): One of zero, one, two, few, many or other.
+    Raises:
+        FunctionNotLocalizedError: if this body is asked for anything
+            other than a cardinal category.
+    """
+    if type != "cardinal":
+        raise FunctionNotLocalizedError("This function has not been implemented in the specified language.")
+
+    # Tell the caller that only the basic singular/plural split is applied.
+    fallback_notice = ("Pluralization has not been implemented in the specified language. Falling back to "
+                       "basic singular and plural for compatibility with built-in functions.")
+    warn(RuntimeWarning(fallback_notice))
+    return "one" if amount == 1 else "other"
+
+
+@localized_function()
+def get_plural_form(word, amount, type="cardinal", lang=""):
+    """
+    Get plural form of the specified word for the specified amount.
+
+    NOTE(review): body is docstring-only; presumably resolved per language by @localized_function.
+    Args:
+        word(str): Word to be pluralized.
+        amount(int or float or pair or list): The amount that decides the
+            form. If type is range, it must contain the start and end numbers.
+        type(str): Either cardinal (default), ordinal or range.
+        lang(str): The BCP-47 code for the language to use, None for default.
+    Returns:
+        (str): Pluralized word.
+    """
diff --git a/lingua_franca/lang/format_en.py b/lingua_franca/lang/format_en.py
@@ -384,3 +384,30 @@ def nice_time_en(dt, speech=True, use_24hour=False, use_ampm=False):
                 speak += " a.m."
 
         return speak
+
+
+def get_plural_category_en(amount, type="cardinal"):
+    """Return the CLDR plural category of `amount` for English.
+
+    Args:
+        amount: number for cardinal/ordinal; [start, end] pair for range.
+        type (str): "cardinal" (default), "ordinal" or "range".
+    Returns:
+        str: "one"/"other" (cardinal), "one"/"two"/"few"/"other" (ordinal),
+            always "other" (range).
+    Raises:
+        ValueError: on a malformed range or an unknown type.
+    """
+    if type == "cardinal":
+        return "one" if amount == 1 else "other"
+    if type == "ordinal":
+        # 11th, 12th and 13th are irregular, hence the % 100 exclusions.
+        last_digit, last_two = amount % 10, amount % 100
+        if last_digit in (1, 2, 3) and last_two not in (11, 12, 13):
+            return {1: "one", 2: "two", 3: "few"}[last_digit]
+        return "other"
+    if type == "range":
+        if not isinstance(amount, (tuple, list)) or len(amount) != 2:
+            raise ValueError("Argument \"amount\" must be tuple|list type with the start and end numbers")
+        return "other"
+    raise ValueError("Argument \"type\" must be cardinal|ordinal|range")
diff --git a/lingua_franca/lang/format_sl.py b/lingua_franca/lang/format_sl.py
@@ -417,3 +417,34 @@ def _hour_declension(hour):
                 speak += " a.m."
 
         return speak
+
+
+def get_plural_category_sl(amount, type="cardinal"):
+    """Return the CLDR plural category of `amount` for Slovenian.
+
+    Args:
+        amount: number for cardinal/ordinal; [start, end] pair for range.
+        type (str): "cardinal" (default), "ordinal" or "range".
+    Returns:
+        str: "one", "two", "few" or "other".
+    Raises:
+        ValueError: on a malformed range or an unknown type.
+    """
+    if type == "cardinal":
+        if amount % 1 != 0:
+            return "few"  # every fractional amount
+        last_two = amount % 100
+        if last_two in (1, 2):
+            return "one" if last_two == 1 else "two"
+        return "few" if last_two in (3, 4) else "other"
+
+    if type == "ordinal":
+        return "other"
+
+    if type == "range":
+        if not isinstance(amount, (tuple, list)) or len(amount) != 2:
+            raise ValueError("Argument \"amount\" must be tuple|list type with the start and end numbers")
+        # Only the end number matters; an end of "one" maps to "few".
+        end = get_plural_category_sl(amount[1])
+        return "few" if end == "one" else end
+    raise ValueError("Argument \"type\" must be cardinal|ordinal|range")
diff --git a/lingua_franca/lang/parse_ca.py b/lingua_franca/lang/parse_ca.py
@@ -236,7 +236,7 @@ def extract_number_ca(text, short_scale=True, ordinals=False):
 
 
 class CatalanNormalizer(Normalizer):
-    with open(resolve_resource_file("text/ca-es/normalize.json")) as f:
+    with open(resolve_resource_file("text/ca-es/normalize.json"), encoding="utf8") as f:
         _default_config = json.load(f)
 
     @staticmethod

diff --git a/lingua_franca/lang/parse_cs.py b/lingua_franca/lang/parse_cs.py
@@ -1579,7 +1579,7 @@ def extract_numbers_cs(text, short_scale=True, ordinals=False):
 
 
 class CzechNormalizer(Normalizer):
-    with open(resolve_resource_file("text/cs-cz/normalize.json"), encoding='utf8') as f:
+    with open(resolve_resource_file("text/cs-cz/normalize.json"), encoding="utf8") as f:
         _default_config = json.load(f)
 
 

diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py
@@ -1470,7 +1470,7 @@ def extract_numbers_en(text, short_scale=True, ordinals=False):
 
 
 class EnglishNormalizer(Normalizer):
-    with open(resolve_resource_file("text/en-us/normalize.json")) as f:
+    with open(resolve_resource_file("text/en-us/normalize.json"), encoding="utf8") as f:
         _default_config = json.load(f)
 
     def numbers_to_digits(self, utterance):

diff --git a/lingua_franca/lang/parse_pt.py b/lingua_franca/lang/parse_pt.py
@@ -199,7 +199,7 @@ def extract_number_pt(text, short_scale=True, ordinals=False):
 
 
 class PortugueseNormalizer(Normalizer):
-    with open(resolve_resource_file("text/pt-pt/normalize.json")) as f:
+    with open(resolve_resource_file("text/pt-pt/normalize.json"), encoding="utf8") as f:
         _default_config = json.load(f)
 
     @staticmethod

diff --git a/lingua_franca/res/text/en-us/and.word b/lingua_franca/res/text/en-us/and.word
diff --git a/lingua_franca/res/text/en-us/day.word b/lingua_franca/res/text/en-us/day.word
diff --git a/lingua_franca/res/text/en-us/days.word b/lingua_franca/res/text/en-us/days.word
diff --git a/lingua_franca/res/text/en-us/hour.word b/lingua_franca/res/text/en-us/hour.word
diff --git a/lingua_franca/res/text/en-us/hours.word b/lingua_franca/res/text/en-us/hours.word
diff --git a/lingua_franca/res/text/en-us/minute.word b/lingua_franca/res/text/en-us/minute.word
diff --git a/lingua_franca/res/text/en-us/minutes.word b/lingua_franca/res/text/en-us/minutes.word
diff --git a/lingua_franca/res/text/en-us/or.word b/lingua_franca/res/text/en-us/or.word
diff --git a/lingua_franca/res/text/en-us/pluralizations.json b/lingua_franca/res/text/en-us/pluralizations.json
@@ -0,0 +1,24 @@
+{
+  "day": {
+    "one": "day",
+    "other": "days"
+  },
+  "hour": {
+    "one": "hour",
+    "other": "hours"
+  },
+  "minute": {
+    "one": "minute",
+    "other": "minutes"
+  },
+  "second": {
+    "one": "second",
+    "other": "seconds"
+  },
+  "and": {
+    "one": "and"
+  },
+  "or": {
+    "one": "or"
+  }
+}
diff --git a/lingua_franca/res/text/en-us/second.word b/lingua_franca/res/text/en-us/second.word
diff --git a/lingua_franca/res/text/en-us/seconds.word b/lingua_franca/res/text/en-us/seconds.word
diff --git a/lingua_franca/res/text/sl-si/and.word b/lingua_franca/res/text/sl-si/and.word
diff --git a/lingua_franca/res/text/sl-si/day.word b/lingua_franca/res/text/sl-si/day.word
diff --git a/lingua_franca/res/text/sl-si/days.word b/lingua_franca/res/text/sl-si/days.word
diff --git a/lingua_franca/res/text/sl-si/hour.word b/lingua_franca/res/text/sl-si/hour.word
diff --git a/lingua_franca/res/text/sl-si/hours.word b/lingua_franca/res/text/sl-si/hours.word
diff --git a/lingua_franca/res/text/sl-si/minute.word b/lingua_franca/res/text/sl-si/minute.word
diff --git a/lingua_franca/res/text/sl-si/minutes.word b/lingua_franca/res/text/sl-si/minutes.word
diff --git a/lingua_franca/res/text/sl-si/or.word b/lingua_franca/res/text/sl-si/or.word
diff --git a/lingua_franca/res/text/sl-si/pluralizations.json b/lingua_franca/res/text/sl-si/pluralizations.json
@@ -0,0 +1,32 @@
+{
+  "day": {
+    "one": "dan",
+    "two": "dneva",
+    "few": "dnevi",
+    "other": "dni"
+  },
+  "hour": {
+    "one": "ura",
+    "two": "uri",
+    "few": "ure",
+    "other": "ur"
+  },
+  "minute": {
+    "one": "minuta",
+    "two": "minuti",
+    "few": "minute",
+    "other": "minut"
+  },
+  "second": {
+    "one": "sekunda",
+    "two": "sekundi",
+    "few": "sekunde",
+    "other": "sekund"
+  },
+  "and": {
+    "one": "in"
+  },
+  "or": {
+    "one": "ali"
+  }
+}