From 40ac1d12b4c228d219d61a3e8e796d306f83dcd7 Mon Sep 17 00:00:00 2001 From: Teo Kajander Date: Mon, 23 Oct 2023 14:39:51 +0300 Subject: [PATCH] Add support of local numbering systems for number symbols - Load number symbols for multiple number systems from cldr data - Add numbering_systems and default_numbering_system properties for Locale - Use default numbering system of the locale for formatting number symbols Fixes partially issue https://github.com/python-babel/babel/issues/446 --- babel/core.py | 28 +++++++++++++++++++++++++--- babel/numbers.py | 18 ++++++++++++------ scripts/import_cldr.py | 25 ++++++++++++++++++++----- tests/test_core.py | 24 ++++++++++++++++++++++-- tests/test_numbers.py | 12 ++++++++++++ 5 files changed, 91 insertions(+), 16 deletions(-) diff --git a/babel/core.py b/babel/core.py index f63b97b65..52c13a159 100644 --- a/babel/core.py +++ b/babel/core.py @@ -149,7 +149,7 @@ class Locale: `Locale` objects provide access to a collection of locale data, such as territory and language names, number and date format patterns, and more: - >>> locale.number_symbols['decimal'] + >>> locale.number_symbols['latn']['decimal'] u'.' If a locale is requested for which no locale data is available, an @@ -625,16 +625,38 @@ def currency_symbols(self) -> localedata.LocaleDataDict: @property def number_symbols(self) -> localedata.LocaleDataDict: - """Symbols used in number formatting. + """Symbols used in number formatting by number system. .. note:: The format of the value returned may change between Babel versions. - >>> Locale('fr', 'FR').number_symbols['decimal'] + >>> Locale('fr', 'FR').number_symbols["latn"]['decimal'] u',' + >>> Locale('fa', 'IR').number_symbols["arabext"]['decimal'] + u'٫' + >>> Locale('fa', 'IR').number_symbols["latn"]['decimal'] + u'.' """ return self._data['number_symbols'] + @property + def numbering_systems(self) -> localedata.LocaleDataDict: + """Mapping of numbering systems. 
+ >>> Locale('el', 'GR').numbering_systems['default'] + u'latn' + >>> Locale('el', 'GR').numbering_systems['traditional'] + [u'grek'] + """ + return self._data['numbering_systems'] + + @property + def default_numbering_system(self) -> str: + """The default numbering system used by the locale. + >>> Locale('el', 'GR').default_numbering_system + u'latn' + """ + return self.numbering_systems['default'] + @property def decimal_formats(self) -> localedata.LocaleDataDict: """Locale patterns for decimal number formatting. diff --git a/babel/numbers.py b/babel/numbers.py index e0df40cce..11c769234 100644 --- a/babel/numbers.py +++ b/babel/numbers.py @@ -325,7 +325,8 @@ def get_decimal_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: :param locale: the `Locale` object or locale identifier """ - return Locale.parse(locale).number_symbols.get('decimal', '.') + parsed_locale = Locale.parse(locale) + return parsed_locale.number_symbols[parsed_locale.default_numbering_system].get('decimal', '.') def get_plus_sign_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: @@ -336,7 +337,8 @@ def get_plus_sign_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: :param locale: the `Locale` object or locale identifier """ - return Locale.parse(locale).number_symbols.get('plusSign', '+') + parsed_locale = Locale.parse(locale) + return parsed_locale.number_symbols[parsed_locale.default_numbering_system].get('plusSign', '+') def get_minus_sign_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: @@ -347,7 +349,8 @@ def get_minus_sign_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: :param locale: the `Locale` object or locale identifier """ - return Locale.parse(locale).number_symbols.get('minusSign', '-') + parsed_locale = Locale.parse(locale) + return parsed_locale.number_symbols[parsed_locale.default_numbering_system].get('minusSign', '-') def get_exponential_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: @@ -358,7 +361,8 @@ def
get_exponential_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: :param locale: the `Locale` object or locale identifier """ - return Locale.parse(locale).number_symbols.get('exponential', 'E') + parsed_locale = Locale.parse(locale) + return parsed_locale.number_symbols[parsed_locale.default_numbering_system].get('exponential', 'E') def get_group_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: @@ -369,7 +373,8 @@ def get_group_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: :param locale: the `Locale` object or locale identifier """ - return Locale.parse(locale).number_symbols.get('group', ',') + parsed_locale = Locale.parse(locale) + return parsed_locale.number_symbols[parsed_locale.default_numbering_system].get('group', ',') def get_infinity_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: @@ -380,7 +385,8 @@ def get_infinity_symbol(locale: Locale | str | None = LC_NUMERIC) -> str: :param locale: the `Locale` object or locale identifier """ - return Locale.parse(locale).number_symbols.get('infinity', '∞') + parsed_locale = Locale.parse(locale) + return parsed_locale.number_symbols[parsed_locale.default_numbering_system].get('infinity', '∞') def format_number(number: float | decimal.Decimal | str, locale: Locale | str | None = LC_NUMERIC) -> str: diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py index 493787407..8d9285cbe 100755 --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -420,6 +420,7 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False): parse_interval_formats(data, calendar) parse_number_symbols(data, tree) + parse_numbering_systems(data, tree) parse_decimal_formats(data, tree) parse_scientific_formats(data, tree) parse_percent_formats(data, tree) @@ -739,14 +740,28 @@ def parse_calendar_datetime_skeletons(data, calendar): def parse_number_symbols(data, tree): number_symbols = data.setdefault('number_symbols', {}) - for symbol_elem in tree.findall('.//numbers/symbols'): - if 
_should_skip_number_elem(data, symbol_elem): # TODO: Support other number systems + for symbol_system_elem in tree.findall('.//numbers/symbols'): + number_system = symbol_system_elem.get('numberSystem') + if not number_system: continue - for elem in symbol_elem.findall('./*'): - if _should_skip_elem(elem): + for symbol_element in symbol_system_elem.findall('./*'): + if _should_skip_elem(symbol_element): continue - number_symbols[elem.tag] = str(elem.text) + + number_symbols.setdefault(number_system, {})[symbol_element.tag] = str(symbol_element.text) + + +def parse_numbering_systems(data, tree): + numbering_systems = data.setdefault('numbering_systems', {}) + default_number_system_node = tree.find('.//numbers/defaultNumberingSystem') + + if default_number_system_node is not None: + numbering_systems['default'] = str(default_number_system_node.text) + + other_numbering_systems_node = tree.find('.//numbers/otherNumberingSystems') or [] + for system in other_numbering_systems_node: + numbering_systems.setdefault(system.tag, []).append(str(system.text)) def parse_decimal_formats(data, tree): diff --git a/tests/test_core.py b/tests/test_core.py index aa370131d..130c44849 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -19,7 +19,7 @@ def test_locale_provides_access_to_cldr_locale_data(): locale = Locale('en', 'US') assert locale.display_name == 'English (United States)' - assert locale.number_symbols['decimal'] == '.' + assert locale.number_symbols["latn"]['decimal'] == '.' 
def test_locale_repr(): @@ -162,7 +162,27 @@ def test_currency_symbols_property(self): assert Locale('es', 'CO').currency_symbols['USD'] == 'US$' def test_number_symbols_property(self): - assert Locale('fr', 'FR').number_symbols['decimal'] == ',' + assert Locale('fr', 'FR').number_symbols["latn"]['decimal'] == ',' + assert Locale('ar', 'IL').number_symbols["arab"]['percentSign'] == '٪\u061c' + assert Locale('ar', 'IL').number_symbols["latn"]['percentSign'] == '\u200e%\u200e' + + def test_numbering_systems_property(self): + assert Locale('fr', 'FR').numbering_systems['default'] == 'latn' + assert Locale('fr', 'FR').numbering_systems['native'] == ['latn'] + assert 'traditional' not in Locale('fr', 'FR').numbering_systems + + assert Locale('el', 'GR').numbering_systems['default'] == 'latn' + assert Locale('el', 'GR').numbering_systems['native'] == ['latn'] + assert Locale('el', 'GR').numbering_systems['traditional'] == ['grek'] + + def test_default_numbering_systems_property(self): + assert Locale('en', 'GB').default_numbering_system == 'latn' + assert Locale('ar', 'EG').default_numbering_system == 'arab' + + @pytest.mark.all_locales + def test_all_locales_have_default_numbering_system(self, locale): + locale = Locale.parse(locale) + assert locale.default_numbering_system def test_decimal_formats(self): assert Locale('en', 'US').decimal_formats[None].pattern == '#,##0.###' diff --git a/tests/test_numbers.py b/tests/test_numbers.py index 3674453f6..93a3642a4 100644 --- a/tests/test_numbers.py +++ b/tests/test_numbers.py @@ -317,23 +317,35 @@ def test_get_territory_currencies(): def test_get_decimal_symbol(): assert numbers.get_decimal_symbol('en_US') == '.' 
+ assert numbers.get_decimal_symbol('sv_SE') == ',' + assert numbers.get_decimal_symbol('ar_EG') == '٫' def test_get_plus_sign_symbol(): assert numbers.get_plus_sign_symbol('en_US') == '+' + assert numbers.get_plus_sign_symbol('he_IL') == '\u200e+' def test_get_minus_sign_symbol(): assert numbers.get_minus_sign_symbol('en_US') == '-' assert numbers.get_minus_sign_symbol('nl_NL') == '-' + assert numbers.get_minus_sign_symbol('he_IL') == '\u200e-' def test_get_exponential_symbol(): assert numbers.get_exponential_symbol('en_US') == 'E' + assert numbers.get_exponential_symbol('ja_JP') == 'E' + assert numbers.get_exponential_symbol('ar_EG') == 'اس' def test_get_group_symbol(): assert numbers.get_group_symbol('en_US') == ',' + assert numbers.get_group_symbol('sv') == "\xa0" + assert numbers.get_group_symbol('ar_EG') == '٬' + + +def test_get_infinity_symbol(): + assert numbers.get_infinity_symbol('en_US') == '∞' def test_decimal_precision():