Skip to content

Commit

Permalink
Compile noninteger TR35 operands to zeroes when emitting Gettext
Browse files Browse the repository at this point in the history
  • Loading branch information
akx committed Jan 4, 2016
1 parent 3aa3f29 commit 1090e42
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 9 deletions.
30 changes: 22 additions & 8 deletions babel/plural.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,14 @@ def __init__(self, rules):
found = set()
self.abstract = []
for key, expr in sorted(list(rules)):
if key == 'other':
continue
if key not in _plural_tags:
raise ValueError('unknown tag %r' % key)
elif key in found:
raise ValueError('tag %r defined twice' % key)
found.add(key)
self.abstract.append((key, _Parser(expr).ast))
ast = _Parser(expr).ast
if ast:
self.abstract.append((key, ast))

def __repr__(self):
rules = self.rules
Expand Down Expand Up @@ -303,7 +303,7 @@ class RuleError(Exception):
.format(_VARS))),
('value', re.compile(r'\d+')),
('symbol', re.compile(r'%|,|!=|=')),
('ellipsis', re.compile(r'\.\.'))
('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)) # U+2026: ELLIPSIS
]


Expand Down Expand Up @@ -388,6 +388,11 @@ class _Parser(object):

def __init__(self, string):
self.tokens = tokenize_rule(string)
if not self.tokens:
# If the pattern is only samples, it's entirely possible
# no stream of tokens whatsoever is generated.
self.ast = None
return
self.ast = self.condition()
if self.tokens:
raise RuleError('Expected end of rule, got %r' %
Expand Down Expand Up @@ -480,6 +485,9 @@ def _unary_compiler(tmpl):
return lambda self, x: tmpl % self.compile(x)


def compile_zero(x):
    """Return the literal ``'0'`` regardless of the argument.

    Shared by compiler classes as the implementation of ``compile_v``,
    ``compile_w``, ``compile_f`` and ``compile_t``.  When assigned as a
    class attribute it is invoked as a method, so ``x`` receives the
    compiler instance and is deliberately ignored.

    :param x: ignored
    :return: the string ``'0'``
    """
    return '0'


class _Compiler(object):
"""The compilers are able to transform the expressions into multiple
output formats.
Expand Down Expand Up @@ -526,6 +534,12 @@ def compile_relation(self, method, expr, range_list):
class _GettextCompiler(_Compiler):
"""Compile into a gettext plural expression."""

compile_i = _Compiler.compile_n
compile_v = compile_zero
compile_w = compile_zero
compile_f = compile_zero
compile_t = compile_zero

def compile_relation(self, method, expr, range_list):
rv = []
expr = self.compile(expr)
Expand All @@ -552,10 +566,10 @@ class _JavaScriptCompiler(_GettextCompiler):
# XXX: presently javascript does not support any of the
# fraction support and basically only deals with integers.
compile_i = lambda x: 'parseInt(n, 10)'
compile_v = lambda x: '0'
compile_w = lambda x: '0'
compile_f = lambda x: '0'
compile_t = lambda x: '0'
compile_v = compile_zero
compile_w = compile_zero
compile_f = compile_zero
compile_t = compile_zero

def compile_relation(self, method, expr, range_list):
code = _GettextCompiler.compile_relation(
Expand Down
16 changes: 15 additions & 1 deletion tests/test_plural.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import unittest
import pytest

from babel import plural
from babel import plural, localedata
from babel._compat import Decimal


Expand Down Expand Up @@ -254,3 +254,17 @@ def test_extract_operands(source, n, i, v, w, f, t):
source = Decimal(source) if isinstance(source, str) else source
assert (plural.extract_operands(source) ==
Decimal(n), i, v, w, f, t)


@pytest.mark.parametrize('locale', ('ru', 'pl'))
def test_gettext_compilation(locale):
    """Check that Gettext output contains none of the operands i/v/w/f/t.

    These plural operands come from recent CLDR versions and must not
    appear in the expression produced by ``plural.to_gettext``.
    """
    unsupported_operands = 'ivwft'
    locale_rules = localedata.load(locale)['plural_form'].rules

    # Precondition: the locale is only a meaningful test subject if at
    # least one of its rules mentions one of these operands as a
    # standalone, space-delimited token.
    assert any(
        (" " + operand + " ") in rule_text
        for rule_text in locale_rules.values()
        for operand in unsupported_operands
    )

    # The emitted Gettext expression must not contain any of those
    # operand characters.
    gettext_expression = plural.to_gettext(locale_rules)
    assert all(
        operand not in gettext_expression
        for operand in unsupported_operands
    )

1 comment on commit 1090e42

@sils
Copy link
Member

@sils sils commented on 1090e42 Jan 22, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unack, as per discussion on https://gitter.im/python-babel/babel?at=56a2775884cccde9258ae31f this can and should be split up.

Please sign in to comment.