Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JSX support for message extraction #310

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,8 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
:param comment_tags: a list of translator tags to search for and include
in the results
:param options: a dictionary of additional options (optional)
Supported options are:
* `jsx` -- set to false to disable JSX/E4X support.
"""
from babel.messages.jslexer import tokenize, unquote_string
funcname = message_lineno = None
Expand All @@ -472,7 +474,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
last_token = None
call_stack = -1

for token in tokenize(fileobj.read().decode(encoding)):
for token in tokenize(fileobj.read().decode(encoding), jsx=options.get("jsx", True)):
if token.type == 'operator' and token.value == '(':
if funcname:
message_lineno = token.lineno
Expand Down
10 changes: 8 additions & 2 deletions babel/messages/jslexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
([eE][-+]?\d+)? |
(0x[a-fA-F0-9]+)
)''')),
('jsx_tag', re.compile(r'<(?:/?)\w+.+?>', re.I)),
('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
('string', re.compile(r'''(?xs)(
'(?:[^'\\]*(?:\\.[^'\\]*)*)' |
Expand Down Expand Up @@ -127,8 +128,11 @@ def unquote_string(string):
return u''.join(result)


def tokenize(source):
"""Tokenize a JavaScript source. Returns a generator of tokens.
def tokenize(source, jsx=True):
"""
Tokenize JavaScript/JSX source. Returns a generator of tokens.

:param jsx: Enable (limited) JSX parsing.
"""
may_divide = False
pos = 0
Expand All @@ -138,6 +142,8 @@ def tokenize(source):
while pos < end:
# handle regular rules first
for token_type, rule in rules:
if not jsx and token_type and 'jsx' in token_type:
continue
match = rule.match(source, pos)
if match is not None:
break
Expand Down
91 changes: 0 additions & 91 deletions tests/messages/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,97 +388,6 @@ def test_extract_strip_comment_tags(self):
u'a prefix too'], messages[1][2])


class ExtractJavaScriptTestCase(unittest.TestCase):
    """Checks for message extraction from JavaScript source text."""

    def test_simple_extract(self):
        # One message per line, via `_`, `gettext` and `ngettext`.
        source = BytesIO(b"""\
msg1 = _('simple')
msg2 = gettext('simple')
msg3 = ngettext('s', 'p', 42)
""")
        found = list(extract.extract(
            'javascript', source, extract.DEFAULT_KEYWORDS, [], {}))

        expected = [
            (1, 'simple', [], None),
            (2, 'simple', [], None),
            (3, ('s', 'p'), [], None),
        ]
        self.assertEqual(expected, found)

    def test_various_calls(self):
        # Only the calls on lines 5, 8 and 10 are expected in the result.
        source = BytesIO(b"""\
msg1 = _(i18n_arg.replace(/"/, '"'))
msg2 = ungettext(i18n_arg.replace(/"/, '"'), multi_arg.replace(/"/, '"'), 2)
msg3 = ungettext("Babel", multi_arg.replace(/"/, '"'), 2)
msg4 = ungettext(i18n_arg.replace(/"/, '"'), "Babels", 2)
msg5 = ungettext('bunny', 'bunnies', parseInt(Math.random() * 2 + 1))
msg6 = ungettext(arg0, 'bunnies', rparseInt(Math.random() * 2 + 1))
msg7 = _(hello.there)
msg8 = gettext('Rabbit')
msg9 = dgettext('wiki', model.addPage())
msg10 = dngettext(domain, 'Page', 'Pages', 3)
""")
        found = list(extract.extract(
            'javascript', source, extract.DEFAULT_KEYWORDS, [], {}))
        expected = [
            (5, (u'bunny', u'bunnies'), [], None),
            (8, u'Rabbit', [], None),
            (10, (u'Page', u'Pages'), [], None),
        ]
        self.assertEqual(expected, found)

    def test_message_with_line_comment(self):
        # A `//` comment carrying the NOTE: tag sits directly above the call.
        source = BytesIO(u"""\
// NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
        found = list(extract.extract_javascript(source, ('_',), ['NOTE:'], {}))
        first = found[0]
        self.assertEqual(u'Bonjour à tous', first[2])
        self.assertEqual([u'NOTE: hello'], first[3])

    def test_message_with_multiline_comment(self):
        # NOTE(review): the expected ' and servus' keeps a leading space, which
        # suggests the comment lines in this fixture were originally indented
        # -- confirm against the upstream file.
        source = BytesIO(u"""\
/* NOTE: hello
and bonjour
and servus */
msg = _('Bonjour à tous')
""".encode('utf-8'))
        found = list(extract.extract_javascript(source, ('_',), ['NOTE:'], {}))
        first = found[0]
        self.assertEqual(u'Bonjour à tous', first[2])
        self.assertEqual(
            [u'NOTE: hello', 'and bonjour', ' and servus'], first[3])

    def test_ignore_function_definitions(self):
        # Defining a function named like a keyword must yield no messages.
        source = BytesIO(b"""\
function gettext(value) {
return translations[language][value] || value;
}""")

        found = list(extract.extract_javascript(source, ('gettext',), [], {}))
        self.assertEqual(found, [])

    def test_misplaced_comments(self):
        # Tagged comments followed by a non-keyword call are expected to be
        # dropped; those directly above a `_()` call are expected to be kept.
        source = BytesIO(b"""\
/* NOTE: this won't show up */
foo()

/* NOTE: this will */
msg = _('Something')

// NOTE: this will show up
// too.
msg = _('Something else')

// NOTE: but this won't
bar()

_('no comment here')
""")
        found = list(extract.extract_javascript(source, ('_',), ['NOTE:'], {}))
        first, second, third = found[0], found[1], found[2]
        self.assertEqual(u'Something', first[2])
        self.assertEqual([u'NOTE: this will'], first[3])
        self.assertEqual(u'Something else', second[2])
        self.assertEqual([u'NOTE: this will show up', 'too.'], second[3])
        self.assertEqual(u'no comment here', third[2])
        self.assertEqual([], third[3])


class ExtractTestCase(unittest.TestCase):

def test_invalid_filter(self):
Expand Down
124 changes: 124 additions & 0 deletions tests/messages/test_js_extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# -- encoding: UTF-8 --
import pytest
from babel._compat import BytesIO
from babel.messages import extract


def test_simple_extract():
    """Extract one message per line via `_`, `gettext` and `ngettext`."""
    source = BytesIO(b"""\
msg1 = _('simple')
msg2 = gettext('simple')
msg3 = ngettext('s', 'p', 42)
""")
    found = list(extract.extract(
        'javascript', source, extract.DEFAULT_KEYWORDS, [], {}))

    expected = [
        (1, 'simple', [], None),
        (2, 'simple', [], None),
        (3, ('s', 'p'), [], None),
    ]
    assert found == expected


def test_various_calls():
    """Expect messages only from the calls on lines 5, 8 and 10."""
    source = BytesIO(b"""\
msg1 = _(i18n_arg.replace(/"/, '"'))
msg2 = ungettext(i18n_arg.replace(/"/, '"'), multi_arg.replace(/"/, '"'), 2)
msg3 = ungettext("Babel", multi_arg.replace(/"/, '"'), 2)
msg4 = ungettext(i18n_arg.replace(/"/, '"'), "Babels", 2)
msg5 = ungettext('bunny', 'bunnies', parseInt(Math.random() * 2 + 1))
msg6 = ungettext(arg0, 'bunnies', rparseInt(Math.random() * 2 + 1))
msg7 = _(hello.there)
msg8 = gettext('Rabbit')
msg9 = dgettext('wiki', model.addPage())
msg10 = dngettext(domain, 'Page', 'Pages', 3)
""")
    found = list(extract.extract(
        'javascript', source, extract.DEFAULT_KEYWORDS, [], {}))
    expected = [
        (5, (u'bunny', u'bunnies'), [], None),
        (8, u'Rabbit', [], None),
        (10, (u'Page', u'Pages'), [], None),
    ]
    assert found == expected


def test_message_with_line_comment():
    """A tagged `//` comment directly above the call is reported with it."""
    source = BytesIO(u"""\
// NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
    found = list(extract.extract_javascript(source, ('_',), ['NOTE:'], {}))
    first = found[0]
    assert first[2] == u'Bonjour à tous'
    assert first[3] == [u'NOTE: hello']


def test_message_with_multiline_comment():
    """A tagged `/* ... */` comment is reported as one entry per line."""
    # NOTE(review): the expected ' and servus' keeps a leading space, which
    # suggests the comment lines in this fixture were originally indented --
    # confirm against the upstream file.
    source = BytesIO(u"""\
/* NOTE: hello
and bonjour
and servus */
msg = _('Bonjour à tous')
""".encode('utf-8'))
    found = list(extract.extract_javascript(source, ('_',), ['NOTE:'], {}))
    first = found[0]
    assert first[2] == u'Bonjour à tous'
    assert first[3] == [u'NOTE: hello', 'and bonjour', ' and servus']


def test_ignore_function_definitions():
    """Defining a function named like a keyword must yield no messages."""
    source = BytesIO(b"""\
function gettext(value) {
return translations[language][value] || value;
}""")

    found = list(extract.extract_javascript(source, ('gettext',), [], {}))
    assert not found


def test_misplaced_comments():
    """Tagged comments are kept only when they directly precede a message.

    Comments followed by a non-keyword call (``foo()``, ``bar()``) are
    expected to be dropped, and a message without a preceding comment is
    expected to carry an empty comment list.
    """
    source = BytesIO(b"""\
/* NOTE: this won't show up */
foo()

/* NOTE: this will */
msg = _('Something')

// NOTE: this will show up
// too.
msg = _('Something else')

// NOTE: but this won't
bar()

_('no comment here')
""")
    found = list(extract.extract_javascript(source, ('_',), ['NOTE:'], {}))
    first, second, third = found[0], found[1], found[2]
    assert first[2] == u'Something'
    assert first[3] == [u'NOTE: this will']
    assert second[2] == u'Something else'
    assert second[3] == [u'NOTE: this will show up', 'too.']
    assert third[2] == u'no comment here'
    assert third[3] == []


# JSX fixture: one plain gettext() call followed by three i18n._() calls that
# sit inside `{ ... }` expressions between <option> tags.
# NOTE(review): the `class Foo {` brace is never closed in this snippet --
# presumably irrelevant to the tokenizer, but confirm.
JSX_SOURCE = b"""
class Foo {
render() {
const value = gettext("hello");
return (
<option value="val1">{ i18n._('String1') }</option>
<option value="val2">{ i18n._('String 2') }</option>
<option value="val3">{ i18n._('String 3') }</option>
);
}
"""
# Message strings contained in JSX_SOURCE, listed in source order; used as the
# reference value by test_jsx_extraction.
EXPECTED_JSX_MESSAGES = ["hello", "String1", "String 2", "String 3"]


@pytest.mark.parametrize("jsx_enabled", (False, True))
def test_jsx_extraction(jsx_enabled):
    """Run the JSX fixture through the extractor with `jsx` on and off.

    With the option enabled the extracted strings must equal
    EXPECTED_JSX_MESSAGES; with it disabled they must differ from it.
    """
    options = {"jsx": jsx_enabled}
    extracted = extract.extract_javascript(
        BytesIO(JSX_SOURCE), ('_', 'gettext'), [], options)
    texts = [entry[2] for entry in extracted]
    if not jsx_enabled:
        assert texts != EXPECTED_JSX_MESSAGES
        return
    assert texts == EXPECTED_JSX_MESSAGES