Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

javascript extract improvements #939

Merged
59 changes: 53 additions & 6 deletions babel/messages/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
:license: BSD, see LICENSE for more details.
"""
import ast
import io
import os
from os.path import relpath
import sys
from os.path import relpath
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING

from babel.util import parse_encoding, parse_future_flags, pathmatch
Expand Down Expand Up @@ -532,7 +533,7 @@ def _parse_python_string(value, encoding, future_flags):
return None


def extract_javascript(fileobj, keywords, comment_tags, options):
def extract_javascript(fileobj, keywords, comment_tags, options, lineno=1):
"""Extract messages from JavaScript source code.

:param fileobj: the seekable, file-like object the messages should be
Expand All @@ -544,7 +545,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
:param options: a dictionary of additional options (optional)
Supported options are:
* `jsx` -- set to false to disable JSX/E4X support.
* `template_string` -- set to false to disable ES6 template string support.
* `template_string` -- if `True`, supports gettext(`key`)
* `parse_template_string` -- if `True` will parse the
contents of javascript
template strings.
:param lineno: line number offset (for parsing embedded fragments)
"""
from babel.messages.jslexer import Token, tokenize, unquote_string
funcname = message_lineno = None
Expand All @@ -556,12 +561,12 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
last_token = None
call_stack = -1
dotted = any('.' in kw for kw in keywords)

for token in tokenize(
fileobj.read().decode(encoding),
jsx=options.get("jsx", True),
template_string=options.get("template_string", True),
dotted=dotted
dotted=dotted,
lineno=lineno
):
if ( # Turn keyword`foo` expressions into keyword("foo") calls:
funcname and # have a keyword...
Expand All @@ -573,7 +578,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
call_stack = 0
token = Token('operator', ')', token.lineno)

if token.type == 'operator' and token.value == '(':
if options.get('parse_template_string') and not funcname and token.type == 'template_string':
for item in parse_template_string(token.value, keywords, comment_tags, options, token.lineno):
yield item

elif token.type == 'operator' and token.value == '(':
if funcname:
message_lineno = token.lineno
call_stack += 1
Expand Down Expand Up @@ -665,3 +674,41 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
funcname = token.value

last_token = token


def parse_template_string(template_string, keywords, comment_tags, options, lineno=1):
    """Parse JavaScript template string.

    Scans the template literal for ``${...}`` substitutions and feeds the
    source of each one to :func:`extract_javascript`, yielding every item
    that call produces.

    :param template_string: the template string to be parsed, including its
                            enclosing backticks
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :param lineno: starting line number (optional)
    """
    from babel.messages.jslexer import line_re

    body = template_string[1:-1]  # drop the enclosing backticks
    level = 0            # brace depth; 0 means we are in literal template text
    inside_str = False   # the opening quote character while inside a JS string
    escaped = False      # True if the previous character was an unescaped backslash
    prev_character = None
    expr_start = 0       # index in `body` of the current expression's first char
    counted_upto = 0     # newlines in body[:counted_upto] are already in `lineno`

    for index, character in enumerate(body):
        # Quotes only delimit strings inside expression code; in the literal
        # template text an apostrophe or quote is just text.  A nested
        # template literal is treated as one opaque string here and handed to
        # extract_javascript together with the rest of the expression.
        if level:
            if not inside_str:
                if character in ('"', "'", '`'):
                    inside_str = character
            elif character == inside_str and not escaped:
                inside_str = False

        if not inside_str:
            # In template text only `${` opens an expression; once inside an
            # expression every `{` must be counted so that object literals
            # and function bodies do not close the substitution early.
            if character == '{' and (level or prev_character == '$'):
                if not level:
                    expr_start = index + 1
                level += 1
            elif level and character == '}':
                level -= 1
                if not level:
                    expression = body[expr_start:index]
                    # Account for line breaks in the literal text that
                    # precedes this expression.
                    lineno += len(line_re.findall(body[counted_upto:expr_start]))
                    if expression.strip():
                        # NOTE(review): the fragment is encoded as UTF-8 while
                        # extract_javascript decodes with its own `encoding`;
                        # confirm they agree when a non-UTF-8 encoding is set.
                        fake_file_obj = io.BytesIO(expression.encode())
                        for item in extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno):
                            yield item
                    lineno += len(line_re.findall(expression))
                    counted_upto = index

        if escaped:
            # This character was escaped by the preceding backslash, so it
            # must not take part in `${` detection (e.g. `\${` is literal).
            escaped = False
            prev_character = None
        else:
            escaped = character == '\\'
            prev_character = character
4 changes: 2 additions & 2 deletions babel/messages/jslexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,17 +151,17 @@ def unquote_string(string):
return u''.join(result)


def tokenize(source, jsx=True, dotted=True, template_string=True):
def tokenize(source, jsx=True, dotted=True, template_string=True, lineno=1):
johanneswilm marked this conversation as resolved.
Show resolved Hide resolved
"""
Tokenize JavaScript/JSX source. Returns a generator of tokens.

:param jsx: Enable (limited) JSX parsing.
:param dotted: Read dotted names as single name token.
:param template_string: Support ES6 template strings
:param lineno: starting line number (optional)
"""
may_divide = False
pos = 0
lineno = 1
end = len(source)
rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)

Expand Down
39 changes: 39 additions & 0 deletions tests/messages/test_js_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,42 @@ def test_template_string_tag_usage():
)

assert messages == [(1, 'Tag template, wow', [], None)]


def test_inside_template_string():
    """A gettext call inside a ``${...}`` substitution is extracted."""
    source = BytesIO(b"const msg = `${gettext('Hello')} ${user.name}`")
    extraction_options = {'parse_template_string': True}

    extracted = extract.extract('javascript', source, {"gettext": None}, [], extraction_options)

    expected = [(1, 'Hello', [], None)]
    assert list(extracted) == expected


def test_inside_template_string_with_linebreaks():
    """Messages in multi-line substitutions are reported on their own lines."""
    source = BytesIO(b"""\
const userName = gettext('Username')
const msg = `${
gettext('Hello')
} ${userName} ${
gettext('Are you having a nice day?')
}`
const msg2 = `${
gettext('Howdy')
} ${userName} ${
gettext('Are you doing ok?')
}`
""")
    extraction_options = {'parse_template_string': True}

    extracted = extract.extract('javascript', source, {"gettext": None}, [], extraction_options)

    expected = [
        (1, 'Username', [], None),
        (3, 'Hello', [], None),
        (5, 'Are you having a nice day?', [], None),
        (8, 'Howdy', [], None),
        (10, 'Are you doing ok?', [], None),
    ]
    assert list(extracted) == expected


def test_inside_nested_template_string():
    """Messages inside template strings nested in a substitution are found."""
    source = BytesIO(b"const msg = `${gettext('Greetings!')} ${ evening ? `${user.name}: ${gettext('This is a lovely evening.')}` : `${gettext('The day is really nice!')} ${user.name}`}`")
    extraction_options = {'parse_template_string': True}

    extracted = extract.extract('javascript', source, {"gettext": None}, [], extraction_options)

    expected = [
        (1, 'Greetings!', [], None),
        (1, 'This is a lovely evening.', [], None),
        (1, 'The day is really nice!', [], None),
    ]
    assert list(extracted) == expected