From 3ae55131230aa059374603b6fee0d9513a973961 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sun, 11 Feb 2024 00:44:39 +0100 Subject: [PATCH] Implement new CSS syntax draft --- tests/test_tinycss2.py | 7 +- tinycss2/parser.py | 166 +++++++++++++++++++++++------------------ 2 files changed, 99 insertions(+), 74 deletions(-) diff --git a/tests/test_tinycss2.py b/tests/test_tinycss2.py index d67e8b9..d07aa69 100644 --- a/tests/test_tinycss2.py +++ b/tests/test_tinycss2.py @@ -5,7 +5,7 @@ import pytest from tinycss2 import ( - parse_component_value_list, parse_declaration_list, + parse_blocks_contents, parse_component_value_list, parse_declaration_list, parse_one_component_value, parse_one_declaration, parse_one_rule, parse_rule_list, parse_stylesheet, parse_stylesheet_bytes, serialize) from tinycss2.ast import ( @@ -112,6 +112,11 @@ def test_declaration_list(input): return parse_declaration_list(input, **SKIP) +@json_test() +def test_blocks_contents(input): + return parse_blocks_contents(input, **SKIP) + + @json_test() def test_one_declaration(input): return parse_one_declaration(input, skip_comments=True) diff --git a/tinycss2/parser.py b/tinycss2/parser.py index a97aa55..11dd519 100644 --- a/tinycss2/parser.py +++ b/tinycss2/parser.py @@ -14,7 +14,6 @@ def _to_token_iterator(input, skip_comments=False): :returns: An iterator yielding :term:`component values`. """ - # Accept ASCII-only byte strings on Python 2, with implicit conversion. if isinstance(input, str): input = parse_component_value_list(input, skip_comments) return iter(input) @@ -85,7 +84,15 @@ def parse_one_declaration(input, skip_comments=False): return _parse_declaration(first_token, tokens) -def _parse_declaration(first_token, tokens): +def _consume_remnants(input, nested): + for token in input: + if token == ';': + return + elif nested and token == '}': + return + + +def _parse_declaration(first_token, tokens, nested=True): """Parse a declaration. 
Consume :obj:`tokens` until the end of the declaration or the first error. @@ -94,6 +101,8 @@ def _parse_declaration(first_token, tokens): :param first_token: The first component value of the rule. :type tokens: :term:`iterator` :param tokens: An iterator yielding :term:`component values`. + :type nested: :obj:`bool` + :param nested: Whether the declaration is nested or top-level. :returns: A :class:`~tinycss2.ast.Declaration` or :class:`~tinycss2.ast.ParseError`. @@ -101,21 +110,27 @@ def _parse_declaration(first_token, tokens): """ name = first_token if name.type != 'ident': - return ParseError(name.source_line, name.source_column, 'invalid', - 'Expected <ident> for declaration name, got %s.' - % name.type) + _consume_remnants(tokens, nested) + return ParseError( + name.source_line, name.source_column, 'invalid', + f'Expected <ident> for declaration name, got {name.type}.') colon = _next_significant(tokens) if colon is None: - return ParseError(name.source_line, name.source_column, 'invalid', - "Expected ':' after declaration name, got EOF") + _consume_remnants(tokens, nested) + return ParseError( + name.source_line, name.source_column, 'invalid', + "Expected ':' after declaration name, got EOF") elif colon != ':': - return ParseError(colon.source_line, colon.source_column, 'invalid', - "Expected ':' after declaration name, got %s." 
- % colon.type) + _consume_remnants(tokens, nested) + return ParseError( + colon.source_line, colon.source_column, 'invalid', + f"Expected ':' after declaration name, got {colon.type}.") value = [] state = 'value' + contains_non_whitespace = False + contains_simple_block = False for i, token in enumerate(tokens): if state == 'value' and token == '!': state = 'bang' @@ -125,17 +140,33 @@ def _parse_declaration(first_token, tokens): state = 'important' elif token.type not in ('whitespace', 'comment'): state = 'value' + if token.type == '{} block': + if contains_non_whitespace: + contains_simple_block = True + else: + contains_non_whitespace = True + else: + contains_non_whitespace = True value.append(token) if state == 'important': del value[bang_position:] + # TODO: Handle custom property names + + if contains_simple_block and contains_non_whitespace: + return ParseError( + colon.source_line, colon.source_column, 'invalid', + 'Declaration contains {} block') + + # TODO: Handle unicode-range + return Declaration(name.source_line, name.source_column, name.value, name.lower_value, value, state == 'important') -def _consume_declaration_in_list(first_token, tokens): - """Like :func:`_parse_declaration`, but stop at the first ``;``.""" +def _consume_blocks_content(first_token, tokens): + """Consume declaration or nested rule.""" declaration_tokens = [] semicolon_token = [] if first_token != ';' and first_token.type != '{} block': @@ -146,18 +177,20 @@ def _consume_declaration_in_list(first_token, tokens): declaration_tokens.append(token) if token.type == '{} block': break - declaration = _parse_declaration(first_token, iter(declaration_tokens)) + declaration = _parse_declaration( + first_token, iter(declaration_tokens), nested=True) if declaration.type == 'declaration': return declaration else: tokens = chain(declaration_tokens, semicolon_token, tokens) - return _consume_rule(first_token, tokens, stop_token=';', nested=True) + return _consume_qualified_rule( + first_token, 
tokens, stop_token=';', nested=True) -def _consume_declaration_in_list_deprecated(first_token, tokens): +def _consume_declaration_in_list(first_token, tokens): """Like :func:`_parse_declaration`, but stop at the first ``;``. - Deprecated, use :func:`_consume_declaration_in_list` instead. + Deprecated, use :func:`_consume_blocks_content` instead. """ other_declaration_tokens = [] @@ -216,7 +249,7 @@ def parse_blocks_contents(input, skip_comments=False, skip_whitespace=False): elif token.type == 'at-keyword': result.append(_consume_at_rule(token, tokens)) elif token != ';': - result.append(_consume_declaration_in_list(token, tokens)) + result.append(_consume_blocks_content(token, tokens)) return result @@ -266,10 +299,9 @@ def parse_declaration_list(input, skip_comments=False, skip_whitespace=False): if not skip_comments: result.append(token) elif token.type == 'at-keyword': - result.append(_consume_at_rule_deprecated(token, tokens)) + result.append(_consume_at_rule(token, tokens)) elif token != ';': - result.append( - _consume_declaration_in_list_deprecated(token, tokens)) + result.append(_consume_declaration_in_list(token, tokens)) return result @@ -310,6 +342,9 @@ def parse_one_rule(input, skip_comments=False): def parse_rule_list(input, skip_comments=False, skip_whitespace=False): """Parse a non-top-level :diagram:`rule list`. + Deprecated and removed from CSS Syntax. Use :func:`parse_blocks_contents` + instead. + This is used for parsing the :attr:`~tinycss2.ast.AtRule.content` of nested rules like ``@media``. 
This differs from :func:`parse_stylesheet` in that @@ -347,8 +382,7 @@ def parse_rule_list(input, skip_comments=False, skip_whitespace=False): if not skip_comments: result.append(token) else: - result.append(_consume_rule( - token, tokens, stop_token=';', nested=True)) + result.append(_consume_rule(token, tokens)) return result @@ -397,14 +431,7 @@ def parse_stylesheet(input, skip_comments=False, skip_whitespace=False): return result -def _rule_error(token, name): - """Create rule parse error raised because of given token.""" - return ParseError( - token.source_line, token.source_column, 'invalid', - f'{name} reached before {{}} block for a qualified rule.') - - -def _consume_rule(first_token, tokens, nested=False, stop_token=None): +def _consume_rule(first_token, tokens): """Parse a qualified rule or at-rule. Consume just enough of :obj:`tokens` for this rule. @@ -413,38 +440,15 @@ def _consume_rule(first_token, tokens, nested=False, stop_token=None): :param first_token: The first component value of the rule. :type tokens: :term:`iterator` :param tokens: An iterator yielding :term:`component values`. - :type nested: :obj:`bool` - :param nested: Whether the rule is nested or top-level. - :type stop_token: :class:`~tinycss2.ast.Node` - :param stop_token: A token that ends rule parsing when met. :returns: A :class:`~tinycss2.ast.QualifiedRule`, :class:`~tinycss2.ast.AtRule`, or :class:`~tinycss2.ast.ParseError`. 
""" - if first_token == stop_token: - return _rule_error(first_token, 'Stop token') if first_token.type == 'at-keyword': return _consume_at_rule(first_token, tokens) - if first_token.type == '{} block': - prelude = [] - block = first_token - else: - prelude = [first_token] - for token in tokens: - if token == stop_token: - return _rule_error(token, 'Stop token') - if token.type == '{} block': - block = token - # TODO: handle special case for CSS variables (using "nested") - # https://drafts.csswg.org/css-syntax-3/#consume-qualified-rule - break - prelude.append(token) - else: - return _rule_error(prelude[-1], 'EOF') - return QualifiedRule(first_token.source_line, first_token.source_column, - prelude, block.content) + return _consume_qualified_rule(first_token, tokens) def _consume_at_rule(at_keyword, tokens): @@ -478,30 +482,46 @@ def _consume_at_rule(at_keyword, tokens): at_keyword.value, at_keyword.lower_value, prelude, content) -def _consume_at_rule_deprecated(at_keyword, tokens): - """Parse an at-rule. +def _rule_error(token, name): + """Create rule parse error raised because of given token.""" + return ParseError( + token.source_line, token.source_column, 'invalid', + f'{name} reached before {{}} block for a qualified rule.') + - Deprecated, use :func:`_consume_at_rule` instead. +def _consume_qualified_rule(first_token, tokens, nested=False, + stop_token=None): + """Consume a qualified rule. Consume just enough of :obj:`tokens` for this rule. - :type at_keyword: :class:`AtKeywordToken` - :param at_keyword: The at-rule keyword token starting this rule. + :type first_token: :term:`component value` + :param first_token: The first component value of the rule. :type tokens: :term:`iterator` :param tokens: An iterator yielding :term:`component values`. - :returns: - A :class:`~tinycss2.ast.QualifiedRule`, - or :class:`~tinycss2.ast.ParseError`. + :type nested: :obj:`bool` + :param nested: Whether the rule is nested or top-level. 
+ :type stop_token: :class:`~tinycss2.ast.Node` + :param stop_token: A token that ends rule parsing when met. """ - prelude = [] - content = None - for token in tokens: - if token.type == '{} block': - content = token.content - break - elif token == ';': - break - prelude.append(token) - return AtRule(at_keyword.source_line, at_keyword.source_column, - at_keyword.value, at_keyword.lower_value, prelude, content) + if first_token == stop_token: + return _rule_error(first_token, 'Stop token') + if first_token.type == '{} block': + prelude = [] + block = first_token + else: + prelude = [first_token] + for token in tokens: + if token == stop_token: + return _rule_error(token, 'Stop token') + if token.type == '{} block': + block = token + # TODO: handle special case for CSS variables (using "nested") + # https://drafts.csswg.org/css-syntax-3/#consume-qualified-rule + break + prelude.append(token) + else: + return _rule_error(prelude[-1], 'EOF') + return QualifiedRule(first_token.source_line, first_token.source_column, + prelude, block.content)