From 327f6e0c2cd76f8733ed3630859e4245a37477cf Mon Sep 17 00:00:00 2001 From: Wliu <50Wliu@users.noreply.github.com> Date: Wed, 20 Sep 2017 11:54:00 +0200 Subject: [PATCH 1/4] Showcase off-by-one error in language-html --- spec/fixtures/css.cson | 2071 ++++++++++++++++++++++++++++ spec/fixtures/css.json | 629 --------- spec/fixtures/html-css-inline.cson | 78 ++ spec/grammar-spec.coffee | 29 +- 4 files changed, 2177 insertions(+), 630 deletions(-) create mode 100644 spec/fixtures/css.cson delete mode 100644 spec/fixtures/css.json create mode 100644 spec/fixtures/html-css-inline.cson diff --git a/spec/fixtures/css.cson b/spec/fixtures/css.cson new file mode 100644 index 0000000..679b888 --- /dev/null +++ b/spec/fixtures/css.cson @@ -0,0 +1,2071 @@ +'scopeName': 'source.css' +'name': 'CSS' +'fileTypes': [ + 'css' + 'css.erb' +] +'firstLineMatch': '''(?xi) + # Emacs modeline + -\\*-(?:\\s*(?=[^:;\\s]+\\s*-\\*-)|(?:.*?[;\\s]|(?<=-\\*-))mode\\s*:\\s*) + css + (?=[\\s;]|(?]?\\d+|m)?|\\sex)(?=:(?=\\s*set?\\s[^\\n:]+:)|:(?!\\s*set?\\s))(?:(?:\\s|\\s*:\\s*)\\w*(?:\\s*=(?:[^\\n\\\\\\s]|\\\\.)*)?)*[\\s:](?:filetype|ft|syntax)\\s*= + css + (?=\\s|:|$) +''' +'patterns': [ + { + 'include': '#comment-block' + } + { + 'include': '#escapes' + } + { + 'include': '#combinators' + } + { + 'include': '#selector' + } + { + 'include': '#at-rules' + } + { + 'include': '#rule-list' + } +] +'repository': + 'at-rules': + 'patterns': [ + { + # @charset, with possible preceding BOM sequence + 'begin': '\\A(?:\\xEF\\xBB\\xBF)?(?i:(?=\\s*@charset\\b))' + 'end': ';|(?=$)' + 'endCaptures': + '0': + 'name': 'punctuation.terminator.rule.css' + 'name': 'meta.at-rule.charset.css' + 'patterns': [ + { + 'captures': + '1': + 'name': 'invalid.illegal.not-lowercase.charset.css' + '2': + 'name': 'invalid.illegal.leading-whitespace.charset.css' + '3': + 'name': 'invalid.illegal.no-whitespace.charset.css' + '4': + 'name': 'invalid.illegal.whitespace.charset.css' + '5': + 'name': 'invalid.illegal.not-double-quoted.charset.css' + '6': + 'name': 'invalid.illegal.unclosed-string.charset.css' + '7': + 'name': 'invalid.illegal.unexpected-characters.charset.css' + 'match': '''(?x) # Possible errors: + \\G + ((?!@charset)@\\w+) # Not lowercase (@charset is case-sensitive) + | + \\G(\\s+) # Preceding whitespace + | + (@charset\\S[^;]*) # No whitespace after @charset + | + (?<=@charset) # Before quoted charset name + (\\x20{2,}|\\t+) # More than one space used, or a tab + | + (?<=@charset\\x20) # Beginning of charset name + ([^";]+) # Not double-quoted + | + ("[^"]+$) # Unclosed quote + | + (?<=") # After charset name + ([^;]+) # Unexpected junk instead of semicolon + ''' + } + { + 'captures': + '1': + 'name': 'keyword.control.at-rule.charset.css' + '2': + 'name': 'punctuation.definition.keyword.css' + 'match': '((@)charset)(?=\\s)' + } + { + 'begin': '"' + 'beginCaptures': + '0': + 'name': 'punctuation.definition.string.begin.css' + 'end': '"|$' + 'endCaptures': + '0': + 'name': 'punctuation.definition.string.end.css' + 'name': 'string.quoted.double.css' + 'patterns': [ + { + 'begin': '(?:\\G|^)(?=(?:[^"])+$)' + 'end': '$' + 'name': 'invalid.illegal.unclosed.string.css' + } + ] + } + ] + } + { + # @import + 'begin': '(?i)((@)import)(?:\\s+|$|(?=[\'"]|/\\*))' + 'beginCaptures': + '1': + 'name': 'keyword.control.at-rule.import.css' + '2': + 'name': 'punctuation.definition.keyword.css' + 'end': ';' + 'endCaptures': + '0': + 'name': 'punctuation.terminator.rule.css' + 'name': 'meta.at-rule.import.css' + 'patterns': [ + { + 'begin': '\\G\\s*(?=/\\*)' + 'end': '(?<=\\*/)\\s*' + 'patterns': [ + { + 'include': '#comment-block' + } + ] + } + { + 'include': '#string' + } + { + 'include': '#url' + } + { + 'include': '#media-query-list' + } + ] + } + { + # @font-face + 'begin': '(?i)((@)font-face)(?=\\s*|{|/\\*|$)' + 'beginCaptures': + '1': + 'name': 'keyword.control.at-rule.font-face.css' + '2': + 'name': 'punctuation.definition.keyword.css' + 'end': '(?!\\G)' + 'name': 'meta.at-rule.font-face.css' + 'patterns': [ + { + 'include': '#comment-block' + } + { + 'include': '#escapes' + } + { + 'include': '#rule-list' + } + ] + } + { + # @page + 'begin': '(?i)(@)page(?=[\\s:{]|/\\*|$)' + 'captures': + '0': + 'name': 'keyword.control.at-rule.page.css' + '1': + 'name': 'punctuation.definition.keyword.css' + 'end': '(?=\\s*($|[:{;]))' + 'name': 'meta.at-rule.page.css' + 'patterns': [ + { + 'include': '#rule-list' + } + ] + } + { + # @media + 'begin': '(?i)(?=@media(\\s|\\(|/\\*|$))' + 'end': '(?<=})(?!\\G)' + 'patterns': [ + { + 'begin': '(?i)\\G(@)media' + 'beginCaptures': + '0': + 'name': 'keyword.control.at-rule.media.css' + '1': + 'name': 'punctuation.definition.keyword.css' + 'end': '(?=\\s*[{;])' + 'name': 'meta.at-rule.media.header.css' + 'patterns': [ + { + 'include': '#media-query-list' + } + ] + } + { + 'begin': '{' + 'beginCaptures': + '0': + 'name': 'punctuation.section.media.begin.bracket.curly.css' + 'end': '}' + 'endCaptures': + '0': + 'name': 'punctuation.section.media.end.bracket.curly.css' + 'name': 'meta.at-rule.media.body.css' + 'patterns': [ + { + 'include': '$self' + } + ] + } + ] + } + { + # @counter-style + 'begin': '(?i)(?=@counter-style([\\s\'"{;]|/\\*|$))' + 'end': '(?<=})(?!\\G)' + 'patterns': [ + { + 'begin': '(?i)\\G(@)counter-style' + 'beginCaptures': + '0': + 'name': 'keyword.control.at-rule.counter-style.css' + '1': + 'name': 'punctuation.definition.keyword.css' + 'end': '(?=\\s*{)' + 'name': 'meta.at-rule.counter-style.header.css' + 'patterns': [ + { + 'include': '#comment-block' + } + { + 'include': '#escapes' + } + { + 'captures': + '0': + 'patterns': [ + { + 'include': '#escapes' + } + ] + 'match': '''(?x) + (?:[-a-zA-Z_] | [^\\x00-\\x7F]) # First letter + (?:[-a-zA-Z0-9_] | [^\\x00-\\x7F] # Remainder of identifier + |\\\\(?:[0-9a-fA-F]{1,6}|.) + )* + ''' + 'name': 'variable.parameter.style-name.css' + } + ] + } + { + 'begin': '{' + 'beginCaptures': + '0': + 'name': 'punctuation.section.property-list.begin.bracket.curly.css' + 'end': '}' + 'endCaptures': + '0': + 'name': 'punctuation.section.property-list.end.bracket.curly.css' + 'name': 'meta.at-rule.counter-style.body.css' + 'patterns': [ + { + 'include': '#comment-block' + } + { + 'include': '#escapes' + } + { + 'include': '#rule-list-innards' + } + ] + } + ] + } + { + # @document + 'begin': '(?i)(?=@document([\\s\'"{;]|/\\*|$))' + 'end': '(?<=})(?!\\G)' + 'patterns': [ + { + 'begin': '(?i)\\G(@)document' + 'beginCaptures': + '0': + 'name': 'keyword.control.at-rule.document.css' + '1': + 'name': 'punctuation.definition.keyword.css' + 'end': '(?=\\s*[{;])' + 'name': 'meta.at-rule.document.header.css' + 'patterns': [ + { + 'begin': '(?i)(?>>' + 'name': 'invalid.deprecated.combinator.css' + } + { + 'match': '>>|>|\\+|~' + 'name': 'keyword.operator.combinator.css' + } + ] + 'commas': + 'match': ',' + 'name': 'punctuation.separator.list.comma.css' + 'comment-block': + 'begin': '/\\*' + 'beginCaptures': + '0': + 'name': 'punctuation.definition.comment.begin.css' + 'end': '\\*/' + 'endCaptures': + '0': + 'name': 'punctuation.definition.comment.end.css' + 'name': 'comment.block.css' + 'escapes': + 'patterns': [ + { + 'match': '\\\\[0-9a-fA-F]{1,6}' + 'name': 'constant.character.escape.codepoint.css' + } + { + 'begin': '\\\\$\\s*' + 'end': '^(?<:=]|\\)|/\\*) # Terminates cleanly + ''' + 'media-feature-keywords': + 'match': '''(?xi) + (?<=^|\\s|:|\\*/) + (?: portrait # Orientation + | landscape + | progressive # Scan types + | interlace + | fullscreen # Display modes + | standalone + | minimal-ui + | browser + ) + (?=\\s|\\)|$) + ''' + 'name': 'support.constant.property-value.css' + 'media-query': + 'begin': '\\G' + 'end': '(?=\\s*[{;])' + 'patterns': [ + { + 'include': '#comment-block' + } + { + 'include': '#escapes' + } + { + 'include': '#media-types' + } + { + 'match': '(?i)(?<=\\s|^|,|\\*/)(only|not)(?=\\s|{|/\\*|$)' + 'name': 'keyword.operator.logical.$1.media.css' + } + { + 'match': '(?i)(?<=\\s|^|\\*/|\\))and(?=\\s|/\\*|$)' + 'name': 'keyword.operator.logical.and.media.css' + } + { + 'match': ',(?:(?:\\s*,)+|(?=\\s*[;){]))' + 'name': 'invalid.illegal.comma.css' + } + { + 'include': '#commas' + } + { + 'begin': '\\(' + 'beginCaptures': + '0': + 'name': 'punctuation.definition.parameters.begin.bracket.round.css' + 'end': '\\)' + 'endCaptures': + '0': + 'name': 'punctuation.definition.parameters.end.bracket.round.css' + 'patterns': [ + { + 'include': '#media-features' + } + { + 'include': '#media-feature-keywords' + } + { + 'match': ':' + 'name': 'punctuation.separator.key-value.css' + } + { + 'match': '>=|<=|=|<|>' + 'name': 'keyword.operator.comparison.css' + } + { + 'captures': + '1': + 'name': 'constant.numeric.css' + '2': + 'name': 'keyword.operator.arithmetic.css' + '3': + 'name': 'constant.numeric.css' + 'match': '(\\d+)\\s*(/)\\s*(\\d+)' + 'name': 'meta.ratio.css' + } + { + 'include': '#numeric-values' + } + { + 'include': '#comment-block' + } + ] + } + ] + 'media-query-list': + 'begin': '(?=\\s*[^{;])' + 'end': '(?=\\s*[{;])' + 'patterns': [ + { + 'include': '#media-query' + } + ] + 'media-types': + 'captures': + '1': + 'name': 'support.constant.media.css' + '2': + 'name': 'invalid.deprecated.constant.media.css' + 'match': '''(?xi) + (?<=^|\\s|,|\\*/) + (?: + # Valid media types + (all|print|screen|speech) + | + # Deprecated in Media Queries 4: http://dev.w3.org/csswg/mediaqueries/#media-types + (aural|braille|embossed|handheld|projection|tty|tv) + ) + (?=$|[{,\\s;]|/\\*) + ''' + 'numeric-values': + 'patterns': [ + { + 'captures': + '1': + 'name': 'punctuation.definition.constant.css' + 'match': '(#)(?:[0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})\\b' + 'name': 'constant.other.color.rgb-value.hex.css' + } + { + 'captures': + '1': + 'name': 'keyword.other.unit.percentage.css' + '2': + 'name': 'keyword.other.unit.${2:/downcase}.css' + 'match': '''(?xi) (?+~|] # - Followed by another selector + | /\\* # - Followed by a block comment + ) + | + # Name contains unescaped ASCII symbol + (?: # Check for acceptable preceding characters + [-a-zA-Z_0-9]|[^\\x00-\\x7F] # - Valid selector character + | \\\\(?:[0-9a-fA-F]{1,6}|.) # - Escape sequence + )* + (?: # Invalid punctuation + [!"'%&(*;+~|] # - Another selector + | /\\* # - A block comment + ) + ''' + 'name': 'entity.other.attribute-name.class.css' + } + { + 'captures': + '1': + 'name': 'punctuation.definition.entity.css' + '2': + 'patterns': [ + { + 'include': '#escapes' + } + ] + 'match': '''(?x) + (\\#) + ( + -? + (?![0-9]) + (?:[-a-zA-Z0-9_]|[^\\x00-\\x7F]|\\\\(?:[0-9a-fA-F]{1,6}|.))+ + ) + (?=$|[\\s,.\\#)\\[:{>+~|]|/\\*) + ''' + 'name': 'entity.other.attribute-name.id.css' + } + { + 'begin': '\\[' + 'beginCaptures': + '0': + 'name': 'punctuation.definition.entity.begin.bracket.square.css' + 'end': '\\]' + 'endCaptures': + '0': + 'name': 'punctuation.definition.entity.end.bracket.square.css' + 'name': 'meta.attribute-selector.css' + 'patterns': [ + { + 'include': '#comment-block' + } + { + 'include': '#string' + } + { + 'captures': + '1': + 'name': 'storage.modifier.ignore-case.css' + 'match': '(?<=["\'\\s]|^|\\*/)\\s*([iI])\\s*(?=[\\s\\]]|/\\*|$)' + } + { + 'captures': + '1': + 'name': 'string.unquoted.attribute-value.css' + 'patterns': [ + { + 'include': '#escapes' + } + ] + 'match': '(?x)(?<==)\\s*((?!/\\*)(?:[^\\\\"\'\\s\\]]|\\\\.)+)' + } + { + 'include': '#escapes' + } + { + 'match': '[~|^$*]?=' + 'name': 'keyword.operator.pattern.css' + } + { + 'match': '\\|' + 'name': 'punctuation.separator.css' + } + { + 'captures': + '1': + 'name': 'entity.other.namespace-prefix.css' + 'patterns': [ + { + 'include': '#escapes' + } + ] + 'match': '''(?x) + # Qualified namespace prefix + ( -?(?!\\d)(?:[\\w-]|[^\\x00-\\x7F]|\\\\(?:[0-9a-fA-F]{1,6}|.))+ + | \\* + ) + # Lookahead to ensure there's a valid identifier ahead + (?= + \\| (?!\\s|=|$|\\]) + (?: -?(?!\\d) + | [\\\\\\w-] + | [^\\x00-\\x7F] + ) + ) + ''' + } + { + 'captures': + '1': + 'name': 'entity.other.attribute-name.css' + 'patterns': [ + { + 'include': '#escapes' + } + ] + 'match': '''(?x) + (-?(?!\\d)(?>[\\w-]|[^\\x00-\\x7F]|\\\\(?:[0-9a-fA-F]{1,6}|.))+) + \\s* + (?=[~|^\\]$*=]|/\\*) + ''' + } + ] + } + { + 'include': '#pseudo-classes' + } + { + 'include': '#pseudo-elements' + } + { + 'include': '#functional-pseudo-classes' + } + # Custom HTML elements + { + 'match': '''(?x) (?\\s,.\\#|){:\\[]|/\\*|$) + ''' + 'name': 'entity.name.tag.css' + 'unicode-range': + 'captures': + '0': + 'name': 'constant.other.unicode-range.css' + '1': + 'name': 'punctuation.separator.dash.unicode-range.css' + 'match': '(?(['\"])(?:[^\\\\]|\\\\.)*?(\\6)))))?\\s*(\\])", - "name": "meta.attribute-selector.css" - } - ] - }, - "string-double": { - "begin": "\"", - "beginCaptures": { - "0": { - "name": "punctuation.definition.string.begin.css" - } - }, - "end": "\"", - "endCaptures": { - "0": { - "name": "punctuation.definition.string.end.css" - } - }, - "name": "string.quoted.double.css", - "patterns": [ - { - "match": "\\\\.", - "name": "constant.character.escape.css" - } - ] - }, - "string-single": { - "begin": "'", - "beginCaptures": { - "0": { - "name": "punctuation.definition.string.begin.css" - } - }, - "end": "'", - "endCaptures": { - "0": { - "name": "punctuation.definition.string.end.css" - } - }, - "name": "string.quoted.single.css", - "patterns": [ - { - "match": "\\\\.", - "name": "constant.character.escape.css" - } - ] - } - }, - "scopeName": "source.css" -} diff --git a/spec/fixtures/html-css-inline.cson b/spec/fixtures/html-css-inline.cson new file mode 100644 index 0000000..0e8ae52 --- /dev/null +++ b/spec/fixtures/html-css-inline.cson @@ -0,0 +1,78 @@ +'scopeName': 'text.html.basic.css' +'name': 'HTML with CSS (test)' +'patterns': [ + { + 'begin': '())' + 'beginCaptures': + '1': + 'name': 'punctuation.definition.tag.begin.html' + '2': + 'name': 'entity.name.tag.inline.any.html' + 'end': '((?: ?/)?>)' + 'endCaptures': + '1': + 'name': 'punctuation.definition.tag.end.html' + 'name': 'meta.tag.inline.any.html' + 'patterns': [ + { + 'include': '#tag-style-attribute' + } + ] + } +] +'repository': + 'tag-style-attribute': + 'begin': '\\b(style)(=)' + 'beginCaptures': + '1': + 'name': 'entity.other.attribute-name.style.html' + '2': + 'name': 'punctuation.separator.key-value.html' + 'end': '(?=\\s|/?>)' + 'name': 'meta.attribute-with-value.style.html' + 'patterns': [ + { + 'match': '(")(.+)(")' + 'name': 'string.quoted.double.html' + 'captures': + '1': + 'name': 'punctuation.definition.string.begin.html' + '2': + 'name': 'source.css.style.html' + 'patterns': [ + { + 'include': 'source.css#rule-list-innards' + } + ] + '3': + 'name': 'punctuation.definition.string.end.html' + } + { + 'match': "(')(.+)(')" + 'name': 'string.quoted.single.html' + 'captures': + '1': + 'name': 'punctuation.definition.string.begin.html' + '2': + 'name': 'source.css.style.html' + 'patterns': [ + { + 'include': 'source.css#rule-list-innards' + } + ] + '3': + 'name': 'punctuation.definition.string.end.html' + } + { + 'match': '([^\\s&>"\'<=`]|&(?=>))+' + 'name': 'string.unquoted.html' + 'captures': + '0': + 'name': 'source.css.style.html' + 'patterns': [ + { + 'include': 'source.css#rule-list-innards' + } + ] + } + ] diff --git a/spec/grammar-spec.coffee b/spec/grammar-spec.coffee index 51ae11a..f5346b3 100644 --- a/spec/grammar-spec.coffee +++ b/spec/grammar-spec.coffee @@ -933,7 +933,7 @@ describe "Grammar tokenization", -> describe "HTML", -> describe "when it contains CSS", -> it "correctly parses the CSS rules", -> - loadGrammarSync("css.json") + loadGrammarSync("css.cson") grammar = registry.grammarForScopeName("text.html.basic") lines = grammar.tokenizeLines """ @@ -958,6 +958,33 @@ describe "Grammar tokenization", -> "support.constant.color.w3c-standard-color-name.css" ] + describe "when it contains inline CSS", -> + it "correctly stops parsing CSS", -> + loadGrammarSync('css.cson') + loadGrammarSync('html-css-inline.cson') + grammar = registry.grammarForScopeName('text.html.basic.css') + + {tokens} = grammar.tokenizeLine "" + expect(tokens[8]).toEqual value: "'", scopes: [ + 'text.html.basic.css' + 'meta.tag.inline.any.html' + 'meta.attribute-with-value.style.html' + 'string.quoted.single.html' + 'punctuation.definition.string.end.html' + ] + + expect(tokens[9]).toEqual value: ">", scopes: [ + 'text.html.basic.css' + 'meta.tag.inline.any.html' + 'punctuation.definition.tag.end.html' + ] + + expect(tokens[10]).toEqual value: "<", scopes: [ + 'text.html.basic.css' + 'meta.tag.inline.any.html' + 'punctuation.definition.tag.begin.html' + ] + describe "Latex", -> it "properly emits close tags for scope names containing back-references", -> loadGrammarSync("latex.cson") From 4f864a2dca018fbf72f4c1a19734bb2aa9011f96 Mon Sep 17 00:00:00 2001 From: Wliu <50Wliu@users.noreply.github.com> Date: Wed, 20 Sep 2017 22:27:24 +0200 Subject: [PATCH 2/4] Fix test to actually pass when it should --- spec/grammar-spec.coffee | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/grammar-spec.coffee b/spec/grammar-spec.coffee index f5346b3..9ac3189 100644 --- a/spec/grammar-spec.coffee +++ b/spec/grammar-spec.coffee @@ -979,7 +979,7 @@ describe "Grammar tokenization", -> 'punctuation.definition.tag.end.html' ] - expect(tokens[10]).toEqual value: "<", scopes: [ + expect(tokens[10]).toEqual value: " Date: Thu, 21 Sep 2017 02:38:28 +0300 Subject: [PATCH 3/4] Fix insertion of newline characters to the end of lines --- benchmark/benchmark.coffee | 2 +- spec/fixtures/captures-patterns.cson | 35 ++++++++++++++++++++++++++++ spec/grammar-spec.coffee | 20 +++++++++++++--- src/grammar.coffee | 12 ++++++---- src/pattern.coffee | 2 +- src/rule.coffee | 4 ++-- 6 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 spec/fixtures/captures-patterns.cson diff --git a/benchmark/benchmark.coffee b/benchmark/benchmark.coffee index f0b8d88..e14f183 100644 --- a/benchmark/benchmark.coffee +++ b/benchmark/benchmark.coffee @@ -5,7 +5,7 @@ GrammarRegistry = require '../src/grammar-registry' registry = new GrammarRegistry() jsGrammar = registry.loadGrammarSync(path.resolve(__dirname, '..', 'spec', 'fixtures', 'javascript.json')) jsGrammar.maxTokensPerLine = Infinity -cssGrammar = registry.loadGrammarSync(path.resolve(__dirname, '..', 'spec', 'fixtures', 'css.json')) +cssGrammar = registry.loadGrammarSync(path.resolve(__dirname, '..', 'spec', 'fixtures', 'css.cson')) cssGrammar.maxTokensPerLine = Infinity tokenize = (grammar, content, lineCount) -> diff --git a/spec/fixtures/captures-patterns.cson b/spec/fixtures/captures-patterns.cson new file mode 100644 index 0000000..9a1ab84 --- /dev/null +++ b/spec/fixtures/captures-patterns.cson @@ -0,0 +1,35 @@ +'scopeName': 'abcabx' +'patterns': [ + { + 'match': 'a(.+)c' + 'captures': + '1': + 'patterns':[ + { + 'match': '[^c]+' + 'name': 'b' + } + ] + 'name': 'abc' + }, + { + 'match': 'a(.+)x' + 'captures': + '1': + 'patterns':[ + { + 'match': '[^x]+' + 'name': 'up-to-x-outer' + 'captures': + '0': + 'patterns':[ + { + 'match': '.+' + 'name': 'up-to-x-inner' + } + ] + } + ] + 'name': 'abx' + } +] diff --git a/spec/grammar-spec.coffee b/spec/grammar-spec.coffee index 9ac3189..a7f20a4 100644 --- a/spec/grammar-spec.coffee +++ b/spec/grammar-spec.coffee @@ -204,11 +204,9 @@ describe "Grammar tokenization", -> expect(lines[1][0].value).toEqual "test" expect(lines[1][0].scopes).toEqual ["source.test", "pre", "nested"] - expect(lines[2].length).toBe 2 + expect(lines[2].length).toBe 1 expect(lines[2][0].value).toEqual "#endif" expect(lines[2][0].scopes).toEqual ["source.test", "pre"] - expect(lines[2][1].value).toEqual "" - expect(lines[2][1].scopes).toEqual ["source.test", "all"] {line, tags} = grammar.tokenizeLine "test" tokens = registry.decodeTokens(line, tags) @@ -467,6 +465,22 @@ describe "Grammar tokenization", -> {line, tags, ruleStack} = grammar.tokenizeLine("// line comment") {line, tags, ruleStack} = grammar.tokenizeLine(" // second line comment with a single leading space", ruleStack) + it "can parse a grammar that captures the same text multiple times (regression)", -> + grammar = loadGrammarSync('captures-patterns.cson') + lines = grammar.tokenizeLines('abc') + expect(lines.length).toBe 1 + expect(lines[0].length).toBe 3 + expect(lines[0][0]).toEqual value: 'a', scopes: ['abcabx', 'abc'] + expect(lines[0][1]).toEqual value: 'b', scopes: ['abcabx', 'abc', 'b'] + expect(lines[0][2]).toEqual value: 'c', scopes: ['abcabx', 'abc'] + + lines = grammar.tokenizeLines('abx') + expect(lines.length).toBe 1 + expect(lines[0].length).toBe 3 + expect(lines[0][0]).toEqual value: 'a', scopes: ['abcabx', 'abx'] + expect(lines[0][1]).toEqual value: 'b', scopes: ['abcabx', 'abx', 'up-to-x-outer', 'up-to-x-inner'] + expect(lines[0][2]).toEqual value: 'x', scopes: ['abcabx', 'abx'] + describe "when inside a C block", -> beforeEach -> loadGrammarSync('c.json') diff --git a/src/grammar.coffee b/src/grammar.coffee index 82fd12c..68edc3a 100644 --- a/src/grammar.coffee +++ b/src/grammar.coffee @@ -70,11 +70,12 @@ class Grammar # Returns an {Array} of token arrays for each line tokenized. tokenizeLines: (text, compatibilityMode=true) -> lines = text.split('\n') + lastLine = lines.length - 1 ruleStack = null scopes = [] for line, lineNumber in lines - {tags, ruleStack} = @tokenizeLine(line, ruleStack, lineNumber is 0, compatibilityMode) + {tags, ruleStack} = @tokenizeLine(line, ruleStack, lineNumber is 0, compatibilityMode, lineNumber isnt lastLine) @registry.decodeTokens(line, tags, scopes) # Public: Tokenize the line of text. @@ -98,7 +99,7 @@ class Grammar # * `ruleStack` An {Array} of rules representing the tokenized state at the # end of the line. These should be passed back into this method when # tokenizing the next line in the file. - tokenizeLine: (inputLine, ruleStack, firstLine=false, compatibilityMode=true) -> + tokenizeLine: (inputLine, ruleStack, firstLine=false, compatibilityMode=true, withNewLine=true) -> tags = [] truncatedLine = false @@ -108,7 +109,8 @@ class Grammar else line = inputLine - string = new OnigString(line + '\n') + string = new OnigString(line) + stringWithNewLine = if withNewLine then new OnigString(line + '\n') else string if ruleStack? ruleStack = ruleStack.slice() @@ -133,13 +135,13 @@ class Grammar previousRuleStackLength = ruleStack.length previousPosition = position - break if position is line.length + 1 # include trailing newline position + break if position is stringWithNewLine.length if tokenCount >= @getMaxTokensPerLine() - 1 truncatedLine = true break - if match = _.last(ruleStack).rule.getNextTags(ruleStack, string, position, firstLine) + if match = _.last(ruleStack).rule.getNextTags(ruleStack, string, stringWithNewLine, position, firstLine) {nextTags, tagsStart, tagsEnd} = match # Unmatched text before next tags diff --git a/src/pattern.coffee b/src/pattern.coffee index 2aff1d4..30a2c91 100644 --- a/src/pattern.coffee +++ b/src/pattern.coffee @@ -171,7 +171,7 @@ class Pattern tagsForCaptureRule: (rule, line, captureStart, captureEnd, stack) -> captureText = line.substring(captureStart, captureEnd) - {tags} = rule.grammar.tokenizeLine(captureText, [stack..., {rule}]) + {tags} = rule.grammar.tokenizeLine(captureText, [stack..., {rule}], false, true, false) # only accept non empty tokens that don't exceed the capture end openScopes = [] diff --git a/src/rule.coffee b/src/rule.coffee index 3301060..84d7528 100644 --- a/src/rule.coffee +++ b/src/rule.coffee @@ -96,8 +96,8 @@ class Rule @normalizeCaptureIndices(lineWithNewline, result.captureIndices) result - getNextTags: (ruleStack, line, position, firstLine) -> - result = @findNextMatch(ruleStack, line, position, firstLine) + getNextTags: (ruleStack, line, lineWithNewline, position, firstLine) -> + result = @findNextMatch(ruleStack, lineWithNewline, position, firstLine) return null unless result? {index, captureIndices, scanner} = result From facfc3d3b07c980b0fadd6f504d8481b0657f2dd Mon Sep 17 00:00:00 2001 From: Indrek Ardel Date: Fri, 22 Sep 2017 02:13:42 +0300 Subject: [PATCH 4/4] Rename withNewLine to appendNewLine in tokenizeLine --- src/grammar.coffee | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grammar.coffee b/src/grammar.coffee index 68edc3a..a45d3d8 100644 --- a/src/grammar.coffee +++ b/src/grammar.coffee @@ -99,7 +99,7 @@ class Grammar # * `ruleStack` An {Array} of rules representing the tokenized state at the # end of the line. These should be passed back into this method when # tokenizing the next line in the file. - tokenizeLine: (inputLine, ruleStack, firstLine=false, compatibilityMode=true, withNewLine=true) -> + tokenizeLine: (inputLine, ruleStack, firstLine=false, compatibilityMode=true, appendNewLine=true) -> tags = [] truncatedLine = false @@ -110,7 +110,7 @@ class Grammar line = inputLine string = new OnigString(line) - stringWithNewLine = if withNewLine then new OnigString(line + '\n') else string + stringWithNewLine = if appendNewLine then new OnigString(line + '\n') else string if ruleStack? ruleStack = ruleStack.slice()