From f48d1bd351c28c3c27973b6c501d399f1f154349 Mon Sep 17 00:00:00 2001 From: RedCMD Date: Sun, 19 Nov 2023 15:15:10 +1300 Subject: [PATCH] Fix character class range bug and improve `\\x{}`&`\\o{}` code points --- syntaxes/regex.tmLanguage.json | 246 +++++++++++++++++++++++---------- 1 file changed, 172 insertions(+), 74 deletions(-) diff --git a/syntaxes/regex.tmLanguage.json b/syntaxes/regex.tmLanguage.json index 92b50d0..e35eb26 100644 --- a/syntaxes/regex.tmLanguage.json +++ b/syntaxes/regex.tmLanguage.json @@ -10,7 +10,7 @@ { "include": "#anchor" }, { "include": "#subroutine" }, { "include": "#backreference" }, - { "include": "#unicode" }, + { "include": "#code-point" }, { "include": "#alternation" }, { "include": "#quantifier" }, { "include": "#character-class" }, @@ -713,7 +713,7 @@ "contentName": "character-class", "patterns": [ { "include": "#character-class-range" }, - { "include": "#unicode" }, + { "include": "#character-class-code-point" }, { "include": "#character-class-escape" }, { "include": "#character-class-posix" }, { "include": "#character-class" }, @@ -798,7 +798,7 @@ "name": "constant.character.escape.tm" }, { - "match": "\\\\.?|[\"\\x0-\\x1F\\x7F]", + "match": "\\\\.?|[\"\\x-\\x1F\\x7F]", "name": "invalid.illegal.tm markup.underline regex" } ] @@ -806,7 +806,7 @@ "character-class-range": { "patterns": [ { - "match": "([\\\\-\\x{FFFFFFFF}]-(?!&&)[\\x0-Z]|[+-\\x{FFFFFFFF}]-(?!&&)[\\x0-*])(\\\\{2}(?=-))?", + "match": "([\\\\-\\x{FFFFFFFF}]-(?!&&)[\\x-Z]|[+-\\x{FFFFFFFF}]-(?!&&)[\\x-*])(\\\\{2}(?=-))?", "captures": { "1": { "name": "invalid.illegal.tm markup.underline regex" }, "2": { "name": "punctuation.definition.tag.tm" } @@ -853,19 +853,34 @@ } }, { - "match": "((?>(\\\\{3})\\\\|((\\\\{2}(?>c|[CM]-)(?:\\g<4>|\\\\u.{,4}|\\\\{,3}.))|\\\\{2}(?>x{\\h[^}]*}|o{[0-7][^}]*}|x\\h{,2}|[0-7]{1,3})|\\\\{1,2}u.{,4})|\\G(\\\\{2})?[]-]|\\G(?(\\\\{2})-|\\g<1>)(\\\\{2}(?=-))?", + "match": "((\\\\{3})\\\\|((?\\\\{2}(?>c|[CM]-)(?:\\g|\\\\u.{,4}|\\\\{,3}.))|\\\\{2}(?>x{\\h[^}]*}|o{[0-7][^}]*}|x\\h{,2}|[0-7]{1,3})|\\\\{1,2}u.{1,4})|\\G(\\\\{2})?[]-]|\\G(?(\\\\{2})-|(?>(\\\\{3})\\\\|((?\\\\{2}(?>c|[CM]-)(?:\\g|\\\\u.{,4}|\\\\{,3}.))|\\\\{2}(?>x{\\h[^}]*}|o{[0-7][^}]*}|x\\h{,2}|[0-7]{1,3})|\\\\{1,2}u.{,4})|\\G(\\\\{2})?[]-]|\\G(?[^]\\x0-\\x1F\"&:\\[\\\\\\x7F-]+(?!-))?", "name": "markup.italic regex" }, - "unicode": { - "comment": "\\777 \\xFF \\cZ \\o{0 1777777 17777777777} \\x{0 13FFF 7FFFFFFF} \\u0000 \\p{L} \\C-\\M-]", + "character-class-range-code-point": { + "comment": "\\o{37777777777 0 1777777} \\x{FFFFFFFF 0 1FFFFF}", + "patterns": [ + { + "begin": "(\\\\{2}o{)(0{,10}+)(3?[0-7]{1,10})(?\\\\\\?\\@\\A\\B\\C-]\\D\\E\\F\\G\\H\\I\\J\\K\\L\\M-a\\N\\O\\P{^L}\\Q\\R\\S\\T\\U\\V\\W\\X\\Y\\Z\\[\\\\\\]\\^\\_\\`\\a\\b\\c]\\d\\e\\f\\g<0>\\h\\i\\j\\k'1'\\l\\m\\n\\o\\p{l}\\q\\r\\s\\t\\u0000\\v\\w\\x\\y\\z\\{\\|\\}\\~ " }, { "match": "(*FAIL) (?x){2,3}{2,3}({2,3}(?:{2,3})){2,3}{,3}{2,}{2} {000} *{0} {0}* *{0}* {0}{0}{0} {0}{2,3} {2,3}{0}{2,3} {2,3\\} " }, { "match": "(*FAIL) () (?<= (?= (?> (?=)? ) )? ) (?= (?= )? ) " }, - { "match": "(*FAIL) \\x00\\x{00000000 0013FFFF} \\o{00000000000 00004777777} [\\x{42 48}-\\x{FFFFFFFF 45} \\o{102 110}-\\o{37777777777 105}] " }, - { "match": "(*FAIL) [a-z&&]u°𐐷Ꚛ�𐀀𐃘[�𐃘]®×ñÿ¡¼÷€Çô│╨ε■ ‚ Ȁ°Ççë£ ÿ[[:^upper:]] " }, + { "match": "(*FAIL) \\x \\x00 \\xFF \\x{} \\x{k} \\x\\{123} \\x{00000000 0013FFFF} [\\x{42 001FFFFF 48}-\\x{FFFFFFFF 45}] " }, + { "match": "(*FAIL) \\o{} \\o{k} \\o\\{123} \\o{00000000000 00004777777} [\\o{102 00004777777 110}-\\o{37777777777 105}] " }, + { "match": "(*FAIL) [a-z&&]u°𐐷Ꚛ�𐀀𐃘[�𐃘]®×ñÿ¡¼÷€Çô│╨ε■ ‚‚ Ȁ°Ççë£ ÿ[[:^upper:]] " }, { "match": "(*FAIL) \\g<99> \\x555 \\x{201E} \\p{L} \\p{-L etterLaaLaa--LL--LL--} " }, { "match": "(*FAIL) (?~a) (?{foo}) " }, { "match": "(*FAIL) QabcE $ \\$ an{name}bc At aT aa abc abd cd a1 b a1b n1000 Q.E q.e test ab$abc " },