From 472b42d09442d6b2b599671690d7b21b65e6b88e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 21 Feb 2022 17:02:34 -0500 Subject: [PATCH] Support Latin-1 supplement a0-ff as DSL string literals (#957) * Support Latin-1 supplement a0-ff as DSL string literals * build artifacts for previous commit --- internal/pkg/parsing/lexer/lexer.go | 15 ++++++++------- internal/pkg/parsing/lexer/transitiontable.go | 8 ++++++++ internal/pkg/parsing/mlr.bnf | 5 +++-- todo.txt | 1 + 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/internal/pkg/parsing/lexer/lexer.go b/internal/pkg/parsing/lexer/lexer.go index 7894220c4e..1cfd61a36b 100644 --- a/internal/pkg/parsing/lexer/lexer.go +++ b/internal/pkg/parsing/lexer/lexer.go @@ -12,7 +12,7 @@ import ( const ( NoState = -1 NumStates = 328 - NumSymbols = 579 + NumSymbols = 580 ) type Lexer struct { @@ -702,10 +702,11 @@ Lexer symbols: 570: 'A'-'Z' 571: 'a'-'z' 572: '0'-'9' -573: \u0100-\U0010ffff -574: 'A'-'Z' -575: 'a'-'z' -576: '0'-'9' -577: \u0100-\U0010ffff -578: . +573: \u00a0-\u00ff +574: \u0100-\U0010ffff +575: 'A'-'Z' +576: 'a'-'z' +577: '0'-'9' +578: \u0100-\U0010ffff +579: . */ diff --git a/internal/pkg/parsing/lexer/transitiontable.go b/internal/pkg/parsing/lexer/transitiontable.go index 08f2fd9302..8f0b772535 100644 --- a/internal/pkg/parsing/lexer/transitiontable.go +++ b/internal/pkg/parsing/lexer/transitiontable.go @@ -241,6 +241,8 @@ var TransTab = TransitionTable{ return 57 case r == 126: // ['~','~'] return 57 + case 160 <= r && r <= 255: // [\u00a0,\u00ff] + return 57 case 256 <= r && r <= 1114111: // [\u0100,\U0010ffff] return 57 } @@ -1212,6 +1214,8 @@ var TransTab = TransitionTable{ return 57 case r == 126: // ['~','~'] return 57 + case 160 <= r && r <= 255: // [\u00a0,\u00ff] + return 57 case 256 <= r && r <= 1114111: // [\u0100,\U0010ffff] return 57 } @@ -2980,6 +2984,8 @@ var TransTab = TransitionTable{ return 57 case r == 126: // ['~','~'] return 57 + case 160 <= r && r <= 255: // [\u00a0,\u00ff] + return 57 case 256 <= r && r <= 1114111: // [\u0100,\U0010ffff] return 57 } @@ -4410,6 +4416,8 @@ var TransTab = TransitionTable{ return 57 case r == 126: // ['~','~'] return 57 + case 160 <= r && r <= 255: // [\u00a0,\u00ff] + return 57 case 256 <= r && r <= 1114111: // [\u0100,\U0010ffff] return 57 } diff --git a/internal/pkg/parsing/mlr.bnf b/internal/pkg/parsing/mlr.bnf index baee22c5ca..f2b32fe2ce 100644 --- a/internal/pkg/parsing/mlr.bnf +++ b/internal/pkg/parsing/mlr.bnf @@ -54,7 +54,7 @@ // CHARACTER CLASSES // ---------------------------------------------------------------- -_letter : 'a'-'z' | 'A'-'Z' | '\u00a0'-'\u00ff' | '\u0100'-'\U0010FFFF'; +_letter : 'a'-'z' | 'A'-'Z' | '\u00a0'-'\u00ff' | '\u0100'-'\U0010ffff'; _decdig : '0'-'9' ; _hexdig : '0'-'9' | 'a'-'f' | 'A'-'F'; _octdig : '0'-'7' ; @@ -95,7 +95,8 @@ _string_literal_element | ( '\\' '*' ) | ( '\\' '0' ) | ( '\\' '1' ) | ( '\\' '2' ) | ( '\\' '3' ) | ( '\\' '4' ) | ( '\\' '5' ) | ( '\\' '6' ) | ( '\\' '7' ) | ( '\\' '8' ) | ( '\\' '9' ) - | '\u0100'-'\U0010FFFF' + | '\u00a0'-'\u00ff' + | '\u0100'-'\U0010ffff' ; string_literal : '"' {_string_literal_element} '"' ; diff --git a/todo.txt b/todo.txt index a84c57b2a1..67fb7ba158 100644 --- a/todo.txt +++ b/todo.txt @@ -7,6 +7,7 @@ RELEASES https://github.com/johnkerl/miller/issues?q=is%3Aissue+is%3Aopen+label%3Aneeds-documentation o fmt/unfmt/regex doc o FAQ/examples reorg + ? ??? for doc-string contents search -- ? & mlr help namegoeshere foo ... k strptime/882 k fmtifnum, & recursive fmtnum/fmtifnum