Skip to content

Commit

Permalink
Fix lexing commands with multi-byte characters
Browse files Browse the repository at this point in the history
Add a workaround for maciejhirsz/logos#202.

See #857.
  • Loading branch information
pfoerster committed Mar 9, 2023
1 parent 9910e48 commit 51f7179
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

- Parse nested `\iffalse` blocks correctly ([#853](https://github.com/latex-lsp/texlab/issues/853))
- Parse commands with multi-byte characters correctly ([#857](https://github.com/latex-lsp/texlab/issues/857))

## [5.3.0] - 2023-02-25

Expand Down
32 changes: 30 additions & 2 deletions src/parser/latex/lexer/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub enum Token {
#[regex(r"[\r\n]+", priority = 2)]
LineBreak,

#[regex(r"\s+", priority = 1)]
#[regex(r"[^\S\r\n]+", priority = 1)]
Whitespace,

#[regex(r"%[^\r\n]*")]
Expand Down Expand Up @@ -42,10 +42,38 @@ pub enum Token {
#[regex(r"\$\$?")]
Dollar,

#[regex(r"\\([^\r\n]|[@a-zA-Z:_]+\*?)?", |_| CommandName::Generic)]
#[regex(r"\\", lex_command_name)]
CommandName(CommandName),
}

/// Extends a command-name token after logos has matched the leading backslash.
///
/// The token regex only matches `\`; the rest of the name is consumed by hand so that a
/// multi-byte character following the backslash is taken as a whole (workaround for
/// maciejhirsz/logos#202).
///
/// Always returns [`CommandName::Generic`]; the length of the name is reported to the
/// lexer via `bump`.
fn lex_command_name(lexer: &mut logos::Lexer<Token>) -> CommandName {
    let rest = &lexer.source()[lexer.span().end..];
    let len = command_name_len(rest);
    lexer.bump(len);
    CommandName::Generic
}

/// Returns how many bytes of `input` (the text right after a backslash) belong to the
/// command name.
///
/// * Empty input or a leading `\r` / `\n` yields `0`, so a trailing backslash never
///   swallows the line break (the regex this replaces excluded `[\r\n]` as well).
/// * Any other non-letter first character forms a one-character command.
/// * A leading ASCII letter or `@` is followed by any run of ASCII letters, `@`, `:`
///   and `_`, optionally terminated by a single `*`.
fn command_name_len(input: &str) -> usize {
    let mut chars = input.chars();
    let Some(first) = chars.next() else { return 0 };

    // Line breaks must stay separate tokens; leave them for the `LineBreak` rule.
    if matches!(first, '\r' | '\n') {
        return 0;
    }

    let mut len = first.len_utf8();
    if !matches!(first, 'a'..='z' | 'A'..='Z' | '@') {
        // Single-character command such as `\%` or `\ö`.
        return len;
    }

    for c in chars {
        match c {
            // A star ends the name but is still part of it (e.g. `\section*`).
            '*' => {
                len += c.len_utf8();
                break;
            }
            'a'..='z' | 'A'..='Z' | '@' | ':' | '_' => len += c.len_utf8(),
            _ => break,
        }
    }

    len
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum CommandName {
Generic,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ ROOT@0..50
KEY@7..10
WORD@7..10 "asy"
R_CURLY@10..11 "}"
WHITESPACE@11..16 "\n "
LINE_BREAK@11..12 "\n"
WHITESPACE@12..16 " "
TEXT@16..22
WORD@16..22 "printf"
MIXED_GROUP@22..39
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ ROOT@0..271
KEY@36..46
WORD@36..46 "tabularray"
R_CURLY@46..47 "}"
WHITESPACE@47..50 " \n\n"
WHITESPACE@47..48 " "
LINE_BREAK@48..50 "\n\n"
GENERIC_COMMAND@50..64
COMMAND_NAME@50..63 "\\ExplSyntaxOn"
LINE_BREAK@63..64 "\n"
Expand All @@ -41,16 +42,20 @@ ROOT@0..271
TEXT@100..101
WORD@100..101 "m"
R_CURLY@101..102 "}"
WHITESPACE@102..107 "\n "
LINE_BREAK@102..103 "\n"
WHITESPACE@103..107 " "
CURLY_GROUP@107..200
L_CURLY@107..108 "{"
WHITESPACE@108..113 "\n "
LINE_BREAK@108..109 "\n"
WHITESPACE@109..113 " "
GENERIC_COMMAND@113..194
COMMAND_NAME@113..119 "\\use:x"
WHITESPACE@119..124 "\n "
LINE_BREAK@119..120 "\n"
WHITESPACE@120..124 " "
CURLY_GROUP@124..194
L_CURLY@124..125 "{"
WHITESPACE@125..130 "\n "
LINE_BREAK@125..126 "\n"
WHITESPACE@126..130 " "
GENERIC_COMMAND@130..141
COMMAND_NAME@130..140 "\\exp_not:N"
WHITESPACE@140..141 " "
Expand All @@ -62,7 +67,8 @@ ROOT@0..271
KEY@148..152
WORD@148..152 "tblr"
R_CURLY@152..153 "}"
WHITESPACE@153..158 "\n "
LINE_BREAK@153..154 "\n"
WHITESPACE@154..158 " "
BRACK_GROUP@158..179
L_BRACK@158..159 "["
GENERIC_COMMAND@159..173
Expand All @@ -73,28 +79,34 @@ ROOT@0..271
WORD@170..172 "#1"
R_CURLY@172..173 "}"
R_BRACK@173..174 "]"
WHITESPACE@174..179 "\n "
LINE_BREAK@174..175 "\n"
WHITESPACE@175..179 " "
CURLY_GROUP@179..188
L_CURLY@179..180 "{"
TEXT@180..182
WORD@180..182 "#2"
R_CURLY@182..183 "}"
WHITESPACE@183..188 "\n "
LINE_BREAK@183..184 "\n"
WHITESPACE@184..188 " "
R_CURLY@188..189 "}"
WHITESPACE@189..194 "\n "
LINE_BREAK@189..190 "\n"
WHITESPACE@190..194 " "
R_CURLY@194..195 "}"
WHITESPACE@195..200 "\n "
LINE_BREAK@195..196 "\n"
WHITESPACE@196..200 " "
CURLY_GROUP@200..223
L_CURLY@200..201 "{"
WHITESPACE@201..206 "\n "
LINE_BREAK@201..202 "\n"
WHITESPACE@202..206 " "
GENERIC_COMMAND@206..221
COMMAND_NAME@206..210 "\\end"
CURLY_GROUP@210..221
L_CURLY@210..211 "{"
TEXT@211..215
WORD@211..215 "tblr"
R_CURLY@215..216 "}"
WHITESPACE@216..221 "\n "
LINE_BREAK@216..217 "\n"
WHITESPACE@217..221 " "
R_CURLY@221..222 "}"
LINE_BREAK@222..223 "\n"
GENERIC_COMMAND@223..239
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,19 @@ ROOT@0..135
LINE_BREAK@74..75 "\n"
BLOCK_COMMENT@75..120
COMMAND_NAME@75..83 "\\iffalse"
WHITESPACE@83..86 "\n "
LINE_BREAK@83..84 "\n"
WHITESPACE@84..86 " "
BLOCK_COMMENT@86..97
COMMAND_NAME@86..94 "\\iffalse"
COMMAND_NAME@94..97 "\\fi"
WHITESPACE@97..100 "\n "
LINE_BREAK@97..98 "\n"
WHITESPACE@98..100 " "
COMMAND_NAME@100..104 "\\end"
L_CURLY@104..105 "{"
WORD@105..114 "enumerate"
R_CURLY@114..115 "}"
WHITESPACE@115..117 " \n"
WHITESPACE@115..116 " "
LINE_BREAK@116..117 "\n"
COMMAND_NAME@117..120 "\\fi"
LINE_BREAK@120..121 "\n"
END@121..135
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
source: src/parser/latex.rs
expression: root
input_file: src/parser/test_data/latex/issue_857.txt
---
ROOT@0..16
PREAMBLE@0..16
COMMAND_DEFINITION@0..11
COMMAND_NAME@0..11 "\\newcommand"
GENERIC_COMMAND@11..16
COMMAND_NAME@11..14 "\\ö"
CURLY_GROUP@14..16
L_CURLY@14..15 "{"
R_CURLY@15..16 "}"

1 change: 1 addition & 0 deletions src/parser/test_data/latex/issue_857.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
\newcommand\ö{}

0 comments on commit 51f7179

Please sign in to comment.