From 9323ea5fe6d7054704a3708ce654186caecc683f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20=C5=A0melko?= Date: Wed, 9 Sep 2020 17:48:17 +0200 Subject: [PATCH] fix: Lexer infinite loop fix (#85) Fixed the is_space method in the lexer. Before the change, it called std::isspace, so some characters (those that are matched by std::isspace but not by lex_tokens) could not be matched by any token. Now these characters are matched in lex_word. --- parser_library/src/lexing/lexer.cpp | 34 +++++++------------ .../data_definition/data_definition_test.cpp | 2 +- .../test/debugging/debugger_test.cpp | 2 +- parser_library/test/lexing/lexer_test.cpp | 12 +++++++ parser_library/test/mock_parse_lib_provider.h | 2 +- 5 files changed, 27 insertions(+), 25 deletions(-) diff --git a/parser_library/src/lexing/lexer.cpp b/parser_library/src/lexing/lexer.cpp index af5b0b636..84bbd562a 100644 --- a/parser_library/src/lexing/lexer.cpp +++ b/parser_library/src/lexing/lexer.cpp @@ -229,31 +229,21 @@ void lexer::consume() input_state_->char_position++; input_state_->c = static_cast(input_state_->input->LA(1)); - if (input_state_->c == '\t') + input_state_->char_position_in_line++; + if (input_state_->c == static_cast(-1)) { - input_state_->c = ' '; - // warning - input_state_->char_position_in_line += tab_size_; - input_state_->char_position_in_line_utf16 += tab_size_; + last_char_utf16_long_ = false; + input_state_->char_position_in_line_utf16 += 1; + } + else if (input_state_->c > 0xFFFF) + { + last_char_utf16_long_ = true; + input_state_->char_position_in_line_utf16 += 2; } else { - input_state_->char_position_in_line++; - if (input_state_->c == static_cast(-1)) - { - last_char_utf16_long_ = false; - input_state_->char_position_in_line_utf16 += 1; - } - else if (input_state_->c > 0xFFFF) - { - last_char_utf16_long_ = true; - input_state_->char_position_in_line_utf16 += 2; - } - else - { - last_char_utf16_long_ = false; - input_state_->char_position_in_line_utf16++; - } + 
last_char_utf16_long_ = false; + input_state_->char_position_in_line_utf16++; } } } @@ -601,7 +591,7 @@ bool lexer::ord_char(char_t c) bool lexer::is_ord_char() const { return ord_char(input_state_->c); } -bool lexer::is_space() const { return input_state_->c <= 255 && isspace(input_state_->c); } +bool lexer::is_space() const { return input_state_->c == ' ' || input_state_->c == '\n' || input_state_->c == '\r'; } bool lexer::is_data_attribute() const { diff --git a/parser_library/test/checking/data_definition/data_definition_test.cpp b/parser_library/test/checking/data_definition/data_definition_test.cpp index a4ac82fc3..3188d6a3b 100644 --- a/parser_library/test/checking/data_definition/data_definition_test.cpp +++ b/parser_library/test/checking/data_definition/data_definition_test.cpp @@ -59,7 +59,7 @@ TEST(data_definition_grammar, modifiers) DC (1*8)FDP(123)L3S(2*4)E12'2.25' DC (1*8)FDP(123)L1S30E(-12*2)'2.25' DC (1*8)FDP(123)L1S30E40'2.25' - + DC 10FDL(2*3)S(2*4)E(-12*2)'2.25' DC 10FDL2S(2*4)E(-12*2)'2.25' DC 10FDL(2*3)S6E(-12*2)'2.25' diff --git a/parser_library/test/debugging/debugger_test.cpp b/parser_library/test/debugging/debugger_test.cpp index 08d997170..7e8dd821a 100644 --- a/parser_library/test/debugging/debugger_test.cpp +++ b/parser_library/test/debugging/debugger_test.cpp @@ -443,7 +443,7 @@ B EQU A TEST(debugger, concurrent_next_and_file_change) { std::string open_code = R"( - LR 1,1 + LR 1,1 COPY COPY1 )"; std::string copy1_filename = "COPY1"; diff --git a/parser_library/test/lexing/lexer_test.cpp b/parser_library/test/lexing/lexer_test.cpp index 2adf0ea09..dd04dc52c 100644 --- a/parser_library/test/lexing/lexer_test.cpp +++ b/parser_library/test/lexing/lexer_test.cpp @@ -306,3 +306,15 @@ EOF ASSERT_EQ(token_string, out); } + +TEST(lexer_test, special_spaces) +{ + std::string in = "A\v\f\t LR"; + hlasm_plugin::parser_library::lexing::input_source input(in); + hlasm_plugin::parser_library::semantics::lsp_info_processor lsp_proc = { "rewind_input", 
"", nullptr, false }; + hlasm_plugin::parser_library::lexing::lexer l(&input, &lsp_proc); + + ASSERT_EQ(l.nextToken()->getType(), hlasm_plugin::parser_library::lexing::lexer::IDENTIFIER); + ASSERT_EQ(l.nextToken()->getType(), hlasm_plugin::parser_library::lexing::lexer::SPACE); + ASSERT_EQ(l.nextToken()->getType(), hlasm_plugin::parser_library::lexing::lexer::ORDSYMBOL); +} diff --git a/parser_library/test/mock_parse_lib_provider.h b/parser_library/test/mock_parse_lib_provider.h index bbdc32fdb..b5810cbc1 100644 --- a/parser_library/test/mock_parse_lib_provider.h +++ b/parser_library/test/mock_parse_lib_provider.h @@ -51,7 +51,7 @@ class mock_parse_lib_provider : public workspaces::parse_lib_provider )"; const std::string copy_contents = R"(R2 EQU 2 - LR R2,R2)"; + LR R2,R2)"; }; } // namespace hlasm_plugin::parser_library \ No newline at end of file