Skip to content

Commit

Permalink
fix: Lexer infinite loop fix (eclipse-che4z#85)
Browse files Browse the repository at this point in the history
Fixed the is_space method in the lexer. Before the change, it called std::isspace, so some characters could not be matched by any token (those that are matched by std::isspace but not by lex_tokens). These characters are now matched in lex_word.
  • Loading branch information
asmelko authored Sep 9, 2020
1 parent a8ab49c commit 9323ea5
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 25 deletions.
34 changes: 12 additions & 22 deletions parser_library/src/lexing/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,31 +229,21 @@ void lexer::consume()
input_state_->char_position++;
input_state_->c = static_cast<char_t>(input_state_->input->LA(1));

if (input_state_->c == '\t')
input_state_->char_position_in_line++;
if (input_state_->c == static_cast<char32_t>(-1))
{
input_state_->c = ' ';
// warning
input_state_->char_position_in_line += tab_size_;
input_state_->char_position_in_line_utf16 += tab_size_;
last_char_utf16_long_ = false;
input_state_->char_position_in_line_utf16 += 1;
}
else if (input_state_->c > 0xFFFF)
{
last_char_utf16_long_ = true;
input_state_->char_position_in_line_utf16 += 2;
}
else
{
input_state_->char_position_in_line++;
if (input_state_->c == static_cast<char32_t>(-1))
{
last_char_utf16_long_ = false;
input_state_->char_position_in_line_utf16 += 1;
}
else if (input_state_->c > 0xFFFF)
{
last_char_utf16_long_ = true;
input_state_->char_position_in_line_utf16 += 2;
}
else
{
last_char_utf16_long_ = false;
input_state_->char_position_in_line_utf16++;
}
last_char_utf16_long_ = false;
input_state_->char_position_in_line_utf16++;
}
}
}
Expand Down Expand Up @@ -601,7 +591,7 @@ bool lexer::ord_char(char_t c)

// Reports whether the character currently held in the input state is
// accepted by ord_char().
bool lexer::is_ord_char() const
{
    const auto current = input_state_->c;
    return ord_char(current);
}

// Removed side of this diff: delegated to isspace() for code points up to 255.
// Per the commit description, isspace() accepts characters that no lexer token matches.
bool lexer::is_space() const { return input_state_->c <= 255 && isspace(input_state_->c); }
// Added side of this diff: only ' ', '\n' and '\r' are treated as space.
bool lexer::is_space() const { return input_state_->c == ' ' || input_state_->c == '\n' || input_state_->c == '\r'; }

bool lexer::is_data_attribute() const
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ TEST(data_definition_grammar, modifiers)
DC (1*8)FDP(123)L3S(2*4)E12'2.25'
DC (1*8)FDP(123)L1S30E(-12*2)'2.25'
DC (1*8)FDP(123)L1S30E40'2.25'
DC 10FDL(2*3)S(2*4)E(-12*2)'2.25'
DC 10FDL2S(2*4)E(-12*2)'2.25'
DC 10FDL(2*3)S6E(-12*2)'2.25'
Expand Down
2 changes: 1 addition & 1 deletion parser_library/test/debugging/debugger_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ B EQU A
TEST(debugger, concurrent_next_and_file_change)
{
std::string open_code = R"(
LR 1,1
LR 1,1
COPY COPY1
)";
std::string copy1_filename = "COPY1";
Expand Down
12 changes: 12 additions & 0 deletions parser_library/test/lexing/lexer_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,15 @@ EOF

ASSERT_EQ(token_string, out);
}

// Feeds the lexer an input in which "\v\f\t" directly follows "A" and checks
// that tokenization proceeds through the whole input with the expected types.
TEST(lexer_test, special_spaces)
{
    namespace lexing_ns = hlasm_plugin::parser_library::lexing;
    namespace semantics_ns = hlasm_plugin::parser_library::semantics;
    using lexer_t = lexing_ns::lexer;

    std::string text = "A\v\f\t LR";
    lexing_ns::input_source source(text);
    semantics_ns::lsp_info_processor processor = { "rewind_input", "", nullptr, false };
    lexer_t lex(&source, &processor);

    // Expected token type sequence: IDENTIFIER, SPACE, ORDSYMBOL.
    ASSERT_EQ(lex.nextToken()->getType(), lexer_t::IDENTIFIER);
    ASSERT_EQ(lex.nextToken()->getType(), lexer_t::SPACE);
    ASSERT_EQ(lex.nextToken()->getType(), lexer_t::ORDSYMBOL);
}
2 changes: 1 addition & 1 deletion parser_library/test/mock_parse_lib_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class mock_parse_lib_provider : public workspaces::parse_lib_provider
)";
const std::string copy_contents =
R"(R2 EQU 2
LR R2,R2)";
LR R2,R2)";
};

} // namespace hlasm_plugin::parser_library

0 comments on commit 9323ea5

Please sign in to comment.