Skip to content

Commit

Permalink
fix: Incorrect attribute parsing in CA expressions without spaces bet…
Browse files Browse the repository at this point in the history
…ween individual terms

(closes #263)
  • Loading branch information
slavek-kucera authored May 19, 2022
1 parent e2fa7d8 commit 821fae0
Show file tree
Hide file tree
Showing 28 changed files with 380 additions and 243 deletions.
1 change: 1 addition & 0 deletions clients/vscode-hlasmplugin/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#### Fixed
- Incorrect attribute values generated when literals are substituted in CA expressions
- AINSERT grammar tests improved
- Incorrect attribute parsing in CA expressions without spaces between individual terms

## [1.2.0](https://github.com/eclipse/che-che4z-lsp-for-hlasm/compare/1.1.0...1.2.0) (2022-05-11)

Expand Down
3 changes: 3 additions & 0 deletions parser_library/src/analyzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "analyzer.h"

#include "hlasmparser.h"
#include "parsing/error_strategy.h"
#include "processing/preprocessor.h"

Expand Down Expand Up @@ -101,6 +102,8 @@ context::hlasm_context& analyzer::hlasm_ctx() { return *ctx_.hlasm_ctx; }

parsing::hlasmparser& analyzer::parser() { return mngr_.opencode_parser(); }

size_t analyzer::debug_syntax_errors() { return mngr_.opencode_parser().getNumberOfSyntaxErrors(); }

const semantics::source_info_processor& analyzer::source_processor() const { return src_proc_; }

void analyzer::analyze(std::atomic<bool>* cancel)
Expand Down
6 changes: 5 additions & 1 deletion parser_library/src/analyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include "analyzing_context.h"
#include "context/hlasm_context.h"
#include "diagnosable_ctx.h"
#include "hlasmparser.h"
#include "lexing/token_stream.h"
#include "lsp/lsp_context.h"
#include "parsing/parser_error_listener.h"
Expand All @@ -30,6 +29,10 @@
#include "virtual_file_monitor.h"
#include "workspaces/parse_lib_provider.h"

namespace hlasm_plugin::parser_library::parsing {
class hlasmparser;
} // namespace hlasm_plugin::parser_library::parsing

namespace hlasm_plugin::parser_library {

enum class collect_highlighting_info : bool
Expand Down Expand Up @@ -139,6 +142,7 @@ class analyzer : public diagnosable_ctx
void register_stmt_analyzer(processing::statement_analyzer* stmt_analyzer);

parsing::hlasmparser& parser(); // for testing only
size_t debug_syntax_errors(); // for testing only
};

} // namespace hlasm_plugin::parser_library
Expand Down
15 changes: 9 additions & 6 deletions parser_library/src/lexing/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,12 +460,10 @@ bool lexer::is_ord_char() const { return ord_char(input_state_->c); }

bool lexer::is_space() const { return input_state_->c == ' ' || input_state_->c == '\n' || input_state_->c == '\r'; }

bool lexer::is_consuming_data_attribute() const
bool is_data_attribute(char_t c)
{
// Although there are more data attributes (N, K, D), only these 5 consume the apostrophe right away, so that it
// cannot denote the beginning of string
auto tmp = std::toupper(input_state_->c);
return tmp == 'O' || tmp == 'S' || tmp == 'I' || tmp == 'L' || tmp == 'T';
auto tmp = std::toupper(c);
return tmp == 'O' || tmp == 'S' || tmp == 'I' || tmp == 'L' || tmp == 'T' || tmp == 'N' || tmp == 'K' || tmp == 'D';
}

void lexer::lex_word()
Expand All @@ -475,14 +473,18 @@ void lexer::lex_word()
bool num = (input_state_->c >= '0' && input_state_->c <= '9');
size_t last_part_ord_len = 0;
size_t w_len = 0;
bool last_ord = true;
while (!is_space() && !eof() && !identifier_divider() && before_end())
{
bool curr_ord = is_ord_char();
if (!last_ord && curr_ord)
break;

last_part_ord_len = curr_ord ? last_part_ord_len + 1 : 0;
ord &= curr_ord;

num &= (input_state_->c >= '0' && input_state_->c <= '9');
last_char_data_attr = is_consuming_data_attribute();
last_char_data_attr = is_data_attribute(input_state_->c) && w_len == 0;

if (creating_var_symbol_ && !ord && w_len > 0 && w_len <= 63)
{
Expand All @@ -492,6 +494,7 @@ void lexer::lex_word()

consume();
++w_len;
last_ord = curr_ord;
}

bool var_sym_tmp = creating_var_symbol_;
Expand Down
1 change: 0 additions & 1 deletion parser_library/src/lexing/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ class lexer final : public antlr4::TokenSource
// is next input char an ord char?
bool is_ord_char() const;
bool is_space() const;
bool is_consuming_data_attribute() const;
void set_unlimited_line(bool unlimited_lines);
// set lexer's input state to file position
void set_file_offset(position file_offset, bool process_allowed = false);
Expand Down
2 changes: 1 addition & 1 deletion parser_library/src/parsing/grammar/ca_expr_rules.g4
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ var_symbol returns [vs_ptr vs]
);

data_attribute returns [context::data_attr_kind attribute, std::variant<context::id_index, semantics::vs_ptr, semantics::literal_si> value, range value_range]
: ORDSYMBOL (attr|apostrophe_as_attr) data_attribute_value
: ORDSYMBOL attr data_attribute_value
{
collector.add_hl_symbol(token_info(provider.get_range($ORDSYMBOL), hl_scopes::data_attr_type));
$attribute = get_attribute($ORDSYMBOL->getText());
Expand Down
29 changes: 28 additions & 1 deletion parser_library/src/parsing/grammar/deferred_operand_rules.g4
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,34 @@ deferred_entry returns [std::vector<vs_ptr> vs]
| dot
| lpar
| rpar
| attr
| ap1=ATTR
{disable_ca_string();}
(
(
(APOSTROPHE|ATTR) (APOSTROPHE|ATTR)
|
(
{std::string name;}
AMPERSAND
ORDSYMBOL {name += $ORDSYMBOL->getText();}
(ORDSYMBOL {name += $ORDSYMBOL->getText();}|NUM {name += $NUM->getText();})*
{
auto r = provider.get_range($AMPERSAND,_input->LT(-1));
$vs.push_back(std::make_unique<basic_variable_symbol>(hlasm_ctx->ids().add(std::move(name)), std::vector<ca_expr_ptr>(), r));
collector.add_hl_symbol(token_info(r,hl_scopes::var_symbol));
}
)
|
l_sp_ch_v
)*
ap2=(APOSTROPHE|ATTR)
{
collector.add_hl_symbol(token_info(provider.get_range($ap1,$ap2),hl_scopes::string));
}
|
{collector.add_hl_symbol(token_info(provider.get_range($ap1),hl_scopes::operator_symbol)); }
)
{enable_ca_string();}
|
ap1=APOSTROPHE
{disable_ca_string();}
Expand Down
2 changes: 0 additions & 2 deletions parser_library/src/parsing/grammar/hlasmparser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,6 @@ apostrophe
: APOSTROPHE;
attr
: ATTR {collector.add_hl_symbol(token_info(provider.get_range( $ATTR),hl_scopes::operator_symbol)); };
apostrophe_as_attr
: APOSTROPHE {collector.add_hl_symbol(token_info(provider.get_range($APOSTROPHE),hl_scopes::operator_symbol)); };
lpar
: LPAR { collector.add_hl_symbol(token_info(provider.get_range( $LPAR),hl_scopes::operator_symbol)); };
rpar
Expand Down
67 changes: 35 additions & 32 deletions parser_library/src/parsing/grammar/machine_expr_rules.g4
Original file line number Diff line number Diff line change
Expand Up @@ -68,37 +68,40 @@ mach_term returns [mach_expr_ptr m_e]
{
$m_e = std::make_unique<mach_expr_location_counter>( provider.get_range( $mach_location_counter.ctx));
}
| {is_data_attr()}? mach_data_attribute
{
auto rng = provider.get_range($mach_data_attribute.ctx);
auto attr = $mach_data_attribute.attribute;
if(attr == data_attr_kind::UNKNOWN)
$m_e = std::make_unique<mach_expr_default>(rng);
else
$m_e = std::visit([rng, attr, symbol_rng = $mach_data_attribute.symbol_rng](auto& arg) -> mach_expr_ptr {
if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,std::monostate>) {
return std::make_unique<mach_expr_default>(rng);
}
else if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,int>) {
return std::make_unique<mach_expr_constant>(arg, symbol_rng);
}
else if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,std::unique_ptr<mach_expr_literal>>) {
return std::make_unique<mach_expr_data_attr_literal>(std::move(arg), attr, rng, symbol_rng);
}
else {
return std::make_unique<mach_expr_data_attr>(arg.first, arg.second, attr, rng, symbol_rng);
}
}, $mach_data_attribute.data);
}
| self_def_term
{
$m_e = std::make_unique<mach_expr_constant>($self_def_term.value, provider.get_range( $self_def_term.ctx));
}
| id
{
collector.add_hl_symbol(token_info(provider.get_range( $id.ctx),hl_scopes::ordinary_symbol));
$m_e = std::make_unique<mach_expr_symbol>($id.name, $id.using_qualifier, provider.get_range( $id.ctx));
}
|
(
mach_data_attribute
{
auto rng = provider.get_range($mach_data_attribute.ctx);
auto attr = $mach_data_attribute.attribute;
if(attr == data_attr_kind::UNKNOWN)
$m_e = std::make_unique<mach_expr_default>(rng);
else
$m_e = std::visit([rng, attr, symbol_rng = $mach_data_attribute.symbol_rng](auto& arg) -> mach_expr_ptr {
if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,std::monostate>) {
return std::make_unique<mach_expr_default>(rng);
}
else if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,int>) {
return std::make_unique<mach_expr_constant>(arg, symbol_rng);
}
else if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,std::unique_ptr<mach_expr_literal>>) {
return std::make_unique<mach_expr_data_attr_literal>(std::move(arg), attr, rng, symbol_rng);
}
else {
return std::make_unique<mach_expr_data_attr>(arg.first, arg.second, attr, rng, symbol_rng);
}
}, $mach_data_attribute.data);
}
| self_def_term
{
$m_e = std::make_unique<mach_expr_constant>($self_def_term.value, provider.get_range( $self_def_term.ctx));
}
| id
{
collector.add_hl_symbol(token_info(provider.get_range( $id.ctx),hl_scopes::ordinary_symbol));
$m_e = std::make_unique<mach_expr_symbol>($id.name, $id.using_qualifier, provider.get_range( $id.ctx));
}
)
| signed_num
{
collector.add_hl_symbol(token_info(provider.get_range( $signed_num.ctx),hl_scopes::number));
Expand Down Expand Up @@ -142,7 +145,7 @@ literal_internal returns [std::optional<data_definition> value]
};

mach_data_attribute returns [data_attr_kind attribute, std::variant<std::monostate, std::pair<id_index,id_index>, std::unique_ptr<mach_expr_literal>, int> data, range symbol_rng]
: ORDSYMBOL (attr|apostrophe_as_attr) {auto lit_restore = enable_literals();}
: ORDSYMBOL attr {auto lit_restore = enable_literals();}
{
collector.add_hl_symbol(token_info(provider.get_range($ORDSYMBOL), hl_scopes::data_attr_type));
$attribute = get_attribute($ORDSYMBOL->getText());
Expand Down
11 changes: 5 additions & 6 deletions parser_library/src/parsing/grammar/macro_operand_rules.g4
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ mac_entry [bool top_level = true] returns [concat_chain chain]
ap1=ATTR
{
$chain.push_back(std::make_unique<char_str_conc>("'", provider.get_range($ap1)));
bool attribute = true;
}
(
{!is_previous_attribute_consuming($top_level, _input->LT(-2))}?
Expand All @@ -209,12 +208,12 @@ mac_entry [bool top_level = true] returns [concat_chain chain]
{
$chain.push_back(std::make_unique<char_str_conc>("'", provider.get_range($ap2)));
collector.add_hl_symbol(token_info(provider.get_range($ap1,$ap2),hl_scopes::string));
attribute = false;
}
)?
{
if (attribute)
|
{is_previous_attribute_consuming($top_level, _input->LT(-2))}?
{
collector.add_hl_symbol(token_info(provider.get_range($ap1),hl_scopes::operator_symbol));
}
}
)
)+
;
37 changes: 35 additions & 2 deletions parser_library/src/parsing/parser_error_listener.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ void iterate_error_stream(antlr4::TokenStream* input_stream,
bool& sign_preceding,
bool& unexpected_sign,
bool& odd_apostrophes,
bool& ampersand_followed)
bool& ampersand_followed,
bool& attr_last)
{
int parenthesis = 0;
int apostrophes = 0;
Expand Down Expand Up @@ -96,7 +97,12 @@ void iterate_error_stream(antlr4::TokenStream* input_stream,
if (is_comparative_sign(type))
unexpected_sign = true;
if (type == APOSTROPHE)
{
apostrophes++;
attr_last = false;
}
if (type == ATTR)
attr_last = true;
}
// if there is right bracket preceding left bracket
if (parenthesis > 0)
Expand Down Expand Up @@ -130,8 +136,30 @@ void parser_error_listener_base::syntaxError(
auto start_token = excp.getStartToken();
int start_index = (int)start_token->getTokenIndex();

auto alternate_start_index = [](auto ctx) {
while (ctx)
{
auto first = ctx->getStart();
if (!first)
break;

if (first->getType() == antlr4::Token::EOF)
{
ctx = dynamic_cast<const antlr4::ParserRuleContext*>(ctx->parent);
continue;
}
return (int)first->getTokenIndex();
}
return -1;
}(dynamic_cast<const antlr4::ParserRuleContext*>(excp.getCtx()));

// find first eoln

if (alternate_start_index != -1 && alternate_start_index < start_index)
{
start_index = alternate_start_index;
}

auto first_symbol_type = input_stream->get(start_index)->getType();

// while it's a space, skip spaces
Expand Down Expand Up @@ -159,6 +187,7 @@ void parser_error_listener_base::syntaxError(
bool unexpected_sign = false;
bool odd_apostrophes = false;
bool ampersand_followed = true;
bool attr_last = false;

iterate_error_stream(input_stream,
start_index,
Expand All @@ -170,7 +199,8 @@ void parser_error_listener_base::syntaxError(
sign_preceding,
unexpected_sign,
odd_apostrophes,
ampersand_followed);
ampersand_followed,
attr_last);

// right paranthesis has no left match
if (right_prec)
Expand Down Expand Up @@ -224,6 +254,9 @@ void parser_error_listener_base::syntaxError(
diagnostic_severity::error,
"S0004",
"Unfinished statement, the label cannot be alone on a line");
else if (attr_last && is_expected(APOSTROPHE, expected_tokens))
add_parser_diagnostic(
range(position(line, char_pos_in_line)), diagnostic_severity::error, "S0005", "Expected an apostrophe");
// other undeclared errors
else
add_parser_diagnostic(
Expand Down
9 changes: 1 addition & 8 deletions parser_library/src/parsing/parser_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,6 @@ bool parser_impl::is_self_def()
return tmp == "B" || tmp == "X" || tmp == "C" || tmp == "G";
}

bool parser_impl::is_data_attr()
{
std::string tmp(_input->LT(1)->getText());
context::to_upper(tmp);
return tmp == "D" || tmp == "O" || tmp == "N" || tmp == "S" || tmp == "K" || tmp == "I" || tmp == "L" || tmp == "T";
}

bool parser_impl::is_var_def()
{
auto [_, opcode] = *proc_status;
Expand Down Expand Up @@ -225,7 +218,7 @@ bool parser_impl::is_previous_attribute_consuming(bool top_level, const antlr4::
return false;

auto text = token->getText();
if (text.empty() || text.size() >= 2 && std::isalpha((unsigned char)text[text.size() - 2]))
if (text.size() != 1)
return false;

auto tmp = std::toupper((unsigned char)text.back());
Expand Down
1 change: 0 additions & 1 deletion parser_library/src/parsing/parser_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ class parser_impl : public antlr4::Parser
void enable_continuation();
void disable_continuation();
bool is_self_def();
bool is_data_attr();
bool is_var_def();

bool allow_ca_string() const { return ca_string_enabled; }
Expand Down
1 change: 1 addition & 0 deletions parser_library/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ add_executable(library_test)

target_sources(library_test PRIVATE
aread_time_test.cpp
common_testing.cpp
common_testing.h
diagnosable_ctx_test.cpp
diagnostics_check_test.cpp
Expand Down
Loading

0 comments on commit 821fae0

Please sign in to comment.