Skip to content

Commit

Permalink
Merge pull request #10660 from jhump/jh/comments-ignored
Browse files Browse the repository at this point in the history
gracefully handle weird placement of linebreaks around comments
  • Loading branch information
fowles authored Sep 27, 2022
2 parents d993377 + 54ddc88 commit c45dd50
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 9 deletions.
45 changes: 38 additions & 7 deletions src/google/protobuf/io/tokenizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,8 @@ class CommentCollector {
: prev_trailing_comments_(prev_trailing_comments),
detached_comments_(detached_comments),
next_leading_comments_(next_leading_comments),
num_comments_(0),
has_trailing_comment_(false),
has_comment_(false),
is_line_comment_(false),
can_attach_to_prev_(true) {
Expand Down Expand Up @@ -797,24 +799,47 @@ class CommentCollector {
if (prev_trailing_comments_ != NULL) {
prev_trailing_comments_->append(comment_buffer_);
}
has_trailing_comment_ = true;
can_attach_to_prev_ = false;
} else {
if (detached_comments_ != NULL) {
detached_comments_->push_back(comment_buffer_);
}
}
ClearBuffer();
num_comments_++;
}
}

void DetachFromPrev() { can_attach_to_prev_ = false; }

void MaybeDetachComment() {
int count = num_comments_;
if (has_comment_) count++;

// If there's one comment, make sure it is detached.
if (count == 1) {
if (has_trailing_comment_ && prev_trailing_comments_ != NULL) {
std::string trail = *prev_trailing_comments_;
if (detached_comments_ != NULL) {
// push trailing comment to front of detached
detached_comments_->insert(detached_comments_->begin(), 1, trail);
}
prev_trailing_comments_->clear();
}
// flush pending comment so it's detached instead of leading
Flush();
}
}

private:
std::string* prev_trailing_comments_;
std::vector<std::string>* detached_comments_;
std::string* next_leading_comments_;

std::string comment_buffer_;
int num_comments_;
bool has_trailing_comment_;

// True if any comments were read into comment_buffer_. This can be true even
// if comment_buffer_ is empty, namely if the comment was "/**/".
Expand All @@ -836,6 +861,9 @@ bool Tokenizer::NextWithComments(std::string* prev_trailing_comments,
CommentCollector collector(prev_trailing_comments, detached_comments,
next_leading_comments);

int prev_line = line_;
int trailing_comment_end_line = -1;

if (current_.type == TYPE_START) {
// Ignore unicode byte order mark(BOM) if it appears at the file
// beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted.
Expand All @@ -849,12 +877,14 @@ bool Tokenizer::NextWithComments(std::string* prev_trailing_comments,
}
}
collector.DetachFromPrev();
prev_line = -1;
} else {
// A comment appearing on the same line must be attached to the previous
// declaration.
ConsumeZeroOrMore<WhitespaceNoNewline>();
switch (TryConsumeCommentStart()) {
case LINE_COMMENT:
trailing_comment_end_line = line_;
ConsumeLineComment(collector.GetBufferForLineComment());

// Don't allow comments on subsequent lines to be attached to a trailing
Expand All @@ -863,14 +893,8 @@ bool Tokenizer::NextWithComments(std::string* prev_trailing_comments,
break;
case BLOCK_COMMENT:
ConsumeBlockComment(collector.GetBufferForBlockComment());

trailing_comment_end_line = line_;
ConsumeZeroOrMore<WhitespaceNoNewline>();
if (!TryConsume('\n')) {
// Oops, the next token is on the same line. If we recorded a comment
// we really have no idea which token it should be attached to.
collector.ClearBuffer();
return Next();
}

// Don't allow comments on subsequent lines to be attached to a trailing
// comment.
Expand Down Expand Up @@ -918,6 +942,13 @@ bool Tokenizer::NextWithComments(std::string* prev_trailing_comments,
// makes no sense to attach a comment to the following token.
collector.Flush();
}
if (prev_line == line_ || trailing_comment_end_line == line_) {
// When previous token and this one are on the same line, or
// even if a multi-line trailing comment ends on the same line
// as this token, it's unclear to what token the comment
// should be attached. So we detach it.
collector.MaybeDetachComment();
}
return result;
}
break;
Expand Down
50 changes: 48 additions & 2 deletions src/google/protobuf/io/tokenizer_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -651,10 +651,10 @@ DocCommentCase kDocCommentCases[] = {
{},
""},

{"prev /* ignored */ next",
{"prev /* detached */ next",

"",
{},
{" detached "},
""},

{"prev // trailing comment\n"
Expand All @@ -664,6 +664,13 @@ DocCommentCase kDocCommentCases[] = {
{},
""},

{"prev\n"
"/* leading comment */ next",

"",
{},
" leading comment "},

{"prev\n"
"// leading comment\n"
"// line 2\n"
Expand Down Expand Up @@ -763,6 +770,45 @@ DocCommentCase kDocCommentCases[] = {
"",
{},
" leading comment\n"},

{"prev /* many comments*/ /* all inline */ /* will be handled */ next",

" many comments",
{" all inline "},
" will be handled "},

{R"pb(
prev /* a single block comment
that spans multiple lines
is detached if it ends
on the same line as next */ next"
)pb",

"",
{" a single block comment\n"
"that spans multiple lines\n"
"is detached if it ends\n"
"on the same line as next "},
""},

{R"pb(
prev /* trailing */ /* leading */ next"
)pb",

" trailing ",
{},
" leading "},

{R"pb(
prev /* multi-line
trailing */ /* an oddly
placed detached */ /* an oddly
placed leading */ next"
)pb",

" multi-line\ntrailing ",
{" an oddly\nplaced detached "},
" an oddly\nplaced leading "},
};

TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) {
Expand Down

0 comments on commit c45dd50

Please sign in to comment.