diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc index 992d8c0b173b..4d7f56cc55f9 100644 --- a/src/google/protobuf/io/tokenizer.cc +++ b/src/google/protobuf/io/tokenizer.cc @@ -746,6 +746,8 @@ class CommentCollector { : prev_trailing_comments_(prev_trailing_comments), detached_comments_(detached_comments), next_leading_comments_(next_leading_comments), + num_comments_(0), + has_trailing_comment_(false), has_comment_(false), is_line_comment_(false), can_attach_to_prev_(true) { @@ -797,6 +799,7 @@ class CommentCollector { if (prev_trailing_comments_ != NULL) { prev_trailing_comments_->append(comment_buffer_); } + has_trailing_comment_ = true; can_attach_to_prev_ = false; } else { if (detached_comments_ != NULL) { @@ -804,17 +807,39 @@ class CommentCollector { } } ClearBuffer(); + num_comments_++; } } void DetachFromPrev() { can_attach_to_prev_ = false; } + void MaybeDetachComment() { + int count = num_comments_; + if (has_comment_) count++; + + // If there's one comment, make sure it is detached. + if (count == 1) { + if (has_trailing_comment_ && prev_trailing_comments_ != NULL) { + std::string trail = *prev_trailing_comments_; + if (detached_comments_ != NULL) { + // push trailing comment to front of detached + detached_comments_->insert(detached_comments_->begin(), 1, trail); + } + prev_trailing_comments_->clear(); + } + // flush pending comment so it's detached instead of leading + Flush(); + } + } + private: std::string* prev_trailing_comments_; std::vector<std::string>* detached_comments_; std::string* next_leading_comments_; std::string comment_buffer_; + int num_comments_; + bool has_trailing_comment_; // True if any comments were read into comment_buffer_. This can be true even // if comment_buffer_ is empty, namely if the comment was "/**/". 
@@ -836,6 +861,9 @@ bool Tokenizer::NextWithComments(std::string* prev_trailing_comments, CommentCollector collector(prev_trailing_comments, detached_comments, next_leading_comments); + int prev_line = line_; + int trailing_comment_end_line = -1; + if (current_.type == TYPE_START) { // Ignore unicode byte order mark(BOM) if it appears at the file // beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted. @@ -849,12 +877,14 @@ bool Tokenizer::NextWithComments(std::string* prev_trailing_comments, } } collector.DetachFromPrev(); + prev_line = -1; } else { // A comment appearing on the same line must be attached to the previous // declaration. ConsumeZeroOrMore<WhitespaceNoNewline>(); switch (TryConsumeCommentStart()) { case LINE_COMMENT: + trailing_comment_end_line = line_; ConsumeLineComment(collector.GetBufferForLineComment()); // Don't allow comments on subsequent lines to be attached to a trailing @@ -863,14 +893,8 @@ bool Tokenizer::NextWithComments(std::string* prev_trailing_comments, break; case BLOCK_COMMENT: ConsumeBlockComment(collector.GetBufferForBlockComment()); - + trailing_comment_end_line = line_; ConsumeZeroOrMore<WhitespaceNoNewline>(); - if (!TryConsume('\n')) { - // Oops, the next token is on the same line. If we recorded a comment - // we really have no idea which token it should be attached to. - collector.ClearBuffer(); - return Next(); - } // Don't allow comments on subsequent lines to be attached to a trailing // comment. @@ -918,6 +942,13 @@ bool Tokenizer::NextWithComments(std::string* prev_trailing_comments, // makes no sense to attach a comment to the following token. collector.Flush(); } + if (prev_line == line_ || trailing_comment_end_line == line_) { + // When previous token and this one are on the same line, or + // even if a multi-line trailing comment ends on the same line + // as this token, it's unclear to what token the comment + // should be attached. So we detach it. 
+ collector.MaybeDetachComment(); + } return result; } break; diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc index 40a24c8e9d46..f4d70f0d51d8 100644 --- a/src/google/protobuf/io/tokenizer_unittest.cc +++ b/src/google/protobuf/io/tokenizer_unittest.cc @@ -651,10 +651,10 @@ DocCommentCase kDocCommentCases[] = { {}, ""}, - {"prev /* ignored */ next", + {"prev /* detached */ next", "", - {}, + {" detached "}, ""}, {"prev // trailing comment\n" @@ -664,6 +664,13 @@ DocCommentCase kDocCommentCases[] = { {}, ""}, + {"prev\n" + "/* leading comment */ next", + + "", + {}, + " leading comment "}, + {"prev\n" "// leading comment\n" "// line 2\n" @@ -763,6 +770,45 @@ DocCommentCase kDocCommentCases[] = { "", {}, " leading comment\n"}, + + {"prev /* many comments*/ /* all inline */ /* will be handled */ next", + + " many comments", + {" all inline "}, + " will be handled "}, + + {R"pb( + prev /* a single block comment + that spans multiple lines + is detached if it ends + on the same line as next */ next" + )pb", + + "", + {" a single block comment\n" + "that spans multiple lines\n" + "is detached if it ends\n" + "on the same line as next "}, + ""}, + + {R"pb( + prev /* trailing */ /* leading */ next" + )pb", + + " trailing ", + {}, + " leading "}, + + {R"pb( + prev /* multi-line + trailing */ /* an oddly + placed detached */ /* an oddly + placed leading */ next" + )pb", + + " multi-line\ntrailing ", + {" an oddly\nplaced detached "}, + " an oddly\nplaced leading "}, }; TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) {