From 4a37d16803f442475b0ae3cf3b90e30cfbaf2e7b Mon Sep 17 00:00:00 2001
From: Deanna Garcia <deannagarcia@google.com>
Date: Tue, 28 Feb 2023 13:05:57 -0800
Subject: [PATCH] Fix trailing comment attribution when file has no final
 newline (#12082)

Fixes #12081.

The issue was the call to `MaybeDetachComment`: the conditional assumed that there was a next token, which was on the same line as the previous one, making attribution unclear. However, if there is no next token, we should not detach.

The actual fix is a one-liner. The rest of this PR is updates to the tests to verify this behavior under a handful of scenarios.

Closes #12082

COPYBARA_INTEGRATE_REVIEW=https://github.com/protocolbuffers/protobuf/pull/12082 from jhump:jh/fix-trailing-comment-attribution 767e41cb05ba6e176977e86fe33e9aad090f83f6
FUTURE_COPYBARA_INTEGRATE_REVIEW=https://github.com/protocolbuffers/protobuf/pull/12082 from jhump:jh/fix-trailing-comment-attribution 767e41cb05ba6e176977e86fe33e9aad090f83f6
PiperOrigin-RevId: 513017635
---
 src/google/protobuf/io/tokenizer.cc          |  3 +-
 src/google/protobuf/io/tokenizer_unittest.cc | 62 +++++++++++++++-----
 2 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
index 53229f8e67e0..d75dfabf195a 100644
--- a/src/google/protobuf/io/tokenizer.cc
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -942,7 +942,8 @@ bool Tokenizer::NextWithComments(std::string* prev_trailing_comments,
             // makes no sense to attach a comment to the following token.
             collector.Flush();
           }
-          if (prev_line == line_ || trailing_comment_end_line == line_) {
+          if (result &&
+              (prev_line == line_ || trailing_comment_end_line == line_)) {
             // When previous token and this one are on the same line, or
             // even if a multi-line trailing comment ends on the same line
             // as this token, it's unclear to what token the comment
diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc
index 2654acba69ca..fea6b959cf3f 100644
--- a/src/google/protobuf/io/tokenizer_unittest.cc
+++ b/src/google/protobuf/io/tokenizer_unittest.cc
@@ -650,6 +650,18 @@ DocCommentCase kDocCommentCases[] = {
      {},
      ""},
 
+    {"prev // no next token\n",
+
+     " no next token\n",
+     {},
+     ""},
+
+    {"prev // no next token and no trailing newline",
+
+     " no next token and no trailing newline",
+     {},
+     ""},
+
     {"prev /* detached */ next",
 
      "",
@@ -777,10 +789,11 @@ DocCommentCase kDocCommentCases[] = {
      " will be handled "},
 
     {R"pb(
-     prev /* a single block comment
-         that spans multiple lines
-         is detached if it ends
-         on the same line as next */ next"
+       prev /* a single block comment
+           that spans multiple lines
+           is detached if it ends
+           on the same line as next */
+           next
      )pb",
 
      "",
@@ -791,7 +804,7 @@ DocCommentCase kDocCommentCases[] = {
      ""},
 
     {R"pb(
-     prev /* trailing */ /* leading */ next"
+       prev /* trailing */ /* leading */ next
      )pb",
 
      " trailing ",
@@ -799,15 +812,30 @@ DocCommentCase kDocCommentCases[] = {
      " leading "},
 
     {R"pb(
-     prev /* multi-line
-          trailing */ /* an oddly
-                      placed detached */ /* an oddly
-                                         placed leading */ next"
+       prev /* multi-line
+            trailing */
+            /* an oddly
+            placed detached */
+            /* an oddly
+            placed leading */
+           next
      )pb",
 
      " multi-line\ntrailing ",
      {" an oddly\nplaced detached "},
      " an oddly\nplaced leading "},
+
+    {R"pb(
+       prev  // trailing with newline
+       // detached
+       /* another detached */
+       // leading but no next token to attach it to
+     )pb",
+
+     " trailing with newline\n",
+     {" detached\n", " another detached ",
+      " leading but no next token to attach it to\n"},
+     ""},
 };
 
 TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) {
@@ -822,8 +850,8 @@ TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) {
                          kDocCommentCases_case.input.size(), kBlockSizes_case);
   Tokenizer tokenizer2(&input2, &error_collector);
 
-  tokenizer.Next();
-  tokenizer2.Next();
+  EXPECT_TRUE(tokenizer.Next());
+  EXPECT_TRUE(tokenizer2.Next());
 
   EXPECT_EQ("prev", tokenizer.current().text);
   EXPECT_EQ("prev", tokenizer2.current().text);
@@ -831,11 +859,13 @@ TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) {
   std::string prev_trailing_comments;
   std::vector<std::string> detached_comments;
   std::string next_leading_comments;
-  tokenizer.NextWithComments(&prev_trailing_comments, &detached_comments,
-                             &next_leading_comments);
-  tokenizer2.NextWithComments(NULL, NULL, NULL);
-  EXPECT_EQ("next", tokenizer.current().text);
-  EXPECT_EQ("next", tokenizer2.current().text);
+  bool has_next = tokenizer.NextWithComments(
+      &prev_trailing_comments, &detached_comments, &next_leading_comments);
+  EXPECT_EQ(has_next, tokenizer2.NextWithComments(nullptr, nullptr, nullptr));
+  if (has_next) {
+    EXPECT_EQ("next", tokenizer.current().text);
+    EXPECT_EQ("next", tokenizer2.current().text);
+  }
 
   EXPECT_EQ(kDocCommentCases_case.prev_trailing_comments,
             prev_trailing_comments);