fix(camp): correctly apply and remove markdown blockquotes

We previously didn’t handle all line boundaries, e.g. U+2028, which prevented some reposts from matching. This unfortunately won’t fix already broken ones. We are in your debt Benjamin.
lilnouns · Oct 23, 2024 · 79757d2 · 79757d2
1 parent a32bdfd
commit 79757d2
Show file tree

Hide file tree

Showing 7 changed files with 436 additions and 68 deletions.
diff --git a/apps/nouns-camp/package.json b/apps/nouns-camp/package.json
@@ -5,13 +5,15 @@
     "build": "next build",
     "start": "next start",
     "format": "prettier . --write",
-    "lint": "next lint && prettier . --check"
+    "lint": "next lint && prettier . --check",
+    "test": "vitest"
   },
   "devDependencies": {
     "dotenv": "^16.4.5",
     "eslint": "^8.57.0",
     "eslint-config-next": "^14.2.8",
-    "eslint-config-prettier": "^9.1.0"
+    "eslint-config-prettier": "^9.1.0",
+    "vitest": "^2.1.3"
   },
   "dependencies": {
     "@emotion/cache": "^11.13.1",

diff --git a/apps/nouns-camp/src/utils/markdown.test.js b/apps/nouns-camp/src/utils/markdown.test.js
@@ -0,0 +1,68 @@
+import { expect, test } from "vitest";
+import { createRepostExtractor } from "./votes-and-feedbacks.js";
+
+const sourceReasons = [
+  "foo",
+  "bar",
+  `baz
+
++1
+
+> foo`,
+  "foo\u2028bar",
+];
+
+test("repost extraction", () => {
+  const extract = createRepostExtractor(
+    sourceReasons.map((reason) => ({ reason })),
+  );
+
+  const tests = [
+    { reason: "+1\n\n> foo", expectedMatchIndecies: [0] },
+    {
+      reason: `comment
+
++1
+
+> bar
+
+comment
+
++1
+
+>baz
+
+comment`,
+      expectedMatchIndecies: [1, 2],
+    },
+    {
+      reason: `comment
+
++1
+
+> baz
+>
+> +1
+>
+> > foo`,
+      expectedMatchIndecies: [2],
+    },
+    { reason: "+1\n\n> foo\u2028bar", expectedMatchIndecies: [0] },
+    { reason: "+1\n\n> foo\u2028> bar", expectedMatchIndecies: [3] },
+  ];
+
+  for (const { reason, expectedMatchIndecies } of tests) {
+    const [matchingPosts] = extract(reason);
+    const matchingReasons = matchingPosts.map((p) => p.reason);
+    const expectedMatchingReasons = expectedMatchIndecies.map(
+      (index) => sourceReasons[index],
+    );
+    for (const expectedMatchingReason of expectedMatchingReasons) {
+      expect(matchingReasons).toContain(expectedMatchingReason);
+    }
+    for (const matchingReason of matchingReasons) {
+      expect(expectedMatchingReasons).toContain(matchingReason);
+    }
+    expect(matchingPosts.length).toBe(expectedMatchIndecies.length);
+  }
+});
diff --git a/apps/nouns-camp/src/utils/votes-and-feedbacks.js b/apps/nouns-camp/src/utils/votes-and-feedbacks.js
@@ -26,55 +26,47 @@ export const REPOST_REGEX = /^\+1$(\n){2}(?<quote>(?:^>.*?$\s*)+)/gms;
 export const REPLY_REGEX =
   /^@(?<author>0x[a-zA-Z0-9]{4}(\.){3}[a-zA-Z0-p]{4})(\n){2}(?<reply>.+?)\n(?<quote>(?:^>.*?$\s*)+)/gms;
 
-const poormansUnquote = (markdownBlockquote) =>
-  markdownBlockquote
-    .split("\n")
-    .map((line) => {
-      if (line[0] !== ">") throw new Error();
-      if (line[1] === " ") return line.slice(2);
-      return line.slice(1);
-    })
-    .join("\n");
-
 export const createReplyExtractor =
   (ascendingSourceVotesAndFeedbackPosts) => (reason) => {
     if (reason == null || reason.trim() === "") return [[], reason];
 
-    const matches = reason.matchAll(REPLY_REGEX);
-
-    if (matches == null) return [[], reason];
+    const matches = [...reason.matchAll(REPLY_REGEX)];
 
     let strippedText = reason;
     const replies = [];
 
     for (const match of matches) {
-      const quoteBody = poormansUnquote(match.groups.quote.trim());
-      const truncatedReplyTargetAuthorAddress =
-        match.groups.author.toLowerCase();
-      const matchedReplyTarget = ascendingSourceVotesAndFeedbackPosts.find(
-        ({ voterId: originVoterId, reason: originReason }) => {
-          if (originReason == null) return false;
-
-          // Check content
-          if (originReason.trim() !== quoteBody.trim()) return false;
-
-          // Check author
-          const truncatedOriginVoterId = [
-            originVoterId.slice(0, 6),
-            originVoterId.slice(-4),
-          ].join("...");
-          return truncatedOriginVoterId === truncatedReplyTargetAuthorAddress;
-        },
-      );
-      if (matchedReplyTarget == null) {
-        // console.warn(`No match found for reply target "${match.groups.quote}"`);
+      try {
+        const quoteBody = markdownUtils.unquote(match.groups.quote.trim());
+        const truncatedReplyTargetAuthorAddress =
+          match.groups.author.toLowerCase();
+        const matchedReplyTarget = ascendingSourceVotesAndFeedbackPosts.find(
+          ({ voterId: originVoterId, reason: originReason }) => {
+            if (originReason == null) return false;
+
+            // Check content
+            if (originReason.trim() !== quoteBody.trim()) return false;
+
+            // Check author
+            const truncatedOriginVoterId = [
+              originVoterId.slice(0, 6),
+              originVoterId.slice(-4),
+            ].join("...");
+            return truncatedOriginVoterId === truncatedReplyTargetAuthorAddress;
+          },
+        );
+        if (matchedReplyTarget == null) {
+          // console.warn(`No match found for reply target "${match.groups.quote}"`);
+          continue;
+        }
+        strippedText = strippedText.replace(match[0], "");
+        replies.push({
+          body: match.groups.reply,
+          target: matchedReplyTarget,
+        });
+      } catch (e) {
         continue;
       }
-      strippedText = strippedText.replace(match[0], "");
-      replies.push({
-        body: match.groups.reply,
-        target: matchedReplyTarget,
-      });
     }
 
     return [replies, strippedText];
@@ -84,36 +76,41 @@ export const createRepostExtractor =
   (ascendingSourceVotesAndFeedbackPosts) => (reason) => {
     if (reason == null || reason.trim() === "") return [[], reason];
 
-    const matches = reason.matchAll(REPOST_REGEX);
-
-    if (matches == null) return [[], reason];
+    const matches = [...reason.matchAll(REPOST_REGEX)];
 
     let strippedText = reason;
     const reposts = [];
 
     for (const match of matches) {
-      const quoteBody = poormansUnquote(match.groups.quote.trim()).trim();
-      const matchedPost = ascendingSourceVotesAndFeedbackPosts.find(
-        ({ reason: originReason }) => {
-          if (originReason == null) return false;
-          // Check if the whole reason matches
-          // (can’t use e.g. `includes` since that might include nested reposts)
-          if (originReason.trim() === quoteBody.trim()) return true;
-          // Check if the reason matches when stripping it of reposts
-          return (
-            // Checking `includes` first to not match the regex unnecessarily
-            originReason.includes(quoteBody.trim()) &&
-            originReason.replaceAll(REPOST_REGEX, "").trim() ===
-              quoteBody.trim()
-          );
-        },
-      );
-      if (matchedPost == null) {
-        // console.warn(`No match found for repost quote body "${quoteBody}"`);
+      try {
+        const quoteBody = markdownUtils
+          .unquote(match.groups.quote.trim())
+          .trim();
+        const matchedPost = ascendingSourceVotesAndFeedbackPosts.find(
+          ({ reason: originReason }) => {
+            if (originReason == null) return false;
+            // Check if the whole reason matches
+            // (can’t use e.g. `includes` since that might include nested reposts)
+            if (originReason.trim() === quoteBody.trim()) return true;
+
+            // Check if the reason matches when stripping it of reposts
+            return (
+              // Checking `includes` first to not match the regex unnecessarily
+              originReason.includes(quoteBody.trim()) &&
+              originReason.replaceAll(REPOST_REGEX, "").trim() ===
+                quoteBody.trim()
+            );
+          },
+        );
+        if (matchedPost == null) {
+          // console.warn(`No match found for repost quote body "${quoteBody}"`);
+          continue;
+        }
+        strippedText = strippedText.replace(match[0], "");
+        reposts.push(matchedPost);
+      } catch (e) {
         continue;
       }
-      strippedText = strippedText.replace(match[0], "");
-      reposts.push(matchedPost);
     }
 
     return [reposts, strippedText];

diff --git a/packages/common/package.json b/packages/common/package.json
@@ -6,7 +6,8 @@
     "dev": "rollup -c -w",
     "build": "rollup -c",
     "format": "prettier . --write",
-    "lint": "eslint . && prettier . --check"
+    "lint": "eslint . && prettier . --check",
+    "test": "vitest"
   },
   "exports": {
     "./app": "./dist/app.js",
@@ -22,6 +23,7 @@
   "devDependencies": {
     "@swc/core": "^1.7.23",
     "rollup": "^3.21.0",
-    "rollup-plugin-swc3": "^0.11.2"
+    "rollup-plugin-swc3": "^0.11.2",
+    "vitest": "^2.1.3"
   }
 }
diff --git a/packages/common/src/utils/markdown.js b/packages/common/src/utils/markdown.js
@@ -331,9 +331,31 @@ export const getFirstParagraph = (string) => {
 
 export const blockquote = (string) =>
   string
-    .split("\n")
-    .map((l) => `> ${l}`)
-    .join("\n");
+    // https://www.unicode.org/reports/tr18/#Line_Boundaries
+    .split(/(\r\n|(?!\r\n)[\n-\r\x85\u2028\u2029])/)
+    .reduce((acc, part, i, parts) => {
+      if (i % 2 !== 0) return acc; // break part
+      const [line, breakCharacters] = [part, parts[i + 1]];
+      if (breakCharacters != null) return acc + "> " + line + breakCharacters;
+      if (line === "") return acc;
+      return acc + "> " + line;
+    }, "");
+
+// This will throw if the input isn’t a valid blockquote string
+export const unquote = (markdownBlockquote) =>
+  markdownBlockquote
+    // https://www.unicode.org/reports/tr18/#Line_Boundaries
+    .split(/(\r\n|(?!\r\n)[\n-\r\x85\u2028\u2029])/)
+    .reduce((acc, part, i, parts) => {
+      if (i % 2 !== 0) return acc; // break part
+      const [line, breakCharacters] = [part, parts[i + 1]];
+      if (line[0] !== ">") throw new Error("invalid blockquote");
+
+      const unquotedLine = line[1] === " " ? line.slice(2) : line.slice(1);
+      if (breakCharacters != null) return acc + unquotedLine + breakCharacters;
+      if (unquotedLine === "") return acc;
+      return acc + unquotedLine;
+    }, "");
 
 export const getFirstImage = (text) => {
   const blocks = toMessageBlocks(text);

diff --git a/packages/common/src/utils/markdown.test.js b/packages/common/src/utils/markdown.test.js
@@ -0,0 +1,18 @@
+import { expect, test } from "vitest";
+import { blockquote, unquote } from "./markdown.js";
+
+test("blockquote()", () => {
+  expect(blockquote("foo")).toBe("> foo");
+  expect(blockquote("foo \n bar")).toBe("> foo \n>  bar");
+  expect(blockquote("foo\n")).toBe("> foo\n");
+  expect(blockquote("foo\u2028bar")).toBe("> foo\u2028> bar");
+});
+
+test("unquote()", () => {
+  expect(unquote("> foo")).toBe("foo");
+  expect(unquote(">foo")).toBe("foo");
+  expect(unquote(">  foo")).toBe(" foo");
+  expect(unquote("> foo\n> bar")).toBe("foo\nbar");
+  expect(() => unquote("> foo\u2028bar")).toThrow(/invalid blockquote/);
+  expect(unquote("> foo\u2028> bar")).toBe("foo\u2028bar");
+});