Skip to content

Commit

Permalink
fix(camp): correctly apply and remove markdown blockquotes
Browse files Browse the repository at this point in the history
We previously didn’t handle all line boundaries, e.g. U+2028, which prevented
some reposts from matching. This unfortunately won’t fix already broken ones.

We are in your debt Benjamin.
  • Loading branch information
werme committed Oct 23, 2024
1 parent a32bdfd commit 79757d2
Show file tree
Hide file tree
Showing 7 changed files with 436 additions and 68 deletions.
6 changes: 4 additions & 2 deletions apps/nouns-camp/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@
"build": "next build",
"start": "next start",
"format": "prettier . --write",
"lint": "next lint && prettier . --check"
"lint": "next lint && prettier . --check",
"test": "vitest"
},
"devDependencies": {
"dotenv": "^16.4.5",
"eslint": "^8.57.0",
"eslint-config-next": "^14.2.8",
"eslint-config-prettier": "^9.1.0"
"eslint-config-prettier": "^9.1.0",
"vitest": "^2.1.3"
},
"dependencies": {
"@emotion/cache": "^11.13.1",
Expand Down
68 changes: 68 additions & 0 deletions apps/nouns-camp/src/utils/markdown.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import { expect, test } from "vitest";
import { createRepostExtractor } from "./votes-and-feedbacks.js";

const sourceReasons = [
"foo",
"bar",
`baz
+1
> foo`,
"foo\u2028bar",
];

test("repost extraction", () => {
const extract = createRepostExtractor(
sourceReasons.map((reason) => ({ reason })),
);

const tests = [
{ reason: "+1\n\n> foo", expectedMatchIndecies: [0] },
{
reason: `comment
+1
> bar
comment
+1
>baz
comment`,
expectedMatchIndecies: [1, 2],
},
{
reason: `comment
+1
> baz
>
> +1
>
> > foo`,
expectedMatchIndecies: [2],
},
{ reason: "+1\n\n> foo\u2028bar", expectedMatchIndecies: [0] },
{ reason: "+1\n\n> foo\u2028> bar", expectedMatchIndecies: [3] },
];

for (const { reason, expectedMatchIndecies } of tests) {
const [matchingPosts] = extract(reason);
const matchingReasons = matchingPosts.map((p) => p.reason);
const expectedMatchingReasons = expectedMatchIndecies.map(
(index) => sourceReasons[index],
);
for (const expectedMatchingReason of expectedMatchingReasons) {
expect(matchingReasons).toContain(expectedMatchingReason);
}
for (const matchingReason of matchingReasons) {
expect(expectedMatchingReasons).toContain(matchingReason);
}
expect(matchingPosts.length).toBe(expectedMatchIndecies.length);
}
});
119 changes: 58 additions & 61 deletions apps/nouns-camp/src/utils/votes-and-feedbacks.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,55 +26,47 @@ export const REPOST_REGEX = /^\+1$(\n){2}(?<quote>(?:^>.*?$\s*)+)/gms;
export const REPLY_REGEX =
/^@(?<author>0x[a-zA-Z0-9]{4}(\.){3}[a-zA-Z0-p]{4})(\n){2}(?<reply>.+?)\n(?<quote>(?:^>.*?$\s*)+)/gms;

const poormansUnquote = (markdownBlockquote) =>
markdownBlockquote
.split("\n")
.map((line) => {
if (line[0] !== ">") throw new Error();
if (line[1] === " ") return line.slice(2);
return line.slice(1);
})
.join("\n");

export const createReplyExtractor =
(ascendingSourceVotesAndFeedbackPosts) => (reason) => {
if (reason == null || reason.trim() === "") return [[], reason];

const matches = reason.matchAll(REPLY_REGEX);

if (matches == null) return [[], reason];
const matches = [...reason.matchAll(REPLY_REGEX)];

let strippedText = reason;
const replies = [];

for (const match of matches) {
const quoteBody = poormansUnquote(match.groups.quote.trim());
const truncatedReplyTargetAuthorAddress =
match.groups.author.toLowerCase();
const matchedReplyTarget = ascendingSourceVotesAndFeedbackPosts.find(
({ voterId: originVoterId, reason: originReason }) => {
if (originReason == null) return false;

// Check content
if (originReason.trim() !== quoteBody.trim()) return false;

// Check author
const truncatedOriginVoterId = [
originVoterId.slice(0, 6),
originVoterId.slice(-4),
].join("...");
return truncatedOriginVoterId === truncatedReplyTargetAuthorAddress;
},
);
if (matchedReplyTarget == null) {
// console.warn(`No match found for reply target "${match.groups.quote}"`);
try {
const quoteBody = markdownUtils.unquote(match.groups.quote.trim());
const truncatedReplyTargetAuthorAddress =
match.groups.author.toLowerCase();
const matchedReplyTarget = ascendingSourceVotesAndFeedbackPosts.find(
({ voterId: originVoterId, reason: originReason }) => {
if (originReason == null) return false;

// Check content
if (originReason.trim() !== quoteBody.trim()) return false;

// Check author
const truncatedOriginVoterId = [
originVoterId.slice(0, 6),
originVoterId.slice(-4),
].join("...");
return truncatedOriginVoterId === truncatedReplyTargetAuthorAddress;
},
);
if (matchedReplyTarget == null) {
// console.warn(`No match found for reply target "${match.groups.quote}"`);
continue;
}
strippedText = strippedText.replace(match[0], "");
replies.push({
body: match.groups.reply,
target: matchedReplyTarget,
});
} catch (e) {
continue;
}
strippedText = strippedText.replace(match[0], "");
replies.push({
body: match.groups.reply,
target: matchedReplyTarget,
});
}

return [replies, strippedText];
Expand All @@ -84,36 +76,41 @@ export const createRepostExtractor =
(ascendingSourceVotesAndFeedbackPosts) => (reason) => {
if (reason == null || reason.trim() === "") return [[], reason];

const matches = reason.matchAll(REPOST_REGEX);

if (matches == null) return [[], reason];
const matches = [...reason.matchAll(REPOST_REGEX)];

let strippedText = reason;
const reposts = [];

for (const match of matches) {
const quoteBody = poormansUnquote(match.groups.quote.trim()).trim();
const matchedPost = ascendingSourceVotesAndFeedbackPosts.find(
({ reason: originReason }) => {
if (originReason == null) return false;
// Check if the whole reason matches
// (can’t use e.g. `includes` since that might include nested reposts)
if (originReason.trim() === quoteBody.trim()) return true;
// Check if the reason matches when stripping it of reposts
return (
// Checking `includes` first to not match the regex unnecessarily
originReason.includes(quoteBody.trim()) &&
originReason.replaceAll(REPOST_REGEX, "").trim() ===
quoteBody.trim()
);
},
);
if (matchedPost == null) {
// console.warn(`No match found for repost quote body "${quoteBody}"`);
try {
const quoteBody = markdownUtils
.unquote(match.groups.quote.trim())
.trim();
const matchedPost = ascendingSourceVotesAndFeedbackPosts.find(
({ reason: originReason }) => {
if (originReason == null) return false;
// Check if the whole reason matches
// (can’t use e.g. `includes` since that might include nested reposts)
if (originReason.trim() === quoteBody.trim()) return true;

// Check if the reason matches when stripping it of reposts
return (
// Checking `includes` first to not match the regex unnecessarily
originReason.includes(quoteBody.trim()) &&
originReason.replaceAll(REPOST_REGEX, "").trim() ===
quoteBody.trim()
);
},
);
if (matchedPost == null) {
// console.warn(`No match found for repost quote body "${quoteBody}"`);
continue;
}
strippedText = strippedText.replace(match[0], "");
reposts.push(matchedPost);
} catch (e) {
continue;
}
strippedText = strippedText.replace(match[0], "");
reposts.push(matchedPost);
}

return [reposts, strippedText];
Expand Down
6 changes: 4 additions & 2 deletions packages/common/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
"dev": "rollup -c -w",
"build": "rollup -c",
"format": "prettier . --write",
"lint": "eslint . && prettier . --check"
"lint": "eslint . && prettier . --check",
"test": "vitest"
},
"exports": {
"./app": "./dist/app.js",
Expand All @@ -22,6 +23,7 @@
"devDependencies": {
"@swc/core": "^1.7.23",
"rollup": "^3.21.0",
"rollup-plugin-swc3": "^0.11.2"
"rollup-plugin-swc3": "^0.11.2",
"vitest": "^2.1.3"
}
}
28 changes: 25 additions & 3 deletions packages/common/src/utils/markdown.js
Original file line number Diff line number Diff line change
Expand Up @@ -331,9 +331,31 @@ export const getFirstParagraph = (string) => {

export const blockquote = (string) =>
string
.split("\n")
.map((l) => `> ${l}`)
.join("\n");
// https://www.unicode.org/reports/tr18/#Line_Boundaries
.split(/(\r\n|(?!\r\n)[\n-\r\x85\u2028\u2029])/)
.reduce((acc, part, i, parts) => {
if (i % 2 !== 0) return acc; // break part
const [line, breakCharacters] = [part, parts[i + 1]];
if (breakCharacters != null) return acc + "> " + line + breakCharacters;
if (line === "") return acc;
return acc + "> " + line;
}, "");

// This will throw if the input isn’t a valid blockquote string
export const unquote = (markdownBlockquote) =>
markdownBlockquote
// https://www.unicode.org/reports/tr18/#Line_Boundaries
.split(/(\r\n|(?!\r\n)[\n-\r\x85\u2028\u2029])/)
.reduce((acc, part, i, parts) => {
if (i % 2 !== 0) return acc; // break part
const [line, breakCharacters] = [part, parts[i + 1]];
if (line[0] !== ">") throw new Error("invalid blockquote");

const unquotedLine = line[1] === " " ? line.slice(2) : line.slice(1);
if (breakCharacters != null) return acc + unquotedLine + breakCharacters;
if (unquotedLine === "") return acc;
return acc + unquotedLine;
}, "");

export const getFirstImage = (text) => {
const blocks = toMessageBlocks(text);
Expand Down
18 changes: 18 additions & 0 deletions packages/common/src/utils/markdown.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { expect, test } from "vitest";
import { blockquote, unquote } from "./markdown.js";

test("blockquote()", () => {
expect(blockquote("foo")).toBe("> foo");
expect(blockquote("foo \n bar")).toBe("> foo \n> bar");
expect(blockquote("foo\n")).toBe("> foo\n");
expect(blockquote("foo\u2028bar")).toBe("> foo\u2028> bar");
});

test("unquote()", () => {
expect(unquote("> foo")).toBe("foo");
expect(unquote(">foo")).toBe("foo");
expect(unquote("> foo")).toBe(" foo");
expect(unquote("> foo\n> bar")).toBe("foo\nbar");
expect(() => unquote("> foo\u2028bar")).toThrow(/invalid blockquote/);
expect(unquote("> foo\u2028> bar")).toBe("foo\u2028bar");
});
Loading

0 comments on commit 79757d2

Please sign in to comment.