From 014a489d82da122059a61dda2bae4cf342376fee Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 24 May 2023 14:37:28 +0100 Subject: [PATCH 1/3] Use grapheme-splitter instead of lodash for saving emoji from being ripped apart --- package.json | 1 + src/Avatar.ts | 5 ++--- src/HtmlUtils.tsx | 9 ++++++--- src/editor/parts.ts | 13 ++++++------- src/emoji.ts | 13 +++++++++++++ 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/package.json b/package.json index 959f0199947..b9a364044c6 100644 --- a/package.json +++ b/package.json @@ -82,6 +82,7 @@ "focus-visible": "^5.2.0", "gfm.css": "^1.1.2", "glob-to-regexp": "^0.4.1", + "grapheme-splitter": "^1.0.4", "highlight.js": "^11.3.1", "html-entities": "^2.0.0", "is-ip": "^3.1.0", diff --git a/src/Avatar.ts b/src/Avatar.ts index 79254ef1b59..64015b8dad6 100644 --- a/src/Avatar.ts +++ b/src/Avatar.ts @@ -18,11 +18,11 @@ import { RoomMember } from "matrix-js-sdk/src/models/room-member"; import { User } from "matrix-js-sdk/src/models/user"; import { Room } from "matrix-js-sdk/src/models/room"; import { ResizeMethod } from "matrix-js-sdk/src/@types/partials"; -import { split } from "lodash"; import DMRoomMap from "./utils/DMRoomMap"; import { mediaFromMxc } from "./customisations/Media"; import { isLocalRoom } from "./utils/localRoom/isLocalRoom"; +import { getFirstGrapheme } from "./emoji"; // Not to be used for BaseAvatar urls as that has similar default avatar fallback already export function avatarUrlForMember( @@ -133,8 +133,7 @@ export function getInitialLetter(name: string): string | undefined { name = name.substring(1); } - // rely on the grapheme cluster splitter in lodash so that we don't break apart compound emojis - return split(name, "", 1)[0].toUpperCase(); + return getFirstGrapheme(name).toUpperCase(); } export function avatarUrlForRoom( diff --git a/src/HtmlUtils.tsx b/src/HtmlUtils.tsx index 0d910bfecad..9025eeddd90 100644 --- a/src/HtmlUtils.tsx +++ b/src/HtmlUtils.tsx @@ -21,13 +21,14 @@ import React, { LegacyRef, ReactElement, ReactNode } from "react"; import sanitizeHtml from "sanitize-html"; import classNames from "classnames"; import EMOJIBASE_REGEX from "emojibase-regex"; -import { merge, split } from "lodash"; +import { merge } from "lodash"; import katex from "katex"; import { decode } from "html-entities"; import { IContent } from "matrix-js-sdk/src/models/event"; import { Optional } from "matrix-events-sdk"; import _Linkify from "linkify-react"; import escapeHtml from "escape-html"; +import GraphemeSplitter from "grapheme-splitter"; import { _linkifyElement, @@ -466,11 +467,13 @@ const emojiToJsxSpan = (emoji: string, key: number): JSX.Element => ( function formatEmojis(message: string | undefined, isHtmlMessage: boolean): (JSX.Element | string)[] { const emojiToSpan = isHtmlMessage ? emojiToHtmlSpan : emojiToJsxSpan; const result: (JSX.Element | string)[] = []; + if (!message) return result; + let text = ""; let key = 0; - // We use lodash's grapheme splitter to avoid breaking apart compound emojis - for (const char of split(message, "")) { + const splitter = new GraphemeSplitter(); + for (const char of splitter.iterateGraphemes(message)) { if (EMOJIBASE_REGEX.test(char)) { if (text) { result.push(text); diff --git a/src/editor/parts.ts b/src/editor/parts.ts index e25b582e207..bee27b258bc 100644 --- a/src/editor/parts.ts +++ b/src/editor/parts.ts @@ -15,11 +15,11 @@ See the License for the specific language governing permissions and limitations under the License. */ -import { split } from "lodash"; import EMOJIBASE_REGEX from "emojibase-regex"; import { MatrixClient } from "matrix-js-sdk/src/client"; import { RoomMember } from "matrix-js-sdk/src/models/room-member"; import { Room } from "matrix-js-sdk/src/models/room"; +import GraphemeSplitter from "grapheme-splitter"; import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete"; import { unicodeToShortcode } from "../HtmlUtils"; @@ -27,6 +27,7 @@ import * as Avatar from "../Avatar"; import defaultDispatcher from "../dispatcher/dispatcher"; import { Action } from "../dispatcher/actions"; import SettingsStore from "../settings/SettingsStore"; +import { getFirstGrapheme } from "../emoji"; const REGIONAL_EMOJI_SEPARATOR = String.fromCodePoint(0x200b); @@ -133,8 +134,7 @@ abstract class BasePart { // To only need to grapheme split the bits of the string we're working on. let buffer = str; while (buffer) { - // We use lodash's grapheme splitter to avoid breaking apart compound emojis - const [char] = split(buffer, "", 2); + const char = getFirstGrapheme(buffer); if (!this.acceptsInsertion(char, offset + str.length - buffer.length, inputType)) { break; } @@ -562,8 +562,7 @@ export class PartCreator { case "\n": return new NewlinePart(); default: - // We use lodash's grapheme splitter to avoid breaking apart compound emojis - if (EMOJIBASE_REGEX.test(split(input, "", 2)[0])) { + if (EMOJIBASE_REGEX.test(getFirstGrapheme(input))) { return new EmojiPart(); } return new PlainPart(); @@ -639,8 +638,8 @@ export class PartCreator { const parts: (PlainPart | EmojiPart)[] = []; let plainText = ""; - // We use lodash's grapheme splitter to avoid breaking apart compound emojis - for (const char of split(text, "")) { + const splitter = new GraphemeSplitter(); + for (const char of splitter.iterateGraphemes(text)) { if (EMOJIBASE_REGEX.test(char)) { if (plainText) { parts.push(this.plain(plainText)); diff --git a/src/emoji.ts b/src/emoji.ts index 12f136fbcf7..b07c7a78f87 100644 --- a/src/emoji.ts +++ b/src/emoji.ts @@ -16,6 +16,7 @@ limitations under the License. import EMOJIBASE from "emojibase-data/en/compact.json"; import SHORTCODES from "emojibase-data/en/shortcodes/iamcal.json"; +import GraphemeSplitter from "grapheme-splitter"; export interface IEmoji { label: string; @@ -124,3 +125,15 @@ export const EMOJI: IEmoji[] = EMOJIBASE.map((emojiData: Omit Date: Wed, 24 May 2023 14:39:38 +0100 Subject: [PATCH 2/3] Move to a more appropriate place --- src/Avatar.ts | 2 +- src/editor/parts.ts | 2 +- src/emoji.ts | 13 ------------- src/utils/strings.ts | 13 +++++++++++++ 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Avatar.ts b/src/Avatar.ts index 64015b8dad6..3873a1a59de 100644 --- a/src/Avatar.ts +++ b/src/Avatar.ts @@ -22,7 +22,7 @@ import { ResizeMethod } from "matrix-js-sdk/src/@types/partials"; import DMRoomMap from "./utils/DMRoomMap"; import { mediaFromMxc } from "./customisations/Media"; import { isLocalRoom } from "./utils/localRoom/isLocalRoom"; -import { getFirstGrapheme } from "./emoji"; +import { getFirstGrapheme } from "./utils/strings"; // Not to be used for BaseAvatar urls as that has similar default avatar fallback already export function avatarUrlForMember( diff --git a/src/editor/parts.ts b/src/editor/parts.ts index bee27b258bc..93d79fdf059 100644 --- a/src/editor/parts.ts +++ b/src/editor/parts.ts @@ -27,7 +27,7 @@ import * as Avatar from "../Avatar"; import defaultDispatcher from "../dispatcher/dispatcher"; import { Action } from "../dispatcher/actions"; import SettingsStore from "../settings/SettingsStore"; -import { getFirstGrapheme } from "../emoji"; +import { getFirstGrapheme } from "../utils/strings"; const REGIONAL_EMOJI_SEPARATOR = String.fromCodePoint(0x200b); diff --git a/src/emoji.ts b/src/emoji.ts index b07c7a78f87..12f136fbcf7 100644 --- a/src/emoji.ts +++ b/src/emoji.ts @@ -16,7 +16,6 @@ limitations under the License. import EMOJIBASE from "emojibase-data/en/compact.json"; import SHORTCODES from "emojibase-data/en/shortcodes/iamcal.json"; -import GraphemeSplitter from "grapheme-splitter"; export interface IEmoji { label: string; @@ -125,15 +124,3 @@ export const EMOJI: IEmoji[] = EMOJIBASE.map((emojiData: Omit { try { @@ -83,3 +84,15 @@ export function copyNode(ref?: Element | null): boolean { export function getSelectedText(): string { return window.getSelection()!.toString(); } + +/** + * Returns the first grapheme in the given string, + * especially useful for strings containing emoji, will not break compound emoji up. + * @param str string to parse + * @returns the first grapheme or an empty string if given an empty string + */ +export function getFirstGrapheme(str: string): string { + const splitter = new GraphemeSplitter(); + const result = splitter.iterateGraphemes(str).next(); + return result.done ? "" : result.value; +} From 1cee062c93414f254768a2042bde9b5cda5a0abe Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 24 May 2023 14:56:55 +0100 Subject: [PATCH 3/3] Add tests and improve types --- src/HtmlUtils.tsx | 6 ++++-- test/HtmlUtils-test.tsx | 18 +++++++++++++++++- test/__snapshots__/HtmlUtils-test.tsx.snap | 12 ++++++------ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/HtmlUtils.tsx b/src/HtmlUtils.tsx index 9025eeddd90..866c1d0a0c3 100644 --- a/src/HtmlUtils.tsx +++ b/src/HtmlUtils.tsx @@ -464,7 +464,9 @@ const emojiToJsxSpan = (emoji: string, key: number): JSX.Element => ( * @returns if isHtmlMessage is true, returns an array of strings, otherwise return an array of React Elements for emojis * and plain text for everything else */ -function formatEmojis(message: string | undefined, isHtmlMessage: boolean): (JSX.Element | string)[] { +export function formatEmojis(message: string | undefined, isHtmlMessage?: false): JSX.Element[]; +export function formatEmojis(message: string | undefined, isHtmlMessage: true): string[]; +export function formatEmojis(message: string | undefined, isHtmlMessage: boolean): (JSX.Element | string)[] { const emojiToSpan = isHtmlMessage ? emojiToHtmlSpan : emojiToJsxSpan; const result: (JSX.Element | string)[] = []; if (!message) return result; @@ -664,7 +666,7 @@ export function topicToHtml( isFormattedTopic = false; // Fall back to plain-text topic } - let emojiBodyElements: ReturnType | undefined; + let emojiBodyElements: JSX.Element[] | undefined; if (!isFormattedTopic && topicHasEmoji) { emojiBodyElements = formatEmojis(topic, false); } diff --git a/test/HtmlUtils-test.tsx b/test/HtmlUtils-test.tsx index bca781792a2..779e694f83c 100644 --- a/test/HtmlUtils-test.tsx +++ b/test/HtmlUtils-test.tsx @@ -19,7 +19,7 @@ import { mocked } from "jest-mock"; import { render, screen } from "@testing-library/react"; import { IContent } from "matrix-js-sdk/src/models/event"; -import { bodyToHtml, topicToHtml } from "../src/HtmlUtils"; +import { bodyToHtml, formatEmojis, topicToHtml } from "../src/HtmlUtils"; import SettingsStore from "../src/settings/SettingsStore"; jest.mock("../src/settings/SettingsStore"); @@ -168,3 +168,19 @@ describe("bodyToHtml", () => { }); }); }); + +describe("formatEmojis", () => { + it.each([ + ["🏴󠁧󠁢󠁥󠁮󠁧󠁿", [["🏴󠁧󠁢󠁥󠁮󠁧󠁿", "flag-england"]]], + ["🏴󠁧󠁢󠁳󠁣󠁴󠁿", [["🏴󠁧󠁢󠁳󠁣󠁴󠁿", "flag-scotland"]]], + ["🏴󠁧󠁢󠁷󠁬󠁳󠁿", [["🏴󠁧󠁢󠁷󠁬󠁳󠁿", "flag-wales"]]], + ])("%s emoji", (emoji, expectations) => { + const res = formatEmojis(emoji, false); + expect(res).toHaveLength(expectations.length); + for (let i = 0; i < res.length; i++) { + const [emoji, title] = expectations[i]; + expect(res[i].props.children).toEqual(emoji); + expect(res[i].props.title).toEqual(`:${title}:`); + } + }); +}); diff --git a/test/__snapshots__/HtmlUtils-test.tsx.snap b/test/__snapshots__/HtmlUtils-test.tsx.snap index 0714120676c..c4d91467c07 100644 --- a/test/__snapshots__/HtmlUtils-test.tsx.snap +++ b/test/__snapshots__/HtmlUtils-test.tsx.snap @@ -1,5 +1,11 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP +exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"

hello

$\\xi$

world

"`; + +exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"

hello

ξ\\xi

world

"`; + +exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello ξ\\xi world"`; + exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = ` `; - -exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"

hello

$\\xi$

world

"`; - -exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"

hello

ξ\\xi

world

"`; - -exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello ξ\\xi world"`;