Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Use grapheme-splitter instead of lodash for saving emoji from being ripped apart #10976

Merged
merged 3 commits into from
May 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
"focus-visible": "^5.2.0",
"gfm.css": "^1.1.2",
"glob-to-regexp": "^0.4.1",
"grapheme-splitter": "^1.0.4",
"highlight.js": "^11.3.1",
"html-entities": "^2.0.0",
"is-ip": "^3.1.0",
Expand Down
5 changes: 2 additions & 3 deletions src/Avatar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ import { RoomMember } from "matrix-js-sdk/src/models/room-member";
import { User } from "matrix-js-sdk/src/models/user";
import { Room } from "matrix-js-sdk/src/models/room";
import { ResizeMethod } from "matrix-js-sdk/src/@types/partials";
import { split } from "lodash";

import DMRoomMap from "./utils/DMRoomMap";
import { mediaFromMxc } from "./customisations/Media";
import { isLocalRoom } from "./utils/localRoom/isLocalRoom";
import { getFirstGrapheme } from "./utils/strings";

// Not to be used for BaseAvatar urls as that has similar default avatar fallback already
export function avatarUrlForMember(
Expand Down Expand Up @@ -133,8 +133,7 @@ export function getInitialLetter(name: string): string | undefined {
name = name.substring(1);
}

// rely on the grapheme cluster splitter in lodash so that we don't break apart compound emojis
return split(name, "", 1)[0].toUpperCase();
return getFirstGrapheme(name).toUpperCase();
}

export function avatarUrlForRoom(
Expand Down
15 changes: 10 additions & 5 deletions src/HtmlUtils.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,14 @@ import React, { LegacyRef, ReactElement, ReactNode } from "react";
import sanitizeHtml from "sanitize-html";
import classNames from "classnames";
import EMOJIBASE_REGEX from "emojibase-regex";
import { merge, split } from "lodash";
import { merge } from "lodash";
import katex from "katex";
import { decode } from "html-entities";
import { IContent } from "matrix-js-sdk/src/models/event";
import { Optional } from "matrix-events-sdk";
import _Linkify from "linkify-react";
import escapeHtml from "escape-html";
import GraphemeSplitter from "grapheme-splitter";

import {
_linkifyElement,
Expand Down Expand Up @@ -463,14 +464,18 @@ const emojiToJsxSpan = (emoji: string, key: number): JSX.Element => (
* @returns if isHtmlMessage is true, returns an array of strings, otherwise return an array of React Elements for emojis
* and plain text for everything else
*/
function formatEmojis(message: string | undefined, isHtmlMessage: boolean): (JSX.Element | string)[] {
export function formatEmojis(message: string | undefined, isHtmlMessage?: false): JSX.Element[];
export function formatEmojis(message: string | undefined, isHtmlMessage: true): string[];
export function formatEmojis(message: string | undefined, isHtmlMessage: boolean): (JSX.Element | string)[] {
const emojiToSpan = isHtmlMessage ? emojiToHtmlSpan : emojiToJsxSpan;
const result: (JSX.Element | string)[] = [];
if (!message) return result;

let text = "";
let key = 0;

// We use lodash's grapheme splitter to avoid breaking apart compound emojis
for (const char of split(message, "")) {
const splitter = new GraphemeSplitter();
for (const char of splitter.iterateGraphemes(message)) {
if (EMOJIBASE_REGEX.test(char)) {
if (text) {
result.push(text);
Expand Down Expand Up @@ -661,7 +666,7 @@ export function topicToHtml(
isFormattedTopic = false; // Fall back to plain-text topic
}

let emojiBodyElements: ReturnType<typeof formatEmojis> | undefined;
let emojiBodyElements: JSX.Element[] | undefined;
if (!isFormattedTopic && topicHasEmoji) {
emojiBodyElements = formatEmojis(topic, false);
}
Expand Down
13 changes: 6 additions & 7 deletions src/editor/parts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,19 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

import { split } from "lodash";
import EMOJIBASE_REGEX from "emojibase-regex";
import { MatrixClient } from "matrix-js-sdk/src/client";
import { RoomMember } from "matrix-js-sdk/src/models/room-member";
import { Room } from "matrix-js-sdk/src/models/room";
import GraphemeSplitter from "grapheme-splitter";

import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
import { unicodeToShortcode } from "../HtmlUtils";
import * as Avatar from "../Avatar";
import defaultDispatcher from "../dispatcher/dispatcher";
import { Action } from "../dispatcher/actions";
import SettingsStore from "../settings/SettingsStore";
import { getFirstGrapheme } from "../utils/strings";

const REGIONAL_EMOJI_SEPARATOR = String.fromCodePoint(0x200b);

Expand Down Expand Up @@ -133,8 +134,7 @@ abstract class BasePart {
// To only need to grapheme split the bits of the string we're working on.
let buffer = str;
while (buffer) {
// We use lodash's grapheme splitter to avoid breaking apart compound emojis
const [char] = split(buffer, "", 2);
const char = getFirstGrapheme(buffer);
if (!this.acceptsInsertion(char, offset + str.length - buffer.length, inputType)) {
break;
}
Expand Down Expand Up @@ -562,8 +562,7 @@ export class PartCreator {
case "\n":
return new NewlinePart();
default:
// We use lodash's grapheme splitter to avoid breaking apart compound emojis
if (EMOJIBASE_REGEX.test(split(input, "", 2)[0])) {
if (EMOJIBASE_REGEX.test(getFirstGrapheme(input))) {
return new EmojiPart();
}
return new PlainPart();
Expand Down Expand Up @@ -639,8 +638,8 @@ export class PartCreator {
const parts: (PlainPart | EmojiPart)[] = [];
let plainText = "";

// We use lodash's grapheme splitter to avoid breaking apart compound emojis
for (const char of split(text, "")) {
const splitter = new GraphemeSplitter();
for (const char of splitter.iterateGraphemes(text)) {
if (EMOJIBASE_REGEX.test(char)) {
if (plainText) {
parts.push(this.plain(plainText));
Expand Down
13 changes: 13 additions & 0 deletions src/utils/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ limitations under the License.
* @param text the plaintext to put in the user's clipboard
*/
import { logger } from "matrix-js-sdk/src/logger";
import GraphemeSplitter from "grapheme-splitter";

export async function copyPlaintext(text: string): Promise<boolean> {
try {
Expand Down Expand Up @@ -83,3 +84,15 @@ export function copyNode(ref?: Element | null): boolean {
export function getSelectedText(): string {
return window.getSelection()!.toString();
}

/**
* Returns the first grapheme in the given string,
* especially useful for strings containing emoji, will not break compound emoji up.
* @param str string to parse
* @returns the first grapheme or an empty string if given an empty string
*/
export function getFirstGrapheme(str: string): string {
const splitter = new GraphemeSplitter();
const result = splitter.iterateGraphemes(str).next();
return result.done ? "" : result.value;
}
18 changes: 17 additions & 1 deletion test/HtmlUtils-test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import { mocked } from "jest-mock";
import { render, screen } from "@testing-library/react";
import { IContent } from "matrix-js-sdk/src/models/event";

import { bodyToHtml, topicToHtml } from "../src/HtmlUtils";
import { bodyToHtml, formatEmojis, topicToHtml } from "../src/HtmlUtils";
import SettingsStore from "../src/settings/SettingsStore";

jest.mock("../src/settings/SettingsStore");
Expand Down Expand Up @@ -168,3 +168,19 @@ describe("bodyToHtml", () => {
});
});
});

describe("formatEmojis", () => {
it.each([
["🏴󠁧󠁢󠁥󠁮󠁧󠁿", [["🏴󠁧󠁢󠁥󠁮󠁧󠁿", "flag-england"]]],
["🏴󠁧󠁢󠁳󠁣󠁴󠁿", [["🏴󠁧󠁢󠁳󠁣󠁴󠁿", "flag-scotland"]]],
["🏴󠁧󠁢󠁷󠁬󠁳󠁿", [["🏴󠁧󠁢󠁷󠁬󠁳󠁿", "flag-wales"]]],
])("%s emoji", (emoji, expectations) => {
const res = formatEmojis(emoji, false);
expect(res).toHaveLength(expectations.length);
for (let i = 0; i < res.length; i++) {
const [emoji, title] = expectations[i];
expect(res[i].props.children).toEqual(emoji);
expect(res[i].props.title).toEqual(`:${title}:`);
}
});
});
12 changes: 6 additions & 6 deletions test/__snapshots__/HtmlUtils-test.tsx.snap
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;

exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"<p>hello</p><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span></span><p>world</p>"`;

exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;

exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `
<DocumentFragment>
<span
Expand All @@ -15,9 +21,3 @@ exports[`bodyToHtml should generate big emoji for an emoji-only reply to a messa
</span>
</DocumentFragment>
`;

exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;

exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"<p>hello</p><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span></span><p>world</p>"`;

exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;