Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Don't consider textual characters to be emoji #12582

Merged
merged 2 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ module.exports = {
name: "matrix-react-sdk/",
message: "Please use matrix-react-sdk/src/index instead",
},
{
name: "emojibase-regex",
message:
"This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.",
},
],
patterns: [
{
Expand Down Expand Up @@ -141,6 +146,11 @@ module.exports = {
],
message: "Please use matrix-js-sdk/src/matrix instead",
},
{
group: ["emojibase-regex/emoji*"],
message:
"This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.",
},
],
},
],
Expand Down
34 changes: 29 additions & 5 deletions src/HtmlUtils.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ limitations under the License.
import React, { LegacyRef, ReactNode } from "react";
import sanitizeHtml from "sanitize-html";
import classNames from "classnames";
import EMOJIBASE_REGEX from "emojibase-regex";
import katex from "katex";
import { decode } from "html-entities";
import { IContent } from "matrix-js-sdk/src/matrix";
Expand All @@ -46,10 +45,35 @@ const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/;
const SYMBOL_PATTERN = /([\u2100-\u2bff])/;

// Regex pattern for non-emoji characters that can appear in an "all-emoji" message
// (Zero-Width Joiner, Zero-Width Space, Emoji presentation character, other whitespace)
const EMOJI_SEPARATOR_REGEX = /[\u200D\u200B\s]|\uFE0F/g;
// (Zero-Width Space, other whitespace)
const EMOJI_SEPARATOR_REGEX = /[\u200B\s]/g;

// Regex for emoji. This includes any RGI_Emoji sequence followed by an optional
// emoji presentation VS (U+FE0F), but not those sequences that are followed by
// a text presentation VS (U+FE0E). We also count lone regional indicators
// (U+1F1E6-U+1F1FF). Technically this regex produces false negatives for emoji
// followed by U+FE0E when the emoji doesn't have a text variant, but in
// practice this doesn't matter.
export const EMOJI_REGEX = (() => {
try {
// Per our support policy, v mode is available to us, but we still don't
// want the app to completely crash on older platforms. We use the
// constructor here to avoid a syntax error on such platforms.
return new RegExp("\\p{RGI_Emoji}(?!\\uFE0E)(?:(?<!\\uFE0F)\\uFE0F)?|[\\u{1f1e6}-\\u{1f1ff}]", "v");
} catch (_e) {
// v mode not supported; fall back to matching nothing
return /(?!)/;
}
})();

const BIGEMOJI_REGEX = new RegExp(`^(${EMOJIBASE_REGEX.source})+$`, "i");
const BIGEMOJI_REGEX = (() => {
try {
return new RegExp(`^(${EMOJI_REGEX.source})+$`, "iv");
} catch (_e) {
// Fall back, just like for EMOJI_REGEX
return /(?!)/;
}
})();

/*
* Return true if the given string contains emoji
Expand Down Expand Up @@ -266,7 +290,7 @@ export function formatEmojis(message: string | undefined, isHtmlMessage?: boolea
let key = 0;

for (const data of graphemeSegmenter.segment(message)) {
if (EMOJIBASE_REGEX.test(data.segment)) {
if (EMOJI_REGEX.test(data.segment)) {
if (text) {
result.push(text);
text = "";
Expand Down
2 changes: 1 addition & 1 deletion src/components/views/rooms/SendMessageComposer.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ limitations under the License.
*/

import React, { createRef, KeyboardEvent, SyntheticEvent } from "react";
import EMOJI_REGEX from "emojibase-regex";
import {
IContent,
MatrixEvent,
Expand Down Expand Up @@ -70,6 +69,7 @@ import { doMaybeLocalRoomAction } from "../../../utils/local-room";
import { Caret } from "../../../editor/caret";
import { IDiff } from "../../../editor/diff";
import { getBlobSafeMimeType } from "../../../utils/blobs";
import { EMOJI_REGEX } from "../../../HtmlUtils";

/**
* Build the mentions information based on the editor model (and any related events):
Expand Down
11 changes: 5 additions & 6 deletions src/editor/parts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,10 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

import EMOJIBASE_REGEX from "emojibase-regex";
import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix";

import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
import { unicodeToShortcode } from "../HtmlUtils";
import { EMOJI_REGEX, unicodeToShortcode } from "../HtmlUtils";
import * as Avatar from "../Avatar";
import defaultDispatcher from "../dispatcher/dispatcher";
import { Action } from "../dispatcher/actions";
Expand Down Expand Up @@ -197,7 +196,7 @@ abstract class BasePart {

abstract class PlainBasePart extends BasePart {
protected acceptsInsertion(chr: string, offset: number, inputType: string): boolean {
if (chr === "\n" || EMOJIBASE_REGEX.test(chr)) {
if (chr === "\n" || EMOJI_REGEX.test(chr)) {
return false;
}
// when not pasting or dropping text, reject characters that should start a pill candidate
Expand Down Expand Up @@ -375,7 +374,7 @@ class NewlinePart extends BasePart implements IBasePart {

export class EmojiPart extends BasePart implements IBasePart {
protected acceptsInsertion(chr: string, offset: number): boolean {
return EMOJIBASE_REGEX.test(chr);
return EMOJI_REGEX.test(chr);
}

protected acceptsRemoval(position: number, chr: string): boolean {
Expand Down Expand Up @@ -573,7 +572,7 @@ export class PartCreator {
case "\n":
return new NewlinePart();
default:
if (EMOJIBASE_REGEX.test(getFirstGrapheme(input))) {
if (EMOJI_REGEX.test(getFirstGrapheme(input))) {
return new EmojiPart();
}
return new PlainPart();
Expand Down Expand Up @@ -650,7 +649,7 @@ export class PartCreator {
let plainText = "";

for (const data of graphemeSegmenter.segment(text)) {
if (EMOJIBASE_REGEX.test(data.segment)) {
if (EMOJI_REGEX.test(data.segment)) {
if (plainText) {
parts.push(this.plain(plainText));
plainText = "";
Expand Down
12 changes: 12 additions & 0 deletions test/HtmlUtils-test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ describe("bodyToHtml", () => {
expect(html).toMatchInlineSnapshot(`"<span class="mx_EventTile_searchHighlight">test</span> foo &lt;b&gt;bar"`);
});

it("generates big emoji for emoji made of multiple characters", () => {
const { asFragment } = render(bodyToHtml({ body: "👨‍👩‍👧‍👦 ↔️ 🇮🇸", msgtype: "m.text" }, [], {}) as ReactElement);

expect(asFragment()).toMatchSnapshot();
});

it("should generate big emoji for an emoji-only reply to a message", () => {
const { asFragment } = render(
bodyToHtml(
Expand All @@ -132,6 +138,12 @@ describe("bodyToHtml", () => {
expect(asFragment()).toMatchSnapshot();
});

it("does not mistake characters in text presentation mode for emoji", () => {
const { asFragment } = render(bodyToHtml({ body: "↔ ❗︎", msgtype: "m.text" }, [], {}) as ReactElement);

expect(asFragment()).toMatchSnapshot();
});

describe("feature_latex_maths", () => {
beforeEach(() => {
jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths");
Expand Down
41 changes: 41 additions & 0 deletions test/__snapshots__/HtmlUtils-test.tsx.snap
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP

exports[`bodyToHtml does not mistake characters in text presentation mode for emoji 1`] = `
<DocumentFragment>
<span
class="mx_EventTile_body"
dir="auto"
>
↔ ❗︎
</span>
</DocumentFragment>
`;

exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"<p>hello</p><pre><code>$\\xi$</code></pre><p>world</p>"`;

exports[`bodyToHtml feature_latex_maths should not mangle divs 1`] = `"<p>hello</p><div>world</div>"`;
Expand All @@ -8,6 +19,36 @@ exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"<p>hel

exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello <span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><semantics><mrow><mi>ξ</mi></mrow><annotation encoding="application/x-tex">\\xi</annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.8889em;vertical-align:-0.1944em;"></span><span class="mord mathnormal" style="margin-right:0.04601em;">ξ</span></span></span></span> world"`;

exports[`bodyToHtml generates big emoji for emoji made of multiple characters 1`] = `
<DocumentFragment>
<span
class="mx_EventTile_body mx_EventTile_bigEmoji"
dir="auto"
>
<span
class="mx_Emoji"
title=":man-woman-girl-boy:"
>
👨‍👩‍👧‍👦
</span>

<span
class="mx_Emoji"
title=":left_right_arrow:"
>
↔️
</span>

<span
class="mx_Emoji"
title=":flag-is:"
>
🇮🇸
</span>
</span>
</DocumentFragment>
`;

exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `
<DocumentFragment>
<span
Expand Down
Loading