diff --git a/.eslintrc.js b/.eslintrc.js
index 0f1335b586d..c6251ef722d 100644
--- a/.eslintrc.js
+++ b/.eslintrc.js
@@ -78,6 +78,11 @@ module.exports = {
name: "matrix-react-sdk/",
message: "Please use matrix-react-sdk/src/index instead",
},
+ {
+ name: "emojibase-regex",
+ message:
+ "This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.",
+ },
],
patterns: [
{
@@ -141,6 +146,11 @@ module.exports = {
],
message: "Please use matrix-js-sdk/src/matrix instead",
},
+ {
+ group: ["emojibase-regex/emoji*"],
+ message:
+ "This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.",
+ },
],
},
],
diff --git a/src/HtmlUtils.tsx b/src/HtmlUtils.tsx
index d8c154440bd..888c30d76cc 100644
--- a/src/HtmlUtils.tsx
+++ b/src/HtmlUtils.tsx
@@ -20,7 +20,6 @@ limitations under the License.
import React, { LegacyRef, ReactNode } from "react";
import sanitizeHtml from "sanitize-html";
import classNames from "classnames";
-import EMOJIBASE_REGEX from "emojibase-regex";
import katex from "katex";
import { decode } from "html-entities";
import { IContent } from "matrix-js-sdk/src/matrix";
@@ -46,10 +45,35 @@ const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/;
const SYMBOL_PATTERN = /([\u2100-\u2bff])/;
// Regex pattern for non-emoji characters that can appear in an "all-emoji" message
-// (Zero-Width Joiner, Zero-Width Space, Emoji presentation character, other whitespace)
-const EMOJI_SEPARATOR_REGEX = /[\u200D\u200B\s]|\uFE0F/g;
+// (Zero-Width Space, other whitespace)
+const EMOJI_SEPARATOR_REGEX = /[\u200B\s]/g;
+
+// Regex for emoji. This includes any RGI_Emoji sequence followed by an optional
+// emoji presentation VS (U+FE0F), but not those sequences that are followed by
+// a text presentation VS (U+FE0E). We also count lone regional indicators
+// (U+1F1E6-U+1F1FF). Technically this regex produces false negatives for emoji
+// followed by U+FE0E when the emoji doesn't have a text variant, but in
+// practice this doesn't matter.
+export const EMOJI_REGEX = (() => {
+ try {
+ // Per our support policy, v mode is available to us, but we still don't
+ // want the app to completely crash on older platforms. We use the
+ // constructor here to avoid a syntax error on such platforms.
+ return new RegExp("\\p{RGI_Emoji}(?!\\uFE0E)(?:(? {
+ try {
+ return new RegExp(`^(${EMOJI_REGEX.source})+$`, "iv");
+ } catch (_e) {
+ // Fall back, just like for EMOJI_REGEX
+ return /(?!)/;
+ }
+})();
/*
* Return true if the given string contains emoji
@@ -266,7 +290,7 @@ export function formatEmojis(message: string | undefined, isHtmlMessage?: boolea
let key = 0;
for (const data of graphemeSegmenter.segment(message)) {
- if (EMOJIBASE_REGEX.test(data.segment)) {
+ if (EMOJI_REGEX.test(data.segment)) {
if (text) {
result.push(text);
text = "";
diff --git a/src/components/views/rooms/SendMessageComposer.tsx b/src/components/views/rooms/SendMessageComposer.tsx
index 0ea0bdf94c1..c5972ee86ac 100644
--- a/src/components/views/rooms/SendMessageComposer.tsx
+++ b/src/components/views/rooms/SendMessageComposer.tsx
@@ -15,7 +15,6 @@ limitations under the License.
*/
import React, { createRef, KeyboardEvent, SyntheticEvent } from "react";
-import EMOJI_REGEX from "emojibase-regex";
import {
IContent,
MatrixEvent,
@@ -70,6 +69,7 @@ import { doMaybeLocalRoomAction } from "../../../utils/local-room";
import { Caret } from "../../../editor/caret";
import { IDiff } from "../../../editor/diff";
import { getBlobSafeMimeType } from "../../../utils/blobs";
+import { EMOJI_REGEX } from "../../../HtmlUtils";
/**
* Build the mentions information based on the editor model (and any related events):
diff --git a/src/editor/parts.ts b/src/editor/parts.ts
index 3f482357d1e..2b732a6dd1f 100644
--- a/src/editor/parts.ts
+++ b/src/editor/parts.ts
@@ -15,11 +15,10 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
-import EMOJIBASE_REGEX from "emojibase-regex";
import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix";
import AutocompleteWrapperModel, { GetAutocompleterComponent, UpdateCallback, UpdateQuery } from "./autocomplete";
-import { unicodeToShortcode } from "../HtmlUtils";
+import { EMOJI_REGEX, unicodeToShortcode } from "../HtmlUtils";
import * as Avatar from "../Avatar";
import defaultDispatcher from "../dispatcher/dispatcher";
import { Action } from "../dispatcher/actions";
@@ -197,7 +196,7 @@ abstract class BasePart {
abstract class PlainBasePart extends BasePart {
protected acceptsInsertion(chr: string, offset: number, inputType: string): boolean {
- if (chr === "\n" || EMOJIBASE_REGEX.test(chr)) {
+ if (chr === "\n" || EMOJI_REGEX.test(chr)) {
return false;
}
// when not pasting or dropping text, reject characters that should start a pill candidate
@@ -375,7 +374,7 @@ class NewlinePart extends BasePart implements IBasePart {
export class EmojiPart extends BasePart implements IBasePart {
protected acceptsInsertion(chr: string, offset: number): boolean {
- return EMOJIBASE_REGEX.test(chr);
+ return EMOJI_REGEX.test(chr);
}
protected acceptsRemoval(position: number, chr: string): boolean {
@@ -573,7 +572,7 @@ export class PartCreator {
case "\n":
return new NewlinePart();
default:
- if (EMOJIBASE_REGEX.test(getFirstGrapheme(input))) {
+ if (EMOJI_REGEX.test(getFirstGrapheme(input))) {
return new EmojiPart();
}
return new PlainPart();
@@ -650,7 +649,7 @@ export class PartCreator {
let plainText = "";
for (const data of graphemeSegmenter.segment(text)) {
- if (EMOJIBASE_REGEX.test(data.segment)) {
+ if (EMOJI_REGEX.test(data.segment)) {
if (plainText) {
parts.push(this.plain(plainText));
plainText = "";
diff --git a/test/HtmlUtils-test.tsx b/test/HtmlUtils-test.tsx
index d9e75faaa99..ae12a717806 100644
--- a/test/HtmlUtils-test.tsx
+++ b/test/HtmlUtils-test.tsx
@@ -107,6 +107,12 @@ describe("bodyToHtml", () => {
expect(html).toMatchInlineSnapshot(`"test foo <b>bar"`);
});
+ it("generates big emoji for emoji made of multiple characters", () => {
+ const { asFragment } = render(bodyToHtml({ body: "👨‍👩‍👧‍👦 ↔️ 🇮🇸", msgtype: "m.text" }, [], {}) as ReactElement);
+
+ expect(asFragment()).toMatchSnapshot();
+ });
+
it("should generate big emoji for an emoji-only reply to a message", () => {
const { asFragment } = render(
bodyToHtml(
@@ -132,6 +138,12 @@ describe("bodyToHtml", () => {
expect(asFragment()).toMatchSnapshot();
});
+ it("does not mistake characters in text presentation mode for emoji", () => {
+ const { asFragment } = render(bodyToHtml({ body: "↔ ❗︎", msgtype: "m.text" }, [], {}) as ReactElement);
+
+ expect(asFragment()).toMatchSnapshot();
+ });
+
describe("feature_latex_maths", () => {
beforeEach(() => {
jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths");
diff --git a/test/__snapshots__/HtmlUtils-test.tsx.snap b/test/__snapshots__/HtmlUtils-test.tsx.snap
index c33cc46433d..c69eaa7d952 100644
--- a/test/__snapshots__/HtmlUtils-test.tsx.snap
+++ b/test/__snapshots__/HtmlUtils-test.tsx.snap
@@ -1,5 +1,16 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
+exports[`bodyToHtml does not mistake characters in text presentation mode for emoji 1`] = `
+
hello
$\\xi$
world
"`; exports[`bodyToHtml feature_latex_maths should not mangle divs 1`] = `"hello
hel
exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello world"`;
+exports[`bodyToHtml generates big emoji for emoji made of multiple characters 1`] = `
+