From 3720a28276d9457d03fdb933bc06acd146f4f8ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Wed, 30 Mar 2022 19:03:41 +0100
Subject: [PATCH] feat: Add `escapeAttribute` and `escapeText` (#770)
---
src/encode.spec.ts | 12 ++++++++++++
src/encode.ts | 37 +++++++++++++++++++++++++++++++++++++
src/index.ts | 16 ++++++++++++++++
3 files changed, 65 insertions(+)
diff --git a/src/encode.spec.ts b/src/encode.spec.ts
index 60e093c0..a432742b 100644
--- a/src/encode.spec.ts
+++ b/src/encode.spec.ts
@@ -71,3 +71,15 @@ describe("encodeNonAsciiHTML", () => {
"♒️♓️♈️♉️♊️♋️♌️♍️♎️♏️♐️♑️"
));
});
+
+describe("escape HTML", () => {
+    it("should escape HTML attribute values", () =>
+        expect(entities.escapeAttribute('<a " attr > & value \u00a0!')).toBe(
+            "<a &quot; attr > &amp; value &nbsp;!" // `<` and `>` stay literal in attribute values
+        ));
+
+    it("should escape HTML text", () =>
+        expect(entities.escapeText('<a " text > & value \u00a0!')).toBe(
+            '&lt;a " text &gt; &amp; value &nbsp;!' // `"` stays literal in text
+        ));
+});
diff --git a/src/encode.ts b/src/encode.ts
index 28c4aac9..73a3bda2 100644
--- a/src/encode.ts
+++ b/src/encode.ts
@@ -4,6 +4,9 @@ const htmlReplacer = /[\t\n!-,./:-@[-`\f{-}$\x80-\uFFFF]/g;
const xmlReplacer = /["&'<>$\x80-\uFFFF]/g;
const xmlInvalidChars = /[&<>'"]/g;
+const textReplacer = /[&<>\u00A0]/g; // characters escapeText replaces: & < > and U+00A0
+const attrReplacer = /["&\u00A0]/g; // characters escapeAttribute replaces: " & and U+00A0
+
const xmlCodeMap = new Map([
[34, "&quot;"],
[38, "&amp;"],
@@ -12,6 +15,14 @@ const xmlCodeMap = new Map([
[62, "&gt;"],
]);
+const htmlEscapeCodeMap = new Map([
+    [34, "&quot;"], // `"` (matched by attrReplacer)
+    [38, "&amp;"], // `&` (matched by textReplacer and attrReplacer)
+    [60, "&lt;"], // `<` (matched by textReplacer)
+    [62, "&gt;"], // `>` (matched by textReplacer)
+    [160, "&nbsp;"], // U+00A0 no-break space (matched by both replacers)
+]);
+
/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
* documents using XML entities.
@@ -108,3 +119,29 @@ export function escapeUTF8(data: string): string {
return result + data.substring(lastIdx);
}
+
+/**
+ * Escapes `"`, `&` and U+00A0 for use inside an HTML attribute value, per
+ * {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
+ *
+ * @param data String to escape.
+ * @returns `data` with every `attrReplacer` match replaced by its `htmlEscapeCodeMap` entry.
+ */
+export function escapeAttribute(data: string): string {
+    const toEntity = (ch: string): string =>
+        htmlEscapeCodeMap.get(ch.charCodeAt(0))!;
+    return data.replace(attrReplacer, toEntity);
+}
+
+/**
+ * Escapes `&`, `<`, `>` and U+00A0 for use inside HTML text content, per
+ * {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
+ *
+ * @param data String to escape.
+ * @returns `data` with every `textReplacer` match replaced by its `htmlEscapeCodeMap` entry.
+ */
+export function escapeText(data: string): string {
+    const toEntity = (ch: string): string =>
+        htmlEscapeCodeMap.get(ch.charCodeAt(0))!;
+    return data.replace(textReplacer, toEntity);
+}
diff --git a/src/index.ts b/src/index.ts
index 0da27e7f..1978c15e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -4,6 +4,8 @@ import {
escapeUTF8,
encodeHTML,
encodeNonAsciiHTML,
+ escapeAttribute,
+ escapeText,
} from "./encode";
/** The level of entities to support. */
@@ -39,6 +41,16 @@ export enum EncodingMode {
* characters that are not ASCII characters.
*/
Extensive,
+ /**
+ * Encode all characters that have to be escaped in HTML attributes,
+ * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
+ */
+ Attribute,
+ /**
+ * Encode all characters that have to be escaped in HTML text,
+ * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
+ */
+ Text,
}
export interface DecodingOptions {
@@ -136,6 +148,8 @@ export function encode(
// Mode `UTF8` just escapes XML entities
if (opts.mode === EncodingMode.UTF8) return escapeUTF8(data);
+ if (opts.mode === EncodingMode.Attribute) return escapeAttribute(data);
+ if (opts.mode === EncodingMode.Text) return escapeText(data);
if (opts.level === EntityLevel.HTML) {
if (opts.mode === EncodingMode.ASCII) {
@@ -155,6 +169,8 @@ export {
encodeNonAsciiHTML,
escape,
escapeUTF8,
+ escapeAttribute,
+ escapeText,
// Legacy aliases (deprecated)
encodeHTML as encodeHTML4,
encodeHTML as encodeHTML5,