From 3720a28276d9457d03fdb933bc06acd146f4f8ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com>
Date: Wed, 30 Mar 2022 19:03:41 +0100
Subject: [PATCH] feat: Add `escapeAttribute` and `escapeText` (#770)

---
 src/encode.spec.ts | 12 ++++++++++++
 src/encode.ts      | 37 +++++++++++++++++++++++++++++++++++++
 src/index.ts       | 16 ++++++++++++++++
 3 files changed, 65 insertions(+)

diff --git a/src/encode.spec.ts b/src/encode.spec.ts
index 60e093c0..a432742b 100644
--- a/src/encode.spec.ts
+++ b/src/encode.spec.ts
@@ -71,3 +71,15 @@ describe("encodeNonAsciiHTML", () => {
             "♒️♓️♈️♉️♊️♋️♌️♍️♎️♏️♐️♑️"
         ));
 });
+
+describe("escape HTML", () => {
+    it("should escape HTML attribute values", () =>
+        expect(entities.escapeAttribute('<a " attr > & value \u00a0!')).toBe(
+            "<a &quot; attr > &amp; value &nbsp;!"
+        ));
+
+    it("should escape HTML text", () =>
+        expect(entities.escapeText('<a " text > & value \u00a0!')).toBe(
+            '&lt;a " text &gt; &amp; value &nbsp;!'
+        ));
+});
diff --git a/src/encode.ts b/src/encode.ts
index 28c4aac9..73a3bda2 100644
--- a/src/encode.ts
+++ b/src/encode.ts
@@ -4,6 +4,9 @@ const htmlReplacer = /[\t\n!-,./:-@[-`\f{-}$\x80-\uFFFF]/g;
 const xmlReplacer = /["&'<>$\x80-\uFFFF]/g;
 const xmlInvalidChars = /[&<>'"]/g;
 
+const textReplacer = /[&<>\u00A0]/g;
+const attrReplacer = /["&\u00A0]/g;
+
 const xmlCodeMap = new Map([
     [34, "&quot;"],
     [38, "&amp;"],
@@ -12,6 +15,14 @@ const xmlCodeMap = new Map([
     [62, "&gt;"],
 ]);
 
+const htmlEscapeCodeMap = new Map([
+    [34, "&quot;"],
+    [38, "&amp;"],
+    [60, "&lt;"],
+    [62, "&gt;"],
+    [160, "&nbsp;"],
+]);
+
 /**
  * Encodes all non-ASCII characters, as well as characters not valid in XML
  * documents using XML entities.
@@ -108,3 +119,29 @@ export function escapeUTF8(data: string): string {
 
     return result + data.substring(lastIdx);
 }
+
+/**
+ * Encodes all characters that have to be escaped in HTML attributes,
+ * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
+ *
+ * @param data String to escape.
+ */
+export function escapeAttribute(data: string): string {
+    return data.replace(
+        attrReplacer,
+        (match) => htmlEscapeCodeMap.get(match.charCodeAt(0))!
+    );
+}
+
+/**
+ * Encodes all characters that have to be escaped in HTML text,
+ * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
+ *
+ * @param data String to escape.
+ */
+export function escapeText(data: string): string {
+    return data.replace(
+        textReplacer,
+        (match) => htmlEscapeCodeMap.get(match.charCodeAt(0))!
+    );
+}
diff --git a/src/index.ts b/src/index.ts
index 0da27e7f..1978c15e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -4,6 +4,8 @@ import {
     escapeUTF8,
     encodeHTML,
     encodeNonAsciiHTML,
+    escapeAttribute,
+    escapeText,
 } from "./encode";
 
 /** The level of entities to support. */
@@ -39,6 +41,16 @@ export enum EncodingMode {
      * characters that are not ASCII characters.
      */
     Extensive,
+    /**
+     * Encode all characters that have to be escaped in HTML attributes,
+     * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
+     */
+    Attribute,
+    /**
+     * Encode all characters that have to be escaped in HTML text,
+     * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
+     */
+    Text,
 }
 
 export interface DecodingOptions {
@@ -136,6 +148,8 @@ export function encode(
 
     // Mode `UTF8` just escapes XML entities
     if (opts.mode === EncodingMode.UTF8) return escapeUTF8(data);
+    if (opts.mode === EncodingMode.Attribute) return escapeAttribute(data);
+    if (opts.mode === EncodingMode.Text) return escapeText(data);
 
     if (opts.level === EntityLevel.HTML) {
         if (opts.mode === EncodingMode.ASCII) {
@@ -155,6 +169,8 @@ export {
     encodeNonAsciiHTML,
     escape,
     escapeUTF8,
+    escapeAttribute,
+    escapeText,
     // Legacy aliases (deprecated)
     encodeHTML as encodeHTML4,
     encodeHTML as encodeHTML5,