-
-
Notifications
You must be signed in to change notification settings - Fork 153
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
171 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
defmodule Floki.CSSEscape do
  @moduledoc false

  # This is a direct translation of
  # https://github.com/mathiasbynens/CSS.escape/blob/master/css.escape.js
  # into Elixir.

  @doc """
  Escapes a string for use as a CSS identifier.

  The string is walked code point by code point and the result is built
  from these rules:

    * NULL (U+0000) becomes the REPLACEMENT CHARACTER (U+FFFD);
    * control characters (U+0001 to U+001F, and U+007F), a leading digit, and
      a digit directly after a leading hyphen are written as a backslash, the
      code point in hexadecimal, and a trailing space;
    * a string that consists of a single hyphen has that hyphen escaped with
      a backslash;
    * code points at or above U+0080, hyphens, underscores, ASCII digits and
      ASCII letters are kept as they are;
    * every other character is prefixed with a backslash.

  Raises `ArgumentError` when `value` is not a binary.

  ## Examples

      iex> Floki.CSSEscape.escape("hello world")
      "hello\\\\ world"

      iex> Floki.CSSEscape.escape("-123")
      "-\\\\31 23"

  """
  @spec escape(String.t()) :: String.t()
  def escape(value) when is_binary(value) do
    value
    |> String.to_charlist()
    |> escape_chars()
    |> IO.iodata_to_binary()
  end

  def escape(_), do: raise(ArgumentError, "CSS.escape requires a string argument")

  # Applies the position-dependent rules to the front of the charlist and
  # hands everything after it to the per-character rules. Returns iodata.

  # If the character is the first character and is a `-` (U+002D), and
  # there is no second character, the hyphen itself is escaped.
  defp escape_chars([?-]), do: ["\\-"]

  # If the character is the first character and is in the range [0-9]
  # (U+0030 to U+0039), it is escaped as a code point.
  defp escape_chars([digit | rest]) when digit in ?0..?9 do
    [escape_as_code_point(digit) | escape_rest(rest)]
  end

  # If the character is the second character and is in the range [0-9]
  # (U+0030 to U+0039) and the first character is a `-` (U+002D), the hyphen
  # is kept and the digit is escaped as a code point.
  defp escape_chars([?-, digit | rest]) when digit in ?0..?9 do
    ["-", escape_as_code_point(digit) | escape_rest(rest)]
  end

  # No position-dependent rule applies (this includes the empty charlist).
  defp escape_chars(chars), do: escape_rest(chars)

  # Escapes characters whose treatment does not depend on their position.
  defp escape_rest(chars), do: Enum.map(chars, &escape_char/1)

  # If the character is NULL (U+0000), then the REPLACEMENT CHARACTER
  # (U+FFFD).
  defp escape_char(0), do: <<0xFFFD::utf8>>

  # If the character is in the range [\1-\1F] (U+0001 to U+001F) or is
  # U+007F, it is escaped as a code point.
  defp escape_char(char) when char in 0x0001..0x001F or char == 0x007F do
    escape_as_code_point(char)
  end

  # If the character is greater than or equal to U+0080, is `-` (U+002D) or
  # `_` (U+005F), or is in one of the ranges [0-9] (U+0030 to U+0039), [A-Z]
  # (U+0041 to U+005A), or [a-z] (U+0061 to U+007A), the character itself.
  defp escape_char(char)
       when char >= 0x0080 or char in [?-, ?_] or char in ?0..?9 or char in ?A..?Z or
              char in ?a..?z do
    <<char::utf8>>
  end

  # Otherwise, the escaped character.
  # https://drafts.csswg.org/cssom/#escape-a-character
  defp escape_char(char), do: ["\\", <<char::utf8>>]

  # Backslash, the code point in hexadecimal, and a terminating space.
  # https://drafts.csswg.org/cssom/#escape-a-character-as-code-point
  defp escape_as_code_point(char), do: ["\\", Integer.to_string(char, 16), " "]
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
defmodule Floki.CSSEscapeTest do
  # Exercises Floki.CSSEscape.escape/1 against the escaping rules listed in
  # that module: NULL replacement, code-point escapes for control characters
  # and leading digits, the lone-hyphen case, pass-through of identifier
  # characters and non-ASCII code points, and backslash escapes for the rest.
  use ExUnit.Case, async: true

  alias Floki.CSSEscape

  doctest Floki.CSSEscape

  # Each ASCII digit paired with the hexadecimal code point that is expected
  # in its escaped form.
  @digits_with_code_points Enum.zip(0..9, ~w(30 31 32 33 34 35 36 37 38 39))

  test "null character" do
    assert CSSEscape.escape(<<0>>) == <<0xFFFD::utf8>>
    assert CSSEscape.escape("a\u0000") == "a\ufffd"
    assert CSSEscape.escape("\u0000b") == "\ufffdb"
    assert CSSEscape.escape("a\u0000b") == "a\ufffdb"
  end

  test "replacement character" do
    assert CSSEscape.escape(<<0xFFFD::utf8>>) == <<0xFFFD::utf8>>
    assert CSSEscape.escape("a\ufffd") == "a\ufffd"
    assert CSSEscape.escape("\ufffdb") == "\ufffdb"
    assert CSSEscape.escape("a\ufffdb") == "a\ufffdb"
  end

  test "invalid input" do
    assert_raise ArgumentError, fn -> CSSEscape.escape(nil) end
  end

  test "control characters" do
    assert CSSEscape.escape(<<0x01, 0x02, 0x1E, 0x1F>>) == "\\1 \\2 \\1E \\1F "
  end

  test "leading digit" do
    for {digit, expected} <- @digits_with_code_points do
      assert CSSEscape.escape("#{digit}a") == "\\#{expected} a"
    end
  end

  test "non-leading digit" do
    for digit <- 0..9 do
      assert CSSEscape.escape("a#{digit}b") == "a#{digit}b"
    end
  end

  test "leading hyphen and digit" do
    for {digit, expected} <- @digits_with_code_points do
      assert CSSEscape.escape("-#{digit}a") == "-\\#{expected} a"
    end
  end

  test "hyphens" do
    assert CSSEscape.escape("-") == "\\-"
    assert CSSEscape.escape("-a") == "-a"
    assert CSSEscape.escape("--") == "--"
    assert CSSEscape.escape("--a") == "--a"
  end

  test "non-ASCII and special characters" do
    assert CSSEscape.escape("🤷🏻♂️-_©") == "🤷🏻♂️-_©"

    # U+0080 to U+009F are expected to pass through untouched, while the
    # U+007F in front of them is escaped as a code point.
    c1_controls =
      "\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008a\u008b\u008c\u008d\u008e\u008f\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009a\u009b\u009c\u009d\u009e\u009f"

    assert CSSEscape.escape(<<0x7F, c1_controls::binary>>) == "\\7F " <> c1_controls

    assert CSSEscape.escape("\u00a0\u00a1\u00a2") == "\u00a0\u00a1\u00a2"
  end

  test "alphanumeric characters" do
    assert CSSEscape.escape("a0123456789b") == "a0123456789b"
    assert CSSEscape.escape("abcdefghijklmnopqrstuvwxyz") == "abcdefghijklmnopqrstuvwxyz"
    assert CSSEscape.escape("ABCDEFGHIJKLMNOPQRSTUVWXYZ") == "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  end

  test "space and exclamation mark" do
    assert CSSEscape.escape(<<0x20, 0x21, 0x78, 0x79>>) == "\\ \\!xy"
  end

  test "unicode characters" do
    # astral symbol (U+1D306 TETRAGRAM FOR CENTRE)
    assert CSSEscape.escape(<<0x1D306::utf8>>) == <<0x1D306::utf8>>
  end
end