From ac137073162af097fc650bfd1aba8c6468eea2a4 Mon Sep 17 00:00:00 2001 From: Philip Sampaio Date: Fri, 2 Jun 2023 16:54:18 -0300 Subject: [PATCH] Fix class selector with colon (#464) This is related to the following: https://github.com/philss/floki/pull/458 https://github.com/philss/floki/issues/411 I decided to push the "cleaning" to the lexer, but I think for more complex escaping rules, we may need to push back to Elixir. --- lib/floki/selector/parser.ex | 2 +- src/floki_selector_lexer.xrl | 14 ++++++++++---- test/floki/selector/parser_test.exs | 8 +++++++- test/floki_test.exs | 25 +++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/lib/floki/selector/parser.ex b/lib/floki/selector/parser.ex index a4f4852d..134f965f 100644 --- a/lib/floki/selector/parser.ex +++ b/lib/floki/selector/parser.ex @@ -54,7 +54,7 @@ defmodule Floki.Selector.Parser do end defp do_parse([{:hash, _, id} | t], selector) do - do_parse(t, %{selector | id: to_string(id) |> String.replace("\\.", ".")}) + do_parse(t, %{selector | id: to_string(id)}) end defp do_parse([{:class, _, class} | t], selector) do diff --git a/src/floki_selector_lexer.xrl b/src/floki_selector_lexer.xrl index f402b201..7b6c7bd2 100644 --- a/src/floki_selector_lexer.xrl +++ b/src/floki_selector_lexer.xrl @@ -1,6 +1,7 @@ Definitions. -IDENTIFIER = [-A-Za-z0-9_]+(\\\.[-A-Za-z0-9_]+)* +ESCAPED = \\[:.] +IDENTIFIER = [-A-Za-z0-9_]+(({ESCAPED})?[-A-Za-z0-9_]+)* QUOTED = (\"[^"]*\"|\'[^']*\') PARENTESIS = \([^)]*\) INT = [0-9]+ @@ -18,9 +19,8 @@ Rules. {QUOTED} : {token, {quoted, TokenLine, remove_wrapper(TokenChars)}}. {ATTRIBUTE_IDENTIFIER} : {token, {attribute_identifier, TokenLine, TokenChars}}. {SYMBOL} : {token, {TokenChars, TokenLine}}. -#{IDENTIFIER} : {token, {hash, TokenLine, tail(TokenChars)}}. -\.{IDENTIFIER} : {token, {class, TokenLine, tail(TokenChars)}}. -\.{IDENTIFIER}\\:{IDENTIFIER} : {token, {class, TokenLine, tail(TokenChars)}}. +#{IDENTIFIER} : {token, {hash, TokenLine, unescape_inside_id_name(tail(TokenChars))}}. +\.{IDENTIFIER} : {token, {class, TokenLine, unescape_inside_class_name(tail(TokenChars))}}. \:{NOT}\( : {token, {pseudo_not, TokenLine}}. \:{IDENTIFIER} : {token, {pseudo, TokenLine, tail(TokenChars)}}. \({INT}\) : {token, {pseudo_class_int, TokenLine, list_to_integer(remove_wrapper(TokenChars))}}. @@ -51,3 +51,9 @@ remove_wrapper(Chars) -> tail([_|T]) -> T. + +unescape_inside_class_name(Chars) -> + lists:flatten(string:replace(Chars, "\\:", ":", all)). + +unescape_inside_id_name(Chars) -> + lists:flatten(string:replace(Chars, "\\.", ".", all)). diff --git a/test/floki/selector/parser_test.exs b/test/floki/selector/parser_test.exs index 1b9ef255..b275d1da 100644 --- a/test/floki/selector/parser_test.exs +++ b/test/floki/selector/parser_test.exs @@ -26,7 +26,13 @@ defmodule Floki.Selector.ParserTest do tokens = tokenize("a.xs\\:red-500") assert Parser.parse(tokens) == [ - %Selector{type: "a", classes: ["xs\\:red-500"], pseudo_classes: []} + %Selector{type: "a", classes: ["xs:red-500"], pseudo_classes: []} + ] + + tokens = tokenize("a.xs\\:red-500\\:big") + + assert Parser.parse(tokens) == [ + %Selector{type: "a", classes: ["xs:red-500:big"], pseudo_classes: []} ] end diff --git a/test/floki_test.exs b/test/floki_test.exs index f0f131cc..44985a12 100644 --- a/test/floki_test.exs +++ b/test/floki_test.exs @@ -630,6 +630,31 @@ defmodule FlokiTest do assert Floki.find(document!(@html), class_selector) == [] end + test "find elements with colon in class names" do + html = + document!( + html_body(""" +
+
+
+ """) + ) + + assert Floki.find(html, ".xl\\:flex-row.md\\:space-y-20") == [ + { + "div", + [ + { + "class", + "m-auto max-w-7xl px-4 pt-12 pb-20 flex flex-col xl:flex-row space-y-16\nmd:space-y-20 xl:space-y-0" + } + ], + [] + } + ] + end + # Floki.find/2 - Tag name test "select elements by tag name" do