diff --git a/lib/floki.ex b/lib/floki.ex
index bf76dc8d..264d5bdf 100644
--- a/lib/floki.ex
+++ b/lib/floki.ex
@@ -280,17 +280,13 @@ defmodule Floki do
)
with {:ok, document} <- Floki.parse_document(html) do
- {tree, results} = Finder.find(document, selector)
-
- Enum.map(results, fn html_node -> HTMLTree.to_tuple(tree, html_node) end)
+ Finder.find(document, selector)
end
end
def find(html_tree_as_tuple, selector)
when is_list(html_tree_as_tuple) or is_html_node(html_tree_as_tuple) do
- {tree, results} = Finder.find(html_tree_as_tuple, selector)
-
- Enum.map(results, fn html_node -> HTMLTree.to_tuple(tree, html_node) end)
+ Finder.find(html_tree_as_tuple, selector)
end
@doc """
@@ -385,7 +381,8 @@ defmodule Floki do
({String.t(), html_attributes()} -> {String.t(), html_attributes()} | :delete)
) :: html_tree()
def find_and_update(html_tree, selector, fun) do
- {tree, results} = Finder.find(html_tree, selector)
+ tree = HTMLTree.build(html_tree)
+ results = Finder.find(tree, selector)
operations_with_nodes =
Enum.map(results, fn
diff --git a/lib/floki/finder.ex b/lib/floki/finder.ex
index a0ca4817..c522c5fc 100644
--- a/lib/floki/finder.ex
+++ b/lib/floki/finder.ex
@@ -6,14 +6,16 @@ defmodule Floki.Finder do
alias Floki.{HTMLTree, Selector}
alias HTMLTree.HTMLNode
+ import Floki, only: [is_html_node: 1]
# Find elements inside a HTML tree.
# Second argument can be either a selector string, a selector struct or a list of selector structs.
- @spec find(Floki.html_tree(), Floki.css_selector()) :: {HTMLTree.t(), [HTMLTree.HTMLNode.t()]}
+ @spec find(HTMLTree.t(), Floki.css_selector()) :: [HTMLTree.HTMLNode.t()]
+ @spec find(Floki.html_tree() | Floki.html_node(), Floki.css_selector()) :: [Floki.html_node()]
- def find([], _), do: {%HTMLTree{}, []}
- def find(html_as_string, _) when is_binary(html_as_string), do: {%HTMLTree{}, []}
+ def find([], _), do: []
+ def find(html_as_string, _) when is_binary(html_as_string), do: []
def find(html_tree, selector_as_string) when is_binary(selector_as_string) do
selectors = Selector.Parser.parse(selector_as_string)
@@ -24,18 +26,21 @@ defmodule Floki.Finder do
find(html_tree, [selector])
end
- def find(html_tree, selectors) when is_list(selectors) do
- tree = HTMLTree.build(html_tree)
+ def find(html_tree_as_tuple, selectors)
+ when (is_list(html_tree_as_tuple) or is_html_node(html_tree_as_tuple)) and
+ is_list(selectors) do
+ tree = HTMLTree.build(html_tree_as_tuple)
+ results = find(tree, selectors)
+ Enum.map(results, fn html_node -> HTMLTree.to_tuple(tree, html_node) end)
+ end
+ def find(%HTMLTree{} = tree, selectors) when is_list(selectors) do
node_ids = Enum.reverse(tree.node_ids)
stack = Enum.map(selectors, fn s -> {s, node_ids} end)
- results =
- traverse_with(:cont, tree, [], stack)
- |> Enum.reverse()
- |> Enum.uniq()
-
- {tree, results}
+ traverse_with(:cont, tree, [], stack)
+ |> Enum.reverse()
+ |> Enum.uniq()
end
# The stack serves as accumulator when there is another combinator to traverse.
diff --git a/test/floki_test.exs b/test/floki_test.exs
index 9ff30e28..91168679 100644
--- a/test/floki_test.exs
+++ b/test/floki_test.exs
@@ -5,6 +5,7 @@ defmodule FlokiTest do
require Floki
alias Floki.HTMLParser.{Html5ever, Mochiweb, FastHtml}
+ alias Floki.HTMLTree
@plain_text_tags [
"script",
@@ -557,52 +558,52 @@ defmodule FlokiTest do
# Floki.find/2 - Classes
test "find elements with a given class" do
- assert Floki.find(document!(@html), ".js-cool") == [
- {
- "a",
- [
- {"href", "http://google.com"},
- {"class", "js-google js-cool"}
- ],
- ["Google"]
- },
- {
- "a",
- [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
- ["Elixir lang"]
- }
- ]
+ assert_find(document!(@html), ".js-cool", [
+ {
+ "a",
+ [
+ {"href", "http://google.com"},
+ {"class", "js-google js-cool"}
+ ],
+ ["Google"]
+ },
+ {
+ "a",
+ [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
+ ["Elixir lang"]
+ }
+ ])
end
@tag except_parser: FastHtml
test "find elements with a given class and attributes as maps" do
- assert Floki.find(document!(@html, attributes_as_maps: true), ".js-cool") == [
- {
- "a",
- %{
- "href" => "http://google.com",
- "class" => "js-google js-cool"
- },
- ["Google"]
- },
- {
- "a",
- %{"href" => "http://elixir-lang.org", "class" => "js-elixir js-cool"},
- ["Elixir lang"]
- }
- ]
+ assert_find(document!(@html, attributes_as_maps: true), ".js-cool", [
+ {
+ "a",
+ %{
+ "href" => "http://google.com",
+ "class" => "js-google js-cool"
+ },
+ ["Google"]
+ },
+ {
+ "a",
+ %{"href" => "http://elixir-lang.org", "class" => "js-elixir js-cool"},
+ ["Elixir lang"]
+ }
+ ])
end
test "find elements with two classes combined" do
class_selector = ".js-cool.js-elixir"
- assert Floki.find(document!(@html), class_selector) == [
- {
- "a",
- [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
- ["Elixir lang"]
- }
- ]
+ assert_find(document!(@html), class_selector, [
+ {
+ "a",
+ [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
+ ["Elixir lang"]
+ }
+ ])
end
test "find elements with anormal class spacing" do
@@ -615,13 +616,13 @@ defmodule FlokiTest do
class_selector = ".js-cool.js-elixir"
- assert Floki.find(html, class_selector) == [
- {
- "div",
- [{"class", "js-cool\t\t js-elixir"}],
- []
- }
- ]
+ assert_find(html, class_selector, [
+ {
+ "div",
+ [{"class", "js-cool\t\t js-elixir"}],
+ []
+ }
+ ])
end
test "find elements with a given class in html_without_html_tag" do
@@ -633,42 +634,40 @@ defmodule FlokiTest do
{:ok, html} = Floki.parse_fragment(html_without_html_tag)
- assert Floki.find(html, ".js-cool") == [
- {"h2", [{"class", "js-cool"}], ["One"]}
- ]
+ assert_find(html, ".js-cool", [{"h2", [{"class", "js-cool"}], ["One"]}])
end
test "find element that does not have child node" do
class_selector = ".js-twitter-logo"
- assert Floki.find(document!(@html_with_img), class_selector) == [
- {
- "img",
- [{"src", "http://twitter.com/logo.png"}, {"class", "js-twitter-logo"}],
- []
- }
- ]
+ assert_find(document!(@html_with_img), class_selector, [
+ {
+ "img",
+ [{"src", "http://twitter.com/logo.png"}, {"class", "js-twitter-logo"}],
+ []
+ }
+ ])
end
test "find element that does not close the tag" do
class_selector = ".img-without-closing-tag"
- assert Floki.find(document!(@html_with_img), class_selector) == [
- {
- "img",
- [
- {"src", "http://twitter.com/logo.png"},
- {"class", "img-without-closing-tag"}
- ],
- []
- }
- ]
+ assert_find(document!(@html_with_img), class_selector, [
+ {
+ "img",
+ [
+ {"src", "http://twitter.com/logo.png"},
+ {"class", "img-without-closing-tag"}
+ ],
+ []
+ }
+ ])
end
test "does not find elements" do
class_selector = ".nothing"
- assert Floki.find(document!(@html), class_selector) == []
+ assert_find(document!(@html), class_selector, [])
end
test "find elements with colon in class names" do
@@ -682,18 +681,18 @@ defmodule FlokiTest do
""")
)
- assert Floki.find(html, ".xl\\:flex-row.md\\:space-y-20") == [
- {
- "div",
- [
- {
- "class",
- "m-auto max-w-7xl px-4 pt-12 pb-20 flex flex-col xl:flex-row space-y-16\nmd:space-y-20 xl:space-y-0"
- }
- ],
- []
- }
- ]
+ assert_find(html, ".xl\\:flex-row.md\\:space-y-20", [
+ {
+ "div",
+ [
+ {
+ "class",
+ "m-auto max-w-7xl px-4 pt-12 pb-20 flex flex-col xl:flex-row space-y-16\nmd:space-y-20 xl:space-y-0"
+ }
+ ],
+ []
+ }
+ ])
end
# Floki.find/2 - Tag name
@@ -701,30 +700,30 @@ defmodule FlokiTest do
test "select elements by tag name" do
html = document!(html_body(~s(NameJulius)))
- assert [{"a", [{"href", "profile"}], ["Julius"]}] = Floki.find(html, "a")
+ assert_find(html, "a", [{"a", [{"href", "profile"}], ["Julius"]}])
end
# Floki.find/2 - ID
test "find element by id" do
- assert Floki.find(document!(@html_with_img), "#logo") == [
- {
- "img",
- [{"src", "logo.png"}, {"id", "logo"}],
- []
- }
- ]
+ assert_find(document!(@html_with_img), "#logo", [
+ {
+ "img",
+ [{"src", "logo.png"}, {"id", "logo"}],
+ []
+ }
+ ])
end
@tag only_parser: Mochiweb
test "find element by id when tree has attributes as maps" do
- assert Floki.find(document!(@html_with_img, attributes_as_maps: true), "#logo") == [
- {
- "img",
- %{"src" => "logo.png", "id" => "logo"},
- []
- }
- ]
+ assert_find(document!(@html_with_img, attributes_as_maps: true), "#logo", [
+ {
+ "img",
+ %{"src" => "logo.png", "id" => "logo"},
+ []
+ }
+ ])
end
## Floki.find/2 - Attributes
@@ -732,178 +731,179 @@ defmodule FlokiTest do
test "find elements with a tag and a given attribute value with shorthand syntax" do
attribute_selector = "a[data-action=lolcats]"
- assert Floki.find(document!(@html_with_data_attributes), attribute_selector) == [
- {
- "a",
- [
- {"href", "http://google.com"},
- {"class", "js-google js-cool"},
- {"data-action", "lolcats"}
- ],
- ["Google"]
- }
- ]
+ assert_find(document!(@html_with_data_attributes), attribute_selector, [
+ {
+ "a",
+ [
+ {"href", "http://google.com"},
+ {"class", "js-google js-cool"},
+ {"data-action", "lolcats"}
+ ],
+ ["Google"]
+ }
+ ])
end
@tag only_parser: Mochiweb
test "find elements with a tag and a given attribute value with tree containing attributes as maps" do
attribute_selector = "a[data-action=lolcats]"
- assert Floki.find(
- document!(@html_with_data_attributes, attributes_as_maps: true),
- attribute_selector
- ) == [
- {
- "a",
- %{
- "class" => "js-google js-cool",
- "data-action" => "lolcats",
- "href" => "http://google.com"
- },
- ["Google"]
- }
- ]
+ assert_find(
+ document!(@html_with_data_attributes, attributes_as_maps: true),
+ attribute_selector,
+ [
+ {
+ "a",
+ %{
+ "class" => "js-google js-cool",
+ "data-action" => "lolcats",
+ "href" => "http://google.com"
+ },
+ ["Google"]
+ }
+ ]
+ )
end
test "find elements only by given attribute value with shorthand syntax" do
attribute_selector = "[data-action=lolcats]"
- assert Floki.find(document!(@html_with_data_attributes), attribute_selector) == [
- {
- "a",
- [
- {"href", "http://google.com"},
- {"class", "js-google js-cool"},
- {"data-action", "lolcats"}
- ],
- ["Google"]
- }
- ]
+ assert_find(document!(@html_with_data_attributes), attribute_selector, [
+ {
+ "a",
+ [
+ {"href", "http://google.com"},
+ {"class", "js-google js-cool"},
+ {"data-action", "lolcats"}
+ ],
+ ["Google"]
+ }
+ ])
end
test "find elements by the attribute's |= selector" do
attribute_selector = "a[href|='http://elixir']"
- assert Floki.find(document!(@html), attribute_selector) == [
- {
- "a",
- [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
- ["Elixir lang"]
- }
- ]
+ assert_find(document!(@html), attribute_selector, [
+ {
+ "a",
+ [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
+ ["Elixir lang"]
+ }
+ ])
end
test "find elements by the attribute's ^= selector" do
attribute_selector = "a[href^='http://g']"
- assert Floki.find(document!(@html), attribute_selector) == [
- {
- "a",
- [{"href", "http://google.com"}, {"class", "js-google js-cool"}],
- ["Google"]
- }
- ]
+ assert_find(document!(@html), attribute_selector, [
+ {
+ "a",
+ [{"href", "http://google.com"}, {"class", "js-google js-cool"}],
+ ["Google"]
+ }
+ ])
end
@tag only_parser: Mochiweb
test "find elements by the attribute's ^= selector against a tree with attributes as maps" do
attribute_selector = "a[href^='http://g']"
- assert Floki.find(document!(@html, attributes_as_maps: true), attribute_selector) == [
- {
- "a",
- %{"href" => "http://google.com", "class" => "js-google js-cool"},
- ["Google"]
- }
- ]
+ assert_find(document!(@html, attributes_as_maps: true), attribute_selector, [
+ {
+ "a",
+ %{"href" => "http://google.com", "class" => "js-google js-cool"},
+ ["Google"]
+ }
+ ])
end
test "find elements by the attribute's $= selector" do
attribute_selector = "a[href$='.org']"
- assert Floki.find(document!(@html), attribute_selector) == [
- {
- "a",
- [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
- ["Elixir lang"]
- }
- ]
+ assert_find(document!(@html), attribute_selector, [
+ {
+ "a",
+ [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
+ ["Elixir lang"]
+ }
+ ])
end
test "find elements by the attribute's *= selector" do
attribute_selector = "a[class*='google']"
- assert Floki.find(document!(@html), attribute_selector) == [
- {
- "a",
- [{"href", "http://google.com"}, {"class", "js-google js-cool"}],
- ["Google"]
- }
- ]
+ assert_find(document!(@html), attribute_selector, [
+ {
+ "a",
+ [{"href", "http://google.com"}, {"class", "js-google js-cool"}],
+ ["Google"]
+ }
+ ])
end
test "find elements only by given case-insensitive attribute value" do
attribute_selector = "meta[name='robots' i]"
html = document!(html_body(~s()))
- assert Floki.find(html, attribute_selector) == [
- {
- "meta",
- [
- {"name", "ROBOTS"},
- {"content", "INDEX, FOLLOW, NOIMAGEINDEX"}
- ],
- []
- }
- ]
+ assert_find(html, attribute_selector, [
+ {
+ "meta",
+ [
+ {"name", "ROBOTS"},
+ {"content", "INDEX, FOLLOW, NOIMAGEINDEX"}
+ ],
+ []
+ }
+ ])
end
test "find elements by the attribute's |= selector with case-insensitive flag" do
attribute_selector = "a[href|='HTTP://ELIXIR' i]"
- assert Floki.find(document!(@html), attribute_selector) == [
- {
- "a",
- [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
- ["Elixir lang"]
- }
- ]
+ assert_find(document!(@html), attribute_selector, [
+ {
+ "a",
+ [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
+ ["Elixir lang"]
+ }
+ ])
end
test "find elements by the attribute's ^= selector with case-insensitive flag" do
attribute_selector = "a[href^='HTTP://G' i]"
- assert Floki.find(document!(@html), attribute_selector) == [
- {
- "a",
- [{"href", "http://google.com"}, {"class", "js-google js-cool"}],
- ["Google"]
- }
- ]
+ assert_find(document!(@html), attribute_selector, [
+ {
+ "a",
+ [{"href", "http://google.com"}, {"class", "js-google js-cool"}],
+ ["Google"]
+ }
+ ])
end
test "find elements by the attribute's $= selector with case-insensitive flag" do
attribute_selector = "a[href$='.ORG' i]"
- assert Floki.find(document!(@html), attribute_selector) == [
- {
- "a",
- [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
- ["Elixir lang"]
- }
- ]
+ assert_find(document!(@html), attribute_selector, [
+ {
+ "a",
+ [{"href", "http://elixir-lang.org"}, {"class", "js-elixir js-cool"}],
+ ["Elixir lang"]
+ }
+ ])
end
test "find elements by the attribute's *= selector with case-insensitive flag" do
attribute_selector = "a[class*='GOOGLE' i]"
- assert Floki.find(document!(@html), attribute_selector) == [
- {
- "a",
- [{"href", "http://google.com"}, {"class", "js-google js-cool"}],
- ["Google"]
- }
- ]
+ assert_find(document!(@html), attribute_selector, [
+ {
+ "a",
+ [{"href", "http://google.com"}, {"class", "js-google js-cool"}],
+ ["Google"]
+ }
+ ])
end
# Floki.find/2 - Selector with descendant combinator
@@ -917,7 +917,7 @@ defmodule FlokiTest do
}
]
- assert Floki.find(document!(@html_with_img), "a img") == expected
+ assert_find(document!(@html_with_img), "a img", expected)
end
# Floki.find/2 - Selector with child combinator
@@ -939,9 +939,9 @@ defmodule FlokiTest do
}
]
- assert Floki.find(document!(@html_with_img), "div.logo-container > img") == expected
- assert Floki.find(document!(@html_with_img), "body > div.logo-container > img") == expected
- assert Floki.find(document!(@html_with_img), "body > img") == []
+ assert_find(document!(@html_with_img), "div.logo-container > img", expected)
+ assert_find(document!(@html_with_img), "body > div.logo-container > img", expected)
+ assert_find(document!(@html_with_img), "body > img", [])
end
test "find only immediate children elements" do
@@ -962,8 +962,8 @@ defmodule FlokiTest do
""")
)
- assert Floki.find(html, "div > p > img") == []
- assert Floki.find(html, "div > p > span > img") == expected
+ assert_find(html, "div > p > img", [])
+ assert_find(html, "div > p > span > img", expected)
end
test "find a sibling after immediate child chain" do
@@ -989,7 +989,7 @@ defmodule FlokiTest do
""")
)
- assert Floki.find(html, "div > p > span > img + img") == expected
+ assert_find(html, "div > p > span > img + img", expected)
end
# Floki.find/2 - Sibling combinator
@@ -1009,14 +1009,14 @@ defmodule FlokiTest do
]}
]
- assert Floki.find(html, "a + div") == expected
- assert Floki.find(html, "a + .l-c") == expected
+ assert_find(html, "a + div", expected)
+ assert_find(html, "a + .l-c", expected)
- assert Floki.find(html, "a + div #lg") == [
- {"img", [{"src", "l.png"}, {"id", "lg"}], []}
- ]
+ assert_find(html, "a + div #lg", [
+ {"img", [{"src", "l.png"}, {"id", "lg"}], []}
+ ])
- assert Floki.find(html, "a + #lg") == []
+ assert_find(html, "a + #lg", [])
end
# Floki.find/2 - General sibling combinator
@@ -1028,10 +1028,10 @@ defmodule FlokiTest do
{"a", [{"href", "http://java.com"}, {"class", "js-java"}], ["Java"]}
]
- assert Floki.find(document!(@html), "a.js-google ~ a") == expected
- assert Floki.find(document!(@html), "body > div > a.js-google ~ a") == expected
- assert Floki.find(document!(@html), "body > div ~ a") == []
- assert Floki.find(document!(@html), "a.js-java ~ a") == []
+ assert_find(document!(@html), "a.js-google ~ a", expected)
+ assert_find(document!(@html), "body > div > a.js-google ~ a", expected)
+ assert_find(document!(@html), "body > div ~ a", [])
+ assert_find(document!(@html), "a.js-java ~ a", [])
end
# Floki.find/2 - Using groups with comma
@@ -1042,13 +1042,13 @@ defmodule FlokiTest do
{"img", [{"src", "logo.png"}, {"id", "logo"}], []}
]
- assert Floki.find(document!(@html_with_img), ".js-twitter-logo, #logo") == expected
+ assert_find(document!(@html_with_img), ".js-twitter-logo, #logo", expected)
end
test "get one element when search for multiple and just one exist" do
expected = [{"img", [{"src", "logo.png"}, {"id", "logo"}], []}]
- assert Floki.find(document!(@html_with_img), ".js-x-logo, #logo") == expected
+ assert_find(document!(@html_with_img), ".js-x-logo, #logo", expected)
end
# Floki.find/2 - Pseudo-class
@@ -1069,45 +1069,45 @@ defmodule FlokiTest do