Skip to content

Commit

Permalink
Fix: Set encoding in XML.parse_html explicitly to UTF-8 (crystal-la…
Browse files Browse the repository at this point in the history
  • Loading branch information
straight-shoota authored and Blacksmoke16 committed Dec 11, 2023
1 parent 1ac464d commit 2b8a9c5
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
32 changes: 32 additions & 0 deletions spec/std/xml/xml_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,38 @@ describe XML do
assert_prints node.to_xml, %(<p>&lt;foo&gt;</p>)
end

# Checks that non-ASCII UTF-8 text in an HTML string comes back unchanged from
# `XML.parse_html` (the issue number is the one given in the spec title).
it "parses HTML UTF-8 from memory (#13703)" do
  root = XML.parse_html("<p>České psaní</p>").root

  # The value produced by `should_not be_nil` is what the text assertion runs against.
  first_child = root.try { |element| element.children.first }.should_not be_nil

  first_child.text.should eq("České psaní")
end

# Checks that non-ASCII UTF-8 text read from an `IO::Memory` comes back unchanged
# from `XML.parse_html` (the issue number is the one given in the spec title).
it "parses HTML UTF-8 from IO (#13703)" do
  input = IO::Memory.new("<p>České psaní</p>")
  root = XML.parse_html(input).root

  # The value produced by `should_not be_nil` is what the text assertion runs against.
  first_child = root.try { |element| element.children.first }.should_not be_nil

  first_child.text.should eq("České psaní")
end

# Checks that non-ASCII UTF-8 text in an XML string comes back unchanged from
# `XML.parse` (the issue number is the one given in the spec title).
it "parses XML UTF-8 from memory (#13703)" do
  root = XML.parse("<p>České psaní</p>").root

  # The value produced by `should_not be_nil` is what the text assertion runs against.
  first_child = root.try { |element| element.children.first }.should_not be_nil

  first_child.text.should eq("České psaní")
end

# Checks that non-ASCII UTF-8 text read from an `IO::Memory` comes back unchanged
# from `XML.parse` (the issue number is the one given in the spec title).
it "parses XML UTF-8 from IO (#13703)" do
  input = IO::Memory.new("<p>České psaní</p>")
  root = XML.parse(input).root

  # The value produced by `should_not be_nil` is what the text assertion runs against.
  first_child = root.try { |element| element.children.first }.should_not be_nil

  first_child.text.should eq("České psaní")
end

it "gets empty content" do
doc = XML.parse("<foo/>")
doc.children.first.content.should eq("")
Expand Down
4 changes: 2 additions & 2 deletions src/xml.cr
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ module XML
# See `HTMLParserOptions.default` for default options.
def self.parse_html(string : String, options : HTMLParserOptions = HTMLParserOptions.default) : Node
# An empty string is rejected with `XML::Error` before `LibXML.htmlReadMemory` is called.
raise XML::Error.new("Document is empty", 0) if string.empty?
# NOTE(review): this is a diff view whose +/- markers were stripped. The two
# `from_ptr` lines below appear to be the removed call followed by its
# replacement; they differ only in the fourth argument (`nil` vs. "utf-8"),
# which per the commit title is the encoding now set explicitly.
from_ptr { LibXML.htmlReadMemory(string, string.bytesize, nil, nil, options) }
from_ptr { LibXML.htmlReadMemory(string, string.bytesize, nil, "utf-8", options) }
end

# Parses an HTML document from *io* with *options* into an `XML::Node`.
Expand All @@ -92,7 +92,7 @@ module XML
->(ctx) { 0 },
Box(IO).box(io),
nil,
nil,
"utf-8",
options,
) }
end
Expand Down

0 comments on commit 2b8a9c5

Please sign in to comment.