diff --git a/spec/std/xml/xml_spec.cr b/spec/std/xml/xml_spec.cr index efbb79e6e226..6ad874fb290a 100644 --- a/spec/std/xml/xml_spec.cr +++ b/spec/std/xml/xml_spec.cr @@ -419,6 +419,38 @@ describe XML do assert_prints node.to_xml, %(

<foo>

) end + it "parses HTML UTF-8 from memory (#13703)" do + doc = XML.parse_html("

České psaní

") + + node = doc.root.try(&.children.first).should_not be_nil + + node.text.should eq "České psaní" + end + + it "parses HTML UTF-8 from IO (#13703)" do + doc = XML.parse_html(IO::Memory.new("

České psaní

")) + + node = doc.root.try(&.children.first).should_not be_nil + + node.text.should eq "České psaní" + end + + it "parses XML UTF-8 from memory (#13703)" do + doc = XML.parse("

České psaní

") + + node = doc.root.try(&.children.first).should_not be_nil + + node.text.should eq "České psaní" + end + + it "parses XML UTF-8 from IO (#13703)" do + doc = XML.parse(IO::Memory.new("

České psaní

")) + + node = doc.root.try(&.children.first).should_not be_nil + + node.text.should eq "České psaní" + end + it "gets empty content" do doc = XML.parse("") doc.children.first.content.should eq("") diff --git a/src/xml.cr b/src/xml.cr index 05a1e3d41891..e0529be130f3 100644 --- a/src/xml.cr +++ b/src/xml.cr @@ -78,7 +78,7 @@ module XML # See `HTMLParserOptions.default` for default options. def self.parse_html(string : String, options : HTMLParserOptions = HTMLParserOptions.default) : Node raise XML::Error.new("Document is empty", 0) if string.empty? - from_ptr { LibXML.htmlReadMemory(string, string.bytesize, nil, nil, options) } + from_ptr { LibXML.htmlReadMemory(string, string.bytesize, nil, "utf-8", options) } end # Parses an HTML document from *io* with *options* into an `XML::Node`. @@ -92,7 +92,7 @@ module XML ->(ctx) { 0 }, Box(IO).box(io), nil, - nil, + "utf-8", options, ) } end