Skip to content

Commit

Permalink
tidy: update html4/sax/test_parser.rb to use minitest::spec (#3285)
Browse files Browse the repository at this point in the history
**What problem is this PR intended to solve?**

I'm about to do some work on the SAX parser's encoding functionality, so
let's clean this file up first.
  • Loading branch information
flavorjones authored Jul 7, 2024
2 parents f582d6b + e145889 commit 3062700
Showing 1 changed file with 55 additions and 58 deletions.
113 changes: 55 additions & 58 deletions test/html4/sax/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,80 +4,77 @@
require "helper"

module Nokogiri
module HTML
module SAX
class TestParser < Nokogiri::SAX::TestCase
def setup
super
@parser = Nokogiri::HTML4::SAX::Parser.new(Doc.new)
end
module SAX
class TestCase
describe Nokogiri::HTML4::SAX::Parser do
let(:parser) { Nokogiri::HTML4::SAX::Parser.new(Doc.new) }

def test_parse_empty_document
it "parse_empty_document" do
# This caused a segfault in libxml 2.6.x
assert_nil(@parser.parse(""))
assert_nil(parser.parse(""))
end

def test_parse_empty_file
it "parse_empty_file" do
# Make sure empty files don't break stuff
empty_file_name = File.join(ASSETS_DIR, "bogus.xml")

refute_raises do
@parser.parse_file(empty_file_name)
parser.parse_file(empty_file_name)
end
end

def test_parse_file
@parser.parse_file(HTML_FILE)
it "parse_file" do
parser.parse_file(HTML_FILE)

# See the comment in the parse_document test for a possible
# reason for this difference.
if Nokogiri.uses_libxml?
assert_equal(1111, @parser.document.end_elements.length)
assert_equal(1111, parser.document.end_elements.length)
else
assert_equal(1120, @parser.document.end_elements.length)
assert_equal(1120, parser.document.end_elements.length)
end
end

def test_parse_file_nil_argument
it "parse_file_nil_argument" do
assert_raises(ArgumentError) do
@parser.parse_file(nil)
parser.parse_file(nil)
end
end

def test_parse_file_non_existent
it "parse_file_non_existent" do
assert_raises(Errno::ENOENT) do
@parser.parse_file("there_is_no_reasonable_way_this_file_exists")
parser.parse_file("there_is_no_reasonable_way_this_file_exists")
end
end

def test_parse_file_with_dir
it "parse_file_with_dir" do
assert_raises(Errno::EISDIR) do
@parser.parse_file(File.dirname(__FILE__))
parser.parse_file(File.dirname(__FILE__))
end
end

def test_parse_memory_nil
it "parse_memory_nil" do
assert_raises(TypeError) do
@parser.parse_memory(nil)
parser.parse_memory(nil)
end
end

def test_parse_force_encoding
@parser.parse_memory(<<-HTML, "UTF-8")
it "parse_force_encoding" do
parser.parse_memory(<<-HTML, "UTF-8")
<meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
Информация
HTML
assert_equal(
"Информация",
@parser.document.data.join.strip,
parser.document.data.join.strip,
)
end

def test_parse_document
@parser.parse_memory(<<-eoxml)
it "parse_document" do
parser.parse_memory(<<~HTML)
<p>Paragraph 1</p>
<p>Paragraph 2</p>
eoxml
HTML

# The JRuby version differs because of its internal implementation:
# it uses NekoHTML, which inserts empty "head" elements.
Expand All @@ -88,17 +85,17 @@ def test_parse_document
if Nokogiri.uses_libxml?
assert_equal(
[["html", []], ["body", []], ["p", []], ["p", []]],
@parser.document.start_elements,
parser.document.start_elements,
)
else
assert_equal(
[["html", []], ["head", []], ["body", []], ["p", []], ["p", []]],
@parser.document.start_elements,
parser.document.start_elements,
)
end
end

def test_parser_attributes
it "parser_attributes" do
html = <<~eohtml
<html>
<head>
Expand All @@ -112,7 +109,7 @@ def test_parser_attributes
eohtml

block_called = false
@parser.parse(html) do |ctx|
parser.parse(html) do |ctx|
block_called = true
ctx.replace_entities = true
end
Expand All @@ -136,40 +133,40 @@ def test_parser_attributes
["size", "2"],
],],
],
@parser.document.start_elements,
parser.document.start_elements,
)
end

HTML_WITH_BR_TAG = <<-EOF
<html>
<head></head>
<body>
<div>
hello
<br>
</div>
<div>
hello again
</div>
</body>
</html>
EOF

def test_parsing_dom_error_from_string
@parser.parse(HTML_WITH_BR_TAG)
assert_equal(6, @parser.document.start_elements.length)
let(:html_with_br_tag) { <<~HTML }
<html>
<head></head>
<body>
<div>
hello
<br>
</div>
<div>
hello again
</div>
</body>
</html>
HTML

it "parsing_dom_error_from_string" do
parser.parse(html_with_br_tag)
assert_equal(6, parser.document.start_elements.length)
end

def test_parsing_dom_error_from_io
@parser.parse(StringIO.new(HTML_WITH_BR_TAG))
assert_equal(6, @parser.document.start_elements.length)
it "parsing_dom_error_from_io" do
parser.parse(StringIO.new(html_with_br_tag))
assert_equal(6, parser.document.start_elements.length)
end

def test_empty_processing_instruction
it "empty_processing_instruction" do
# https://github.com/sparklemotion/nokogiri/issues/845
refute_raises do
@parser.parse_memory("<strong>this will segfault<?strong>")
parser.parse_memory("<strong>this will segfault<?strong>")
end
end

Expand Down

0 comments on commit 3062700

Please sign in to comment.