From 29c145b2e8a7e7d61ea81ef0863fe180cbea5ea7 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 29 Nov 2018 12:07:24 +0100 Subject: [PATCH] Implement error handling in Nokogiri parser If a parse error occurred anywhere inside the XML, the Nokogiri parser just returns whatever happens to be on top of the stack. This makes it very hard to troubleshoot the error down the line. To avoid this we need to implement the `error` method and raise an error. This introduces a new exception type Nori::ParseError which we also need to raise in the REXML parser to make it symmetric. --- lib/nori.rb | 1 + lib/nori/parser/nokogiri.rb | 5 +++++ lib/nori/parser/rexml.rb | 6 +++++- spec/nori/nori_spec.rb | 4 ++++ 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/nori.rb b/lib/nori.rb index ba55215..2d70e6c 100644 --- a/lib/nori.rb +++ b/lib/nori.rb @@ -3,6 +3,7 @@ require "nori/xml_utility_node" class Nori + class ParseError < StandardError; end def self.hash_key(name, options = {}) name = name.tr("-", "_") if options[:convert_dashes_to_underscores] diff --git a/lib/nori/parser/nokogiri.rb b/lib/nori/parser/nokogiri.rb index af45ecb..533de70 100644 --- a/lib/nori/parser/nokogiri.rb +++ b/lib/nori/parser/nokogiri.rb @@ -10,6 +10,7 @@ module Nokogiri class Document < ::Nokogiri::XML::SAX::Document attr_accessor :options + attr_accessor :last_error def stack @stack ||= [] @@ -44,6 +45,9 @@ def characters(string) alias cdata_block characters + def error(message) + @last_error = message + end end def self.parse(xml, options) @@ -51,6 +55,7 @@ def self.parse(xml, options) document.options = options parser = ::Nokogiri::XML::SAX::Parser.new document parser.parse xml + raise ParseError, document.last_error if document.last_error document.stack.length > 0 ? document.stack.pop.to_hash : {} end diff --git a/lib/nori/parser/rexml.rb b/lib/nori/parser/rexml.rb index 0cada7f..efc1fe6 100644 --- a/lib/nori/parser/rexml.rb +++ b/lib/nori/parser/rexml.rb @@ -15,7 +15,11 @@ def self.parse(xml, options) parser = ::REXML::Parsers::BaseParser.new(xml) while true - raw_data = parser.pull + begin + raw_data = parser.pull + rescue ::REXML::ParseException => error + raise Nori::ParseError, error.message + end event = unnormalize(raw_data) case event[0] when :end_document diff --git a/spec/nori/nori_spec.rb b/spec/nori/nori_spec.rb index e673722..d11a6f4 100644 --- a/spec/nori/nori_spec.rb +++ b/spec/nori/nori_spec.rb @@ -640,6 +640,10 @@ expect(parse(' ')).to eq({}) end + it "raises error on missing end tag" do + expect { parse('foo bar') }.to raise_error(Nori::ParseError) + end + end end