From 9c87439d9afa14a365ff13e73adc809cb2c3d97b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 23 Nov 2020 00:47:02 -0500 Subject: [PATCH] feat: XML::Schema and RelaxNG creation accept optional ParseOptions I'm trying out a new pattern, which is that the parsed object carries around the ParseOptions it was created with, which should make some testing a bit easier. I'm also not implementing the "config block" pattern in use for Documents, because I think the UX is weird and I'm hoping to change everything to use kwargs in a 2.0 release, anyway. --- ext/java/nokogiri/XmlRelaxng.java | 11 ++++++-- ext/java/nokogiri/XmlSchema.java | 47 ++++++++++++++++++++++--------- ext/nokogiri/xml_relax_ng.c | 39 +++++++++++++++++-------- ext/nokogiri/xml_schema.c | 46 ++++++++++++++++++++++-------- lib/nokogiri/xml/parse_options.rb | 2 ++ lib/nokogiri/xml/relax_ng.rb | 4 +-- lib/nokogiri/xml/schema.rb | 10 ++++--- test/xml/test_relax_ng.rb | 34 ++++++++++++++++++++++ test/xml/test_schema.rb | 33 ++++++++++++++++++++++ 9 files changed, 182 insertions(+), 44 deletions(-) diff --git a/ext/java/nokogiri/XmlRelaxng.java b/ext/java/nokogiri/XmlRelaxng.java index 4b0d73f4b9a..e57aa83d9f7 100644 --- a/ext/java/nokogiri/XmlRelaxng.java +++ b/ext/java/nokogiri/XmlRelaxng.java @@ -56,6 +56,7 @@ import org.jruby.RubyClass; import org.jruby.anno.JRubyClass; import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; @@ -78,11 +79,17 @@ private void setVerifier(Verifier verifier) { this.verifier = verifier; } - static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source) { + static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) { Ruby runtime = context.getRuntime(); XmlRelaxng xmlRelaxng = (XmlRelaxng) NokogiriService.XML_RELAXNG_ALLOCATOR.allocate(runtime, klazz); + + if (parseOptions == null) { + parseOptions = defaultParseOptions(context.getRuntime()); + } + xmlRelaxng.setInstanceVariable("@errors", runtime.newEmptyArray()); - + xmlRelaxng.setInstanceVariable("@parse_options", parseOptions); + try { Schema schema = xmlRelaxng.getSchema(source, context); xmlRelaxng.setVerifier(schema.newVerifier()); diff --git a/ext/java/nokogiri/XmlSchema.java b/ext/java/nokogiri/XmlSchema.java index b030fb84e9b..f84609518b6 100644 --- a/ext/java/nokogiri/XmlSchema.java +++ b/ext/java/nokogiri/XmlSchema.java @@ -106,10 +106,16 @@ private void setValidator(Validator validator) { this.validator = validator; } - static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source) { + static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) { Ruby runtime = context.getRuntime(); XmlSchema xmlSchema = (XmlSchema) NokogiriService.XML_SCHEMA_ALLOCATOR.allocate(runtime, klazz); + + if (parseOptions == null) { + parseOptions = defaultParseOptions(context.getRuntime()); + } + xmlSchema.setInstanceVariable("@errors", runtime.newEmptyArray()); + xmlSchema.setInstanceVariable("@parse_options", parseOptions); try { SchemaErrorHandler error_handler = new SchemaErrorHandler(context.getRuntime(), (RubyArray)xmlSchema.getInstanceVariable("@errors")); @@ -121,14 +127,24 @@ static XmlSchema createSchemaInstance(ThreadContext context, RubyClass klazz, So } } + protected static IRubyObject defaultParseOptions(Ruby runtime) { + return ((RubyClass)runtime.getClassFromPath("Nokogiri::XML::ParseOptions")).getConstant("DEFAULT_SCHEMA"); + } + /* * call-seq: * from_document(doc) * * Create a new Schema from the Nokogiri::XML::Document +doc+ */ - @JRubyMethod(meta=true) - public static IRubyObject from_document(ThreadContext context, IRubyObject klazz, IRubyObject document) { + @JRubyMethod(meta=true, required=1, optional=1) + public static IRubyObject from_document(ThreadContext context, IRubyObject klazz, IRubyObject[] args) { + IRubyObject document = args[0]; + IRubyObject parseOptions = null; + if (args.length > 1) { + parseOptions = args[1]; + } + XmlDocument doc = ((XmlDocument) ((XmlNode) document).document(context)); RubyArray errors = (RubyArray) doc.getInstanceVariable("@errors"); @@ -144,25 +160,30 @@ public static IRubyObject from_document(ThreadContext context, IRubyObject klazz source.setSystemId(uri.convertToString().asJavaString()); } - return getSchema(context, (RubyClass)klazz, source); + return getSchema(context, (RubyClass)klazz, source, parseOptions); } - private static IRubyObject getSchema(ThreadContext context, RubyClass klazz, Source source) { + @JRubyMethod(meta=true, required=1, optional=1) + public static IRubyObject read_memory(ThreadContext context, IRubyObject klazz, IRubyObject[] args) { + IRubyObject content = args[0]; + IRubyObject parseOptions = null; + if (args.length > 1) { + parseOptions = args[1]; + } + String data = content.convertToString().asJavaString(); + return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)), parseOptions); + } + + private static IRubyObject getSchema(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) { String moduleName = klazz.getName(); if ("Nokogiri::XML::Schema".equals(moduleName)) { - return XmlSchema.createSchemaInstance(context, klazz, source); + return XmlSchema.createSchemaInstance(context, klazz, source, parseOptions); } else if ("Nokogiri::XML::RelaxNG".equals(moduleName)) { - return XmlRelaxng.createSchemaInstance(context, klazz, source); + return XmlRelaxng.createSchemaInstance(context, klazz, source, parseOptions); } return context.getRuntime().getNil(); } - @JRubyMethod(meta=true) - public static IRubyObject read_memory(ThreadContext context, IRubyObject klazz, IRubyObject content) { - String data = content.convertToString().asJavaString(); - return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data))); - } - @JRubyMethod(visibility=Visibility.PRIVATE) public IRubyObject validate_document(ThreadContext context, IRubyObject document) { return validate_document_or_file(context, (XmlDocument)document); diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c index cb80bdb056f..9e62ff39108 100644 --- a/ext/nokogiri/xml_relax_ng.c +++ b/ext/nokogiri/xml_relax_ng.c @@ -53,16 +53,24 @@ static VALUE validate_document(VALUE self, VALUE document) * * Create a new RelaxNG from the contents of +string+ */ -static VALUE read_memory(VALUE klass, VALUE content) +static VALUE read_memory(int argc, VALUE *argv, VALUE klass) { - xmlRelaxNGParserCtxtPtr ctx = xmlRelaxNGNewMemParserCtxt( - (const char *)StringValuePtr(content), - (int)RSTRING_LEN(content) - ); + VALUE content; + VALUE parse_options; + xmlRelaxNGParserCtxtPtr ctx; xmlRelaxNGPtr schema; - VALUE errors = rb_ary_new(); + VALUE errors; VALUE rb_schema; + int scanned_args = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options); + if (scanned_args == 1) { + parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } + ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content)); + + errors = rb_ary_new(); xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher); #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS @@ -90,6 +98,7 @@ static VALUE read_memory(VALUE klass, VALUE content) rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; } @@ -100,18 +109,25 @@ static VALUE read_memory(VALUE klass, VALUE content) * * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+ */ -static VALUE from_document(VALUE klass, VALUE document) +static VALUE from_document(int argc, VALUE *argv, VALUE klass) { + VALUE document; + VALUE parse_options; xmlDocPtr doc; xmlRelaxNGParserCtxtPtr ctx; xmlRelaxNGPtr schema; VALUE errors; VALUE rb_schema; + int scanned_args = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options); Data_Get_Struct(document, xmlDoc, doc); + doc = doc->doc; /* In case someone passes us a node. ugh. */ - /* In case someone passes us a node. ugh. */ - doc = doc->doc; + if (scanned_args == 1) { + parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } ctx = xmlRelaxNGNewDocParserCtxt(doc); @@ -143,6 +159,7 @@ static VALUE from_document(VALUE klass, VALUE document) rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; } @@ -156,7 +173,7 @@ void init_xml_relax_ng() cNokogiriXmlRelaxNG = klass; - rb_define_singleton_method(klass, "read_memory", read_memory, 1); - rb_define_singleton_method(klass, "from_document", from_document, 1); + rb_define_singleton_method(klass, "read_memory", read_memory, -1); + rb_define_singleton_method(klass, "from_document", from_document, -1); rb_define_private_method(klass, "validate_document", validate_document, 1); } diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c index 439f7219648..ea7c3d31644 100644 --- a/ext/nokogiri/xml_schema.c +++ b/ext/nokogiri/xml_schema.c @@ -93,15 +93,26 @@ static VALUE validate_file(VALUE self, VALUE rb_filename) * * Create a new Schema from the contents of +string+ */ -static VALUE read_memory(VALUE klass, VALUE content) +static VALUE read_memory(int argc, VALUE *argv, VALUE klass) { + VALUE content; + VALUE parse_options; + int parse_options_int; + xmlSchemaParserCtxtPtr ctx; xmlSchemaPtr schema; - xmlSchemaParserCtxtPtr ctx = xmlSchemaNewMemParserCtxt( - (const char *)StringValuePtr(content), - (int)RSTRING_LEN(content) - ); + VALUE errors; VALUE rb_schema; - VALUE errors = rb_ary_new(); + int scanned_args = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options); + if (scanned_args == 1) { + parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } + parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0)); + + ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content)); + + errors = rb_ary_new(); xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher); #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS @@ -109,7 +120,7 @@ static VALUE read_memory(VALUE klass, VALUE content) ctx, Nokogiri_error_array_pusher, (void *)errors - ); + ); #endif schema = xmlSchemaParse(ctx); @@ -129,6 +140,7 @@ static VALUE read_memory(VALUE klass, VALUE content) rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; } @@ -164,18 +176,27 @@ static int has_blank_nodes_p(VALUE cache) * * Create a new Schema from the Nokogiri::XML::Document +doc+ */ -static VALUE from_document(VALUE klass, VALUE document) +static VALUE from_document(int argc, VALUE *argv, VALUE klass) { + VALUE document; + VALUE parse_options; + int parse_options_int; xmlDocPtr doc; xmlSchemaParserCtxtPtr ctx; xmlSchemaPtr schema; VALUE errors; VALUE rb_schema; + int scanned_args = 0; + + scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options); Data_Get_Struct(document, xmlDoc, doc); + doc = doc->doc; /* In case someone passes us a node. ugh. */ - /* In case someone passes us a node. ugh. */ - doc = doc->doc; + if (scanned_args == 1) { + parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA")); + } + parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0)); if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) { rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous"); @@ -211,6 +232,7 @@ static VALUE from_document(VALUE klass, VALUE document) rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema); rb_iv_set(rb_schema, "@errors", errors); + rb_iv_set(rb_schema, "@parse_options", parse_options); return rb_schema; @@ -226,8 +248,8 @@ void init_xml_schema() cNokogiriXmlSchema = klass; - rb_define_singleton_method(klass, "read_memory", read_memory, 1); - rb_define_singleton_method(klass, "from_document", from_document, 1); + rb_define_singleton_method(klass, "read_memory", read_memory, -1); + rb_define_singleton_method(klass, "from_document", from_document, -1); rb_define_private_method(klass, "validate_document", validate_document, 1); rb_define_private_method(klass, "validate_file", validate_file, 1); diff --git a/lib/nokogiri/xml/parse_options.rb b/lib/nokogiri/xml/parse_options.rb index 039afa2dcf9..a266d5ba073 100644 --- a/lib/nokogiri/xml/parse_options.rb +++ b/lib/nokogiri/xml/parse_options.rb @@ -73,6 +73,8 @@ class ParseOptions DEFAULT_XML = RECOVER | NONET # the default options used for parsing HTML documents DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET + # the default options used for parsing XML schemas + DEFAULT_SCHEMA = NONET attr_accessor :options def initialize options = STRICT diff --git a/lib/nokogiri/xml/relax_ng.rb b/lib/nokogiri/xml/relax_ng.rb index 4d9ad65daf0..b1e83efb06e 100644 --- a/lib/nokogiri/xml/relax_ng.rb +++ b/lib/nokogiri/xml/relax_ng.rb @@ -5,8 +5,8 @@ class << self ### # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+. # See Nokogiri::XML::RelaxNG for an example. - def RelaxNG string_or_io - RelaxNG.new(string_or_io) + def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA) + RelaxNG.new(string_or_io, options) end end diff --git a/lib/nokogiri/xml/schema.rb b/lib/nokogiri/xml/schema.rb index 60f1b2d36b9..b3719d7d24d 100644 --- a/lib/nokogiri/xml/schema.rb +++ b/lib/nokogiri/xml/schema.rb @@ -5,8 +5,8 @@ class << self ### # Create a new Nokogiri::XML::Schema object using a +string_or_io+ # object. - def Schema string_or_io - Schema.new(string_or_io) + def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA) + Schema.new(string_or_io, options) end end @@ -30,12 +30,14 @@ def Schema string_or_io class Schema # Errors while parsing the schema file attr_accessor :errors + # The Nokogiri::XML::ParseOptions used to parse the schema + attr_accessor :parse_options ### # Create a new Nokogiri::XML::Schema object using a +string_or_io+ # object. - def self.new string_or_io - from_document Nokogiri::XML(string_or_io) + def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA + from_document(Nokogiri::XML(string_or_io), options) end ### diff --git a/test/xml/test_relax_ng.rb b/test/xml/test_relax_ng.rb index 23ede368ac1..02fd91b01c6 100644 --- a/test/xml/test_relax_ng.rb +++ b/test/xml/test_relax_ng.rb @@ -26,6 +26,40 @@ def test_parse_with_io assert_equal 0, xsd.errors.length end + def test_constructor_method_with_parse_options + schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE)) + assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options + + schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover) + assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options + end + + def test_new_with_parse_options + schema = Nokogiri::XML::RelaxNG.new(File.read(ADDRESS_SCHEMA_FILE)) + assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options + + schema = Nokogiri::XML::RelaxNG.new(File.read(ADDRESS_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover) + assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options + end + + def test_from_document_with_parse_options + schema = Nokogiri::XML::RelaxNG.from_document(Nokogiri::XML::Document.parse(File.read(ADDRESS_SCHEMA_FILE))) + assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options + + schema = Nokogiri::XML::RelaxNG.from_document(Nokogiri::XML::Document.parse(File.read(ADDRESS_SCHEMA_FILE)), + Nokogiri::XML::ParseOptions.new.recover) + assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options + end + + def test_read_memory_with_parse_options + schema = Nokogiri::XML::RelaxNG.read_memory(File.read(ADDRESS_SCHEMA_FILE)) + assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options + + schema = Nokogiri::XML::RelaxNG.read_memory(File.read(ADDRESS_SCHEMA_FILE), + Nokogiri::XML::ParseOptions.new.recover) + assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options + end + def test_parse_with_errors xml = File.read(ADDRESS_SCHEMA_FILE).sub(/name="/, 'name=') assert_raises(Nokogiri::XML::SyntaxError) { diff --git a/test/xml/test_schema.rb b/test/xml/test_schema.rb index 908c7c18d59..2bd267b9dac 100644 --- a/test/xml/test_schema.rb +++ b/test/xml/test_schema.rb @@ -109,6 +109,39 @@ def test_new assert_instance_of Nokogiri::XML::Schema, xsd end + def test_schema_method_with_parse_options + schema = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE)) + assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options + + schema = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover) + assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options + end + + def test_schema_new_with_parse_options + schema = Nokogiri::XML::Schema.new(File.read(PO_SCHEMA_FILE)) + assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options + + schema = Nokogiri::XML::Schema.new(File.read(PO_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover) + assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options + end + + def test_schema_from_document_with_parse_options + schema = Nokogiri::XML::Schema.from_document(Nokogiri::XML::Document.parse(File.read(PO_SCHEMA_FILE))) + assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options + + schema = Nokogiri::XML::Schema.from_document(Nokogiri::XML::Document.parse(File.read(PO_SCHEMA_FILE)), + Nokogiri::XML::ParseOptions.new.recover) + assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options + end + + def test_schema_read_memory_with_parse_options + schema = Nokogiri::XML::Schema.read_memory(File.read(PO_SCHEMA_FILE)) + assert_equal Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA, schema.parse_options + + schema = Nokogiri::XML::Schema.read_memory(File.read(PO_SCHEMA_FILE), Nokogiri::XML::ParseOptions.new.recover) + assert_equal Nokogiri::XML::ParseOptions.new.recover, schema.parse_options + end + def test_parse_with_io xsd = nil File.open(PO_SCHEMA_FILE, "rb") { |f|