diff --git a/CHANGELOG.md b/CHANGELOG.md index 222aec55782..b6b68ad30c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Improved * Documentation has been improved for `CSS.xpath_for`. [#3224] @flavorjones +* `XML::Schema#read_memory` and `XML::RelaxNG#read_memory` are now Ruby methods that call `#from_document`. Previously these were native functions, but they were buggy on both CRuby and JRuby (but worse on JRuby) and so this is now useful, comparable in performance, and simpler code that is easier to maintain. [#2113, #2115] @flavorjones * [CRuby] When compiling packaged libraries from source, allow users' `AR` and `LD` environment variables to set the archiver and linker commands, respectively. This augments the existing `CC` environment variable to set the compiler command. [#3165] @ziggythehamster * [CRuby] The HTML5 parse methods accept a `:parse_noscript_content_as_text` keyword argument which will emulate the parsing behavior of a browser which has scripting enabled. [#3178, #3231] @stevecheckoway * [CRuby] `HTML5::DocumentFragment.parse` and `.new` accept a `:context` keyword argument that is the parse context node or element name. Previously this could only be passed in as a positional argument to `.new` and not at all to `.parse`. 
@flavorjones diff --git a/ext/java/nokogiri/XmlSchema.java b/ext/java/nokogiri/XmlSchema.java index 2a58d31a498..be7471c394d 100644 --- a/ext/java/nokogiri/XmlSchema.java +++ b/ext/java/nokogiri/XmlSchema.java @@ -175,19 +175,6 @@ public class XmlSchema extends RubyObject return getSchema(context, (RubyClass)klazz, source, parseOptions); } - @JRubyMethod(meta = true, required = 1, optional = 1) - public static IRubyObject - read_memory(ThreadContext context, IRubyObject klazz, IRubyObject[] args) - { - IRubyObject content = args[0]; - IRubyObject parseOptions = null; - if (args.length > 1) { - parseOptions = args[1]; - } - String data = content.convertToString().asJavaString(); - return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)), parseOptions); - } - private static IRubyObject getSchema(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) { diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c index 4cf74f7cd13..40670ef0bb3 100644 --- a/ext/nokogiri/xml_relax_ng.c +++ b/ext/nokogiri/xml_relax_ng.c @@ -17,12 +17,6 @@ static const rb_data_type_t xml_relax_ng_type = { .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED, }; -/* - * call-seq: - * validate_document(document) - * - * Validate a Nokogiri::XML::Document against this RelaxNG schema. 
- */ static VALUE noko_xml_relax_ng__validate_document(VALUE self, VALUE document) { @@ -107,36 +101,23 @@ xml_relax_ng_parse_schema( } /* - * call-seq: - * read_memory(string) + * :call-seq: + * from_document(document) → Nokogiri::XML::RelaxNG + * from_document(document, parse_options) → Nokogiri::XML::RelaxNG * - * Create a new RelaxNG from the contents of +string+ - */ -static VALUE -read_memory(int argc, VALUE *argv, VALUE rb_class) -{ - VALUE rb_content; - VALUE rb_parse_options; - xmlRelaxNGParserCtxtPtr c_parser_context; - - rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options); - - c_parser_context = xmlRelaxNGNewMemParserCtxt( - (const char *)StringValuePtr(rb_content), - (int)RSTRING_LEN(rb_content) - ); - - return xml_relax_ng_parse_schema(rb_class, c_parser_context, rb_parse_options); -} - -/* - * call-seq: - * from_document(doc) + * Create a Schema from an already-parsed RELAX NG schema definition document. + * + * [Parameters] + * - +document+ (XML::Document) A XML::Document object representing the parsed RELAX NG + * - +parse_options+ (Nokogiri::XML::ParseOptions) ⚠ Unused + * + * [Returns] Nokogiri::XML::RelaxNG * - * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+ + * ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for + * future functionality. 
*/ static VALUE -from_document(int argc, VALUE *argv, VALUE rb_class) +noko_xml_relax_ng_s_from_document(int argc, VALUE *argv, VALUE rb_class) { VALUE rb_document; VALUE rb_parse_options; @@ -159,8 +140,7 @@ noko_init_xml_relax_ng(void) assert(cNokogiriXmlSchema); cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema); - rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, -1); - rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1); + rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", noko_xml_relax_ng_s_from_document, -1); rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", noko_xml_relax_ng__validate_document, 1); } diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c index 36f423c64ed..6a085d703ad 100644 --- a/ext/nokogiri/xml_schema.c +++ b/ext/nokogiri/xml_schema.c @@ -18,7 +18,7 @@ static const rb_data_type_t xml_schema_type = { }; static VALUE -validate_document(VALUE self, VALUE document) +noko_xml_schema__validate_document(VALUE self, VALUE document) { xmlDocPtr doc; xmlSchemaPtr schema; @@ -50,14 +50,8 @@ validate_document(VALUE self, VALUE document) return errors; } -/* - * call-seq: - * validate_file(filename) - * - * Validate a file against this Schema. 
- */ static VALUE -validate_file(VALUE self, VALUE rb_filename) +noko_xml_schema__validate_file(VALUE self, VALUE rb_filename) { xmlSchemaPtr schema; xmlSchemaValidCtxtPtr valid_ctxt; @@ -96,7 +90,7 @@ xml_schema_parse_schema( VALUE rb_parse_options ) { - xmlExternalEntityLoader old_loader = 0; + xmlExternalEntityLoader saved_loader = 0; libxmlStructuredErrorHandlerState handler_state; if (NIL_P(rb_parse_options)) { @@ -117,14 +111,14 @@ xml_schema_parse_schema( ); if (c_parse_options & XML_PARSE_NONET) { - old_loader = xmlGetExternalEntityLoader(); + saved_loader = xmlGetExternalEntityLoader(); xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader); } xmlSchemaPtr c_schema = xmlSchemaParse(c_parser_context); - if (old_loader) { - xmlSetExternalEntityLoader(old_loader); + if (saved_loader) { + xmlSetExternalEntityLoader(saved_loader); } xmlSchemaFreeParserCtxt(c_parser_context); @@ -147,46 +141,21 @@ xml_schema_parse_schema( } /* - * call-seq: - * read_memory(string) → Nokogiri::XML::Schema - * - * Create a new schema parsed from the contents of +string+ - * - * [Parameters] - * - +string+: String containing XML to be parsed as a schema - * - * [Returns] Nokogiri::XML::Schema - */ -static VALUE -read_memory(int argc, VALUE *argv, VALUE rb_class) -{ - VALUE rb_content; - VALUE rb_parse_options; - xmlSchemaParserCtxtPtr c_parser_context; - - rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options); - - c_parser_context = xmlSchemaNewMemParserCtxt( - (const char *)StringValuePtr(rb_content), - (int)RSTRING_LEN(rb_content) - ); - - return xml_schema_parse_schema(rb_class, c_parser_context, rb_parse_options); -} - -/* - * call-seq: + * :call-seq: * from_document(document) → Nokogiri::XML::Schema + * from_document(document, parse_options) → Nokogiri::XML::Schema * - * Create a new schema parsed from the +document+. + * Create a Schema from an already-parsed XSD schema definition document. 
* * [Parameters] - * - +document+: Nokogiri::XML::Document to be parsed + * - +document+ (XML::Document) A document object representing the parsed XSD + * - +parse_options+ (Nokogiri::XML::ParseOptions) + * Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA * * [Returns] Nokogiri::XML::Schema */ static VALUE -rb_xml_schema_s_from_document(int argc, VALUE *argv, VALUE rb_class) +noko_xml_schema_s_from_document(int argc, VALUE *argv, VALUE rb_class) { VALUE rb_document; VALUE rb_parse_options; @@ -236,9 +205,8 @@ noko_init_xml_schema(void) rb_undef_alloc_func(cNokogiriXmlSchema); - rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1); - rb_define_singleton_method(cNokogiriXmlSchema, "from_document", rb_xml_schema_s_from_document, -1); + rb_define_singleton_method(cNokogiriXmlSchema, "from_document", noko_xml_schema_s_from_document, -1); - rb_define_private_method(cNokogiriXmlSchema, "validate_document", validate_document, 1); - rb_define_private_method(cNokogiriXmlSchema, "validate_file", validate_file, 1); + rb_define_private_method(cNokogiriXmlSchema, "validate_document", noko_xml_schema__validate_document, 1); + rb_define_private_method(cNokogiriXmlSchema, "validate_file", noko_xml_schema__validate_file, 1); } diff --git a/lib/nokogiri/xml/relax_ng.rb b/lib/nokogiri/xml/relax_ng.rb index 6d01ba01e5b..dbe6c0e24e3 100644 --- a/lib/nokogiri/xml/relax_ng.rb +++ b/lib/nokogiri/xml/relax_ng.rb @@ -3,36 +3,83 @@ module Nokogiri module XML class << self - ### - # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+. - # See Nokogiri::XML::RelaxNG for an example. - def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA) - RelaxNG.new(string_or_io, options) + # :call-seq: + # RelaxNG(input) → Nokogiri::XML::RelaxNG + # RelaxNG(input, parse_options) → Nokogiri::XML::RelaxNG + # + # Parse a RELAX NG schema definition and create a new Schema object.
This is a convenience + # method for Nokogiri::XML::RelaxNG.new + # + # See related: Nokogiri::XML::RelaxNG.new + # + # [Parameters] + # - +input+ (String, IO) RELAX NG schema definition + # - +parse_options+ (Nokogiri::XML::ParseOptions) + # Defaults to ParseOptions::DEFAULT_SCHEMA + # + # [Returns] Nokogiri::XML::RelaxNG + # + def RelaxNG(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + RelaxNG.new(input, parse_options) end end - ### - # Nokogiri::XML::RelaxNG is used for validating XML against a - # RelaxNG schema. + # Nokogiri::XML::RelaxNG is used for validating XML against a RELAX NG schema definition. # - # == Synopsis + # *Example:* Determine whether an XML document is valid. # - # Validate an XML document against a RelaxNG schema. Loop over the errors - # that are returned and print them out: + # schema = Nokogiri::XML::RelaxNG(File.read(RELAX_NG_FILE)) + # doc = Nokogiri::XML(File.read(XML_FILE)) + # schema.valid?(doc) # Boolean # - # schema = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE)) - # doc = Nokogiri::XML(File.open(ADDRESS_XML_FILE)) + # *Example:* Validate an XML document against a RelaxNG schema, and capture any errors that are found. # - # schema.validate(doc).each do |error| - # puts error.message - # end + # schema = Nokogiri::XML::RelaxNG(File.open(RELAX_NG_FILE)) + # doc = Nokogiri::XML(File.open(XML_FILE)) + # errors = schema.validate(doc) # Array # - # The list of errors are Nokogiri::XML::SyntaxError objects. - # - # NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the - # underlying parsing libraries to access network resources. This is counter to Nokogiri's - # "untrusted by default" security policy, but is a limitation of the underlying libraries. + # ⚠ RELAX NG input is always treated as *trusted*, meaning that the underlying parsing libraries + # *will access network resources*. 
This is counter to Nokogiri's "untrusted by default" security + # policy, but is an unfortunate limitation of the underlying libraries. Please do not use this + # class for untrusted schema documents. class RelaxNG < Nokogiri::XML::Schema + # :call-seq: + # new(input) → Nokogiri::XML::RelaxNG + # new(input, parse_options) → Nokogiri::XML::RelaxNG + # + # Parse a RELAX NG schema definition and create a new Schema object. + # + # [Parameters] + # - +input+ (String, IO) RELAX NG schema definition + # - +parse_options+ (Nokogiri::XML::ParseOptions) + # Defaults to ParseOptions::DEFAULT_SCHEMA ⚠ Unused + # + # [Returns] Nokogiri::XML::RelaxNG + # + # ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for + # future functionality. + def self.new(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + read_memory(input, parse_options) + end + + # :call-seq: + # read_memory(input) → Nokogiri::XML::RelaxNG + # read_memory(input, parse_options) → Nokogiri::XML::RelaxNG + # + # Parse a RELAX NG schema definition and create a new Schema object. + # + # [Parameters] + # - +input+ (String) RELAX NG schema definition + # - +parse_options+ (Nokogiri::XML::ParseOptions) + # Defaults to ParseOptions::DEFAULT_SCHEMA ⚠ Unused + # + # [Returns] Nokogiri::XML::RelaxNG + # + # ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for + # future functionality. + def self.read_memory(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + from_document(Nokogiri::XML::Document.parse(input), parse_options) + end end end end diff --git a/lib/nokogiri/xml/schema.rb b/lib/nokogiri/xml/schema.rb index 497fc51532e..db3419012bf 100644 --- a/lib/nokogiri/xml/schema.rb +++ b/lib/nokogiri/xml/schema.rb @@ -3,70 +3,149 @@ module Nokogiri module XML class << self - ### - # Create a new Nokogiri::XML::Schema object using a +string_or_io+ - # object. 
- def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA) - Schema.new(string_or_io, options) + # :call-seq: + # Schema(input) → Nokogiri::XML::Schema + # Schema(input, parse_options) → Nokogiri::XML::Schema + # + # Parse an XSD schema definition and create a new {Schema} object. This is a convenience + # method for Nokogiri::XML::Schema.new + # + # See related: Nokogiri::XML::Schema.new + # + # [Parameters] + # - +input+ (String, IO) XSD schema definition + # - +parse_options+ (Nokogiri::XML::ParseOptions) + # [Returns] Nokogiri::XML::Schema + # + def Schema(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + Schema.new(input, parse_options) end end - ### - # Nokogiri::XML::Schema is used for validating XML against a schema - # (usually from an xsd file). + # Nokogiri::XML::Schema is used for validating XML against an XSD schema definition. # - # == Synopsis + # *Example:* Determine whether an XML document is valid. # - # Validate an XML document against a Schema. Loop over the errors that - # are returned and print them out: + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # doc = Nokogiri::XML(File.read(XML_FILE)) + # schema.valid?(doc) # Boolean # - # xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE)) - # doc = Nokogiri::XML(File.read(PO_XML_FILE)) + # *Example:* Validate an XML document against a Schema, and capture any errors that are found. # - # xsd.validate(doc).each do |error| - # puts error.message - # end + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # doc = Nokogiri::XML(File.read(XML_FILE)) + # errors = schema.validate(doc) # Array # - # The list of errors are Nokogiri::XML::SyntaxError objects. + # ⚠ As of v1.11.0, Schema treats inputs as *untrusted* by default, and so external entities are + # not resolved from the network (+http://+ or +ftp://+). 
When parsing a trusted document, the + # caller may turn off the +NONET+ option via the ParseOptions to (re-)enable external entity + # resolution over a network connection. # - # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities - # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated - # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default" - # security policy. If a document is trusted, then the caller may turn off the NONET option via - # the ParseOptions to re-enable external entity resolution over a network connection. + # Previously, documents were "trusted" by default during schema parsing which was counter to + # Nokogiri's "untrusted by default" security policy. class Schema - # Errors while parsing the schema file + # The errors found while parsing the XSD + # + # [Returns] Array attr_accessor :errors - # The Nokogiri::XML::ParseOptions used to parse the schema + + # The options used to parse the schema + # + # [Returns] Nokogiri::XML::ParseOptions attr_accessor :parse_options - ### - # Create a new Nokogiri::XML::Schema object using a +string_or_io+ - # object. - def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA) - from_document(Nokogiri::XML(string_or_io), options) + # :call-seq: + # new(input) → Nokogiri::XML::Schema + # new(input, parse_options) → Nokogiri::XML::Schema + # + # Parse an XSD schema definition and create a new Nokogiri::XML::Schema object.
+ # + # [Parameters] + # - +input+ (String, IO) XSD schema definition + # - +parse_options+ (Nokogiri::XML::ParseOptions) + # Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA + # + # [Returns] Nokogiri::XML::Schema + # + def self.new(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + read_memory(input, parse_options) + end + + # :call-seq: + # read_memory(input) → Nokogiri::XML::Schema + # read_memory(input, parse_options) → Nokogiri::XML::Schema + # + # Parse an XSD schema definition and create a new Schema object. + # + # 💡 Note that the limitation of this method relative to Schema.new is that +input+ must be type + # String, whereas Schema.new also supports IO types. + # + # [Parameters] + # - +input+ (String) XSD schema definition + # - +parse_options+ (Nokogiri::XML::ParseOptions) + # Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA + # + # [Returns] Nokogiri::XML::Schema + def self.read_memory(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + from_document(Nokogiri::XML::Document.parse(input), parse_options) end - ### - # Validate +thing+ against this schema. +thing+ can be a - # Nokogiri::XML::Document object, or a filename. An Array of - # Nokogiri::XML::SyntaxError objects found while validating the - # +thing+ is returned. - def validate(thing) - if thing.is_a?(Nokogiri::XML::Document) - validate_document(thing) - elsif File.file?(thing) - validate_file(thing) + # + # :call-seq: validate(input) → Array + # + # Validate +input+ and return any errors that are found. + # + # [Parameters] + # - +input+ (Nokogiri::XML::Document, String) + # + # A parsed document, or a string containing a local filename. + # + # [Returns] Array + # + # *Example:* Validate an existing Document +document+, and capture any errors that are found. + # + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # errors = schema.validate(document) + # + # *Example:* Validate an XML document on disk, and capture any errors that are found.
+ # + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # errors = schema.validate("/path/to/file.xml") + # + def validate(input) + if input.is_a?(Nokogiri::XML::Document) + validate_document(input) + elsif File.file?(input) + validate_file(input) else - raise ArgumentError, "Must provide Nokogiri::Xml::Document or the name of an existing file" + raise ArgumentError, "Must provide Nokogiri::XML::Document or the name of an existing file" end end - ### - # Returns true if +thing+ is a valid Nokogiri::XML::Document or - # file. - def valid?(thing) - validate(thing).empty? + # + # :call-seq: valid?(input) → Boolean + # + # Validate +input+ and return a Boolean indicating whether the document is valid + # + # [Parameters] + # - +input+ (Nokogiri::XML::Document, String) + # + # A parsed document, or a string containing a local filename. + # + # [Returns] Boolean + # + # *Example:* Validate an existing XML::Document +document+ + # + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # return unless schema.valid?(document) + # + # *Example:* Validate an XML document on disk + # + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # return unless schema.valid?("/path/to/file.xml") + # + def valid?(input) + validate(input).empty? end end end diff --git a/test/xml/test_relax_ng.rb b/test/xml/test_relax_ng.rb index 3bc6bcb645a..405a2c18ff3 100644 --- a/test/xml/test_relax_ng.rb +++ b/test/xml/test_relax_ng.rb @@ -11,8 +11,9 @@ def setup end def test_parse_with_memory - assert_instance_of(Nokogiri::XML::RelaxNG, @schema) - assert_equal(0, @schema.errors.length) + schema = Nokogiri::XML::RelaxNG.read_memory(File.read(ADDRESS_SCHEMA_FILE)) + assert_instance_of(Nokogiri::XML::RelaxNG, schema) + assert_equal(0, schema.errors.length) end def test_new