From 49c5b8b3dd71f73bb8bae6b9c6a5c9c49999ef36 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 24 Jun 2024 17:09:16 -0400 Subject: [PATCH] fix: RelaxNG.read_memory is now reliable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On both JRuby and CRuby impls, replace the native `read_memory` method from XML::Schema and XML::RelaxNG with a ruby method that uses `from_document`. `read_memory` was buggy on both platforms, but especially on the JRuby impl. This is comparable in performance on CRuby. From a benchmark taken before this change: Warming up -------------------------------------- Schema.new 1.219k i/100ms Schema.from_document 1.258k i/100ms Schema.read_memory 1.118k i/100ms Calculating ------------------------------------- Schema.new 12.160k (± 8.3%) i/s - 121.900k in 10.093638s Schema.from_document 12.216k (± 8.6%) i/s - 122.026k in 10.059696s Schema.read_memory 12.790k (±10.7%) i/s - 127.452k in 10.105931s Comparison: Schema.read_memory: 12789.6 i/s Schema.from_document: 12215.9 i/s - same-ish: difference falls within error Schema.new: 12160.1 i/s - same-ish: difference falls within error IMHO the resulting code is less buggy and easier to maintain, and the slight (if any) performance hit is worth it. Closes #2113 Closes #2115 --- CHANGELOG.md | 1 + ext/java/nokogiri/XmlSchema.java | 13 ------------ ext/nokogiri/xml_relax_ng.c | 34 ------------------------------- ext/nokogiri/xml_schema.c | 35 -------------------------------- lib/nokogiri/xml/relax_ng.rb | 25 +++++++++++++++++++++-- lib/nokogiri/xml/schema.rb | 21 ++++++++++++++++++- test/xml/test_relax_ng.rb | 5 +++-- 7 files changed, 47 insertions(+), 87 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 222aec55782..b6b68ad30c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Improved * Documentation has been improved for `CSS.xpath_for`. [#3224] @flavorjones +* `XML::Schema#read_memory` and `XML::RelaxNG#read_memory` are now Ruby methods that call `#from_document`. Previously these were native functions, but they were buggy on both CRuby and JRuby (but worse on JRuby) and so this is now useful, comparable in performance, and simpler code that is easier to maintain. [#2113, #2115] @flavorjones * [CRuby] When compiling packaged libraries from source, allow users' `AR` and `LD` environment variables to set the archiver and linker commands, respectively. This augments the existing `CC` environment variable to set the compiler command. [#3165] @ziggythehamster * [CRuby] The HTML5 parse methods accept a `:parse_noscript_content_as_text` keyword argument which will emulate the parsing behavior of a browser which has scripting enabled. [#3178, #3231] @stevecheckoway * [CRuby] `HTML5::DocumentFragment.parse` and `.new` accept a `:context` keyword argument that is the parse context node or element name. Previously this could only be passed in as a positional argument to `.new` and not at all to `.parse`. @flavorjones diff --git a/ext/java/nokogiri/XmlSchema.java b/ext/java/nokogiri/XmlSchema.java index 2a58d31a498..be7471c394d 100644 --- a/ext/java/nokogiri/XmlSchema.java +++ b/ext/java/nokogiri/XmlSchema.java @@ -175,19 +175,6 @@ public class XmlSchema extends RubyObject return getSchema(context, (RubyClass)klazz, source, parseOptions); } - @JRubyMethod(meta = true, required = 1, optional = 1) - public static IRubyObject - read_memory(ThreadContext context, IRubyObject klazz, IRubyObject[] args) - { - IRubyObject content = args[0]; - IRubyObject parseOptions = null; - if (args.length > 1) { - parseOptions = args[1]; - } - String data = content.convertToString().asJavaString(); - return getSchema(context, (RubyClass) klazz, new StreamSource(new StringReader(data)), parseOptions); - } - private static IRubyObject getSchema(ThreadContext context, RubyClass klazz, Source source, IRubyObject parseOptions) { diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c index 8d303db0285..40670ef0bb3 100644 --- a/ext/nokogiri/xml_relax_ng.c +++ b/ext/nokogiri/xml_relax_ng.c @@ -100,39 +100,6 @@ xml_relax_ng_parse_schema( return rb_schema; } -/* - * :call-seq: - * read_memory(input) → Nokogiri::XML::RelaxNG - * read_memory(input, parse_options) → Nokogiri::XML::RelaxNG - * - * Parse a RELAX NG schema definition and create a new Schema object. - * - * 💡 Note that the limitation of this method relative to RelaxNG.new is that +input+ must be type - * String, whereas RelaxNG.new also supports IO types. - * - * [Parameters] - * - +input+ (String) RELAX NG schema definition - * - +parse_options+ (Nokogiri::XML::ParseOptions) Defaults to ParseOptions::DEFAULT_SCHEMA - * - * [Returns] Nokogiri::XML::RelaxNG - */ -static VALUE -noko_xml_relax_ng_s_read_memory(int argc, VALUE *argv, VALUE rb_class) -{ - VALUE rb_content; - VALUE rb_parse_options; - xmlRelaxNGParserCtxtPtr c_parser_context; - - rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options); - - c_parser_context = xmlRelaxNGNewMemParserCtxt( - (const char *)StringValuePtr(rb_content), - (int)RSTRING_LEN(rb_content) - ); - - return xml_relax_ng_parse_schema(rb_class, c_parser_context, rb_parse_options); -} - /* * :call-seq: * from_document(document) → Nokogiri::XML::RelaxNG @@ -173,7 +140,6 @@ noko_init_xml_relax_ng(void) assert(cNokogiriXmlSchema); cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema); - rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", noko_xml_relax_ng_s_read_memory, -1); rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", noko_xml_relax_ng_s_from_document, -1); rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", noko_xml_relax_ng__validate_document, 1); diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c index 464d879ae94..6a085d703ad 100644 --- a/ext/nokogiri/xml_schema.c +++ b/ext/nokogiri/xml_schema.c @@ -140,40 +140,6 @@ xml_schema_parse_schema( return rb_schema; } -/* - * :call-seq: - * read_memory(input) → Nokogiri::XML::Schema - * read_memory(input, parse_options) → Nokogiri::XML::Schema - * - * Parse an XSD schema definition and create a new Schema object. - * - * 💡 Note that the limitation of this method relative to Schema.new is that +input+ must be type - * String, whereas Schema.new also supports IO types. - * - * [parameters] - * - +input+ (String) XSD schema definition - * - +parse_options+ (Nokogiri::XML::ParseOptions) - * Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA - * - * [Returns] Nokogiri::XML::Schema - */ -static VALUE -xml_schema_s_read_memory(int argc, VALUE *argv, VALUE rb_class) -{ - VALUE rb_content; - VALUE rb_parse_options; - xmlSchemaParserCtxtPtr c_parser_context; - - rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options); - - c_parser_context = xmlSchemaNewMemParserCtxt( - (const char *)StringValuePtr(rb_content), - (int)RSTRING_LEN(rb_content) - ); - - return xml_schema_parse_schema(rb_class, c_parser_context, rb_parse_options); -} - /* * :call-seq: * from_document(document) → Nokogiri::XML::Schema @@ -239,7 +205,6 @@ noko_init_xml_schema(void) rb_undef_alloc_func(cNokogiriXmlSchema); - rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", xml_schema_s_read_memory, -1); rb_define_singleton_method(cNokogiriXmlSchema, "from_document", noko_xml_schema_s_from_document, -1); rb_define_private_method(cNokogiriXmlSchema, "validate_document", noko_xml_schema__validate_document, 1); diff --git a/lib/nokogiri/xml/relax_ng.rb b/lib/nokogiri/xml/relax_ng.rb index b607205679d..dbe6c0e24e3 100644 --- a/lib/nokogiri/xml/relax_ng.rb +++ b/lib/nokogiri/xml/relax_ng.rb @@ -52,12 +52,33 @@ class RelaxNG < Nokogiri::XML::Schema # [Parameters] # - +input+ (String, IO) RELAX NG schema definition # - +parse_options+ (Nokogiri::XML::ParseOptions) - # Defaults to ParseOptions::DEFAULT_SCHEMA + # Defaults to ParseOptions::DEFAULT_SCHEMA ⚠ Unused # # [Returns] Nokogiri::XML::RelaxNG # + # ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for + # future functionality. def self.new(input, parse_options = ParseOptions::DEFAULT_SCHEMA) - from_document(Nokogiri::XML(input), parse_options) + read_memory(input, parse_options) + end + + # :call-seq: + # read_memory(input) → Nokogiri::XML::RelaxNG + # read_memory(input, parse_options) → Nokogiri::XML::RelaxNG + # + # Parse a RELAX NG schema definition and create a new Schema object. + # + # [Parameters] + # - +input+ (String) RELAX NG schema definition + # - +parse_options+ (Nokogiri::XML::ParseOptions) + # Defaults to ParseOptions::DEFAULT_SCHEMA ⚠ Unused + # + # [Returns] Nokogiri::XML::RelaxNG + # + # ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for + # future functionality. + def self.read_memory(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + from_document(Nokogiri::XML::Document.parse(input), parse_options) end end end diff --git a/lib/nokogiri/xml/schema.rb b/lib/nokogiri/xml/schema.rb index 52bd4335956..db3419012bf 100644 --- a/lib/nokogiri/xml/schema.rb +++ b/lib/nokogiri/xml/schema.rb @@ -68,7 +68,26 @@ class Schema # [Returns] Nokogiri::XML::Schema # def self.new(input, parse_options = ParseOptions::DEFAULT_SCHEMA) - from_document(Nokogiri::XML(input), parse_options) + read_memory(input, parse_options) + end + + # :call-seq: + # read_memory(input) → Nokogiri::XML::Schema + # read_memory(input, parse_options) → Nokogiri::XML::Schema + # + # Parse an XSD schema definition and create a new Schema object. + # + # 💡 Note that the limitation of this method relative to Schema.new is that +input+ must be type + # String, whereas Schema.new also supports IO types. + # + # [parameters] + # - +input+ (String) XSD schema definition + # - +parse_options+ (Nokogiri::XML::ParseOptions) + # Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA + # + # [Returns] Nokogiri::XML::Schema + def self.read_memory(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + from_document(Nokogiri::XML::Document.parse(input), parse_options) end # diff --git a/test/xml/test_relax_ng.rb b/test/xml/test_relax_ng.rb index 3bc6bcb645a..405a2c18ff3 100644 --- a/test/xml/test_relax_ng.rb +++ b/test/xml/test_relax_ng.rb @@ -11,8 +11,9 @@ def setup end def test_parse_with_memory - assert_instance_of(Nokogiri::XML::RelaxNG, @schema) - assert_equal(0, @schema.errors.length) + schema = Nokogiri::XML::RelaxNG.read_memory(File.read(ADDRESS_SCHEMA_FILE)) + assert_instance_of(Nokogiri::XML::RelaxNG, schema) + assert_equal(0, schema.errors.length) end def test_new