From e09e587f598c03d75c4a84bf6694c7c2b473376d Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 31 Jan 2024 16:21:59 -0500 Subject: [PATCH] fix: XML::Document#clone copies the singleton class Partially fixes #316 --- ext/java/nokogiri/XmlDocument.java | 4 +-- ext/nokogiri/xml_document.c | 55 +++++++++++++----------------- lib/nokogiri/xml/document.rb | 37 +++++++++++++++++++- test/xml/test_document.rb | 38 +++++++++++++++++++++ 4 files changed, 100 insertions(+), 34 deletions(-) diff --git a/ext/java/nokogiri/XmlDocument.java b/ext/java/nokogiri/XmlDocument.java index efc1d5f1bee..033a0c64b8c 100644 --- a/ext/java/nokogiri/XmlDocument.java +++ b/ext/java/nokogiri/XmlDocument.java @@ -416,9 +416,9 @@ private static class DocumentBuilderFactoryHolder @JRubyMethod(visibility = Visibility.PROTECTED) public IRubyObject - initialize_copy_with_args(ThreadContext context, IRubyObject other, IRubyObject level, IRubyObject _ignored) + initialize_copy_with_args(ThreadContext context, IRubyObject other, IRubyObject level) { - super.initialize_copy_with_args(context, other, level, _ignored); + super.initialize_copy_with_args(context, other, level, null); resetCache(); return this; } diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index c5be895aba2..e0396390ef1 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -171,6 +171,25 @@ _xml_document_data_ptr_set(VALUE rb_document, xmlDocPtr c_document) return; } +/* :nodoc: */ +static VALUE +rb_xml_document_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level) +{ + xmlDocPtr c_other, c_self; + int c_level; + + c_other = noko_xml_document_unwrap(rb_other); + c_level = (int)NUM2INT(rb_level); + + c_self = xmlCopyDoc(c_other, c_level); + if (c_self == NULL) { return Qnil; } + + c_self->type = c_other->type; + _xml_document_data_ptr_set(rb_self, c_self); +// rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors")); + + return rb_self ; +} static void recursively_remove_namespaces_from_node(xmlNodePtr node) @@ -435,36 +454,6 @@ read_memory(VALUE klass, return document; } -/* - * call-seq: - * dup - * - * Copy this Document. An optional depth may be passed in, but it defaults - * to a deep copy. 0 is a shallow copy, 1 is a deep copy. - */ -static VALUE -duplicate_document(int argc, VALUE *argv, VALUE self) -{ - xmlDocPtr doc, dup; - VALUE copy; - VALUE level; - - if (rb_scan_args(argc, argv, "01", &level) == 0) { - level = INT2NUM((long)1); - } - - doc = noko_xml_document_unwrap(self); - - dup = xmlCopyDoc(doc, (int)NUM2INT(level)); - - if (dup == NULL) { return Qnil; } - - dup->type = doc->type; - copy = noko_xml_document_wrap(rb_obj_class(self), dup); - rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors")); - return copy ; -} - /* * call-seq: * new(version = default) @@ -781,6 +770,8 @@ noko_init_xml_document(void) */ cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode); + rb_define_alloc_func(cNokogiriXmlDocument, _xml_document_alloc); + rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", read_memory, 4); rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4); rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1); @@ -791,8 +782,10 @@ noko_init_xml_document(void) rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1); rb_define_method(cNokogiriXmlDocument, "version", version, 0); rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1); - rb_define_method(cNokogiriXmlDocument, "dup", duplicate_document, -1); rb_define_method(cNokogiriXmlDocument, "url", url, 0); rb_define_method(cNokogiriXmlDocument, "create_entity", create_entity, -1); rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0); + + rb_define_protected_method(cNokogiriXmlDocument, "initialize_copy_with_args", rb_xml_document_initialize_copy_with_args, + 2); } diff --git a/lib/nokogiri/xml/document.rb b/lib/nokogiri/xml/document.rb index 1a4ee5f0ab1..aaa0e4353c2 100644 --- a/lib/nokogiri/xml/document.rb +++ b/lib/nokogiri/xml/document.rb @@ -19,6 +19,10 @@ class Document < Nokogiri::XML::Node NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9" NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/ + OBJECT_DUP_METHOD = Object.instance_method(:dup) + OBJECT_CLONE_METHOD = Object.instance_method(:clone) + private_constant :OBJECT_DUP_METHOD, :OBJECT_CLONE_METHOD + class << self # Parse an XML file. # @@ -180,6 +184,38 @@ def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper @namespace_inheritance = false end + # + # :call-seq: + # dup → Nokogiri::XML::Document + # dup(level) → Nokogiri::XML::Document + # + # Duplicate this node. + # + # [Parameters] + # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy. + # [Returns] The new Nokogiri::XML::Document + # + def dup(level = 1) + copy = OBJECT_DUP_METHOD.bind_call(self) + copy.initialize_copy_with_args(self, level) + end + + # + # :call-seq: + # clone → Nokogiri::XML::Document + # clone(level) → Nokogiri::XML::Document + # + # Clone this node. + # + # [Parameters] + # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy. + # [Returns] The new Nokogiri::XML::Document + # + def clone(level = 1) + copy = OBJECT_CLONE_METHOD.bind_call(self) + copy.initialize_copy_with_args(self, level) + end + # :call-seq: # create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element # @@ -372,7 +408,6 @@ def decorate(node) end alias_method :to_xml, :serialize - alias_method :clone, :dup # Get the hash of namespaces on the root Nokogiri::XML::Node def namespaces diff --git a/test/xml/test_document.rb b/test/xml/test_document.rb index b5dbe1b8a2a..7ccb2df8e68 100644 --- a/test/xml/test_document.rb +++ b/test/xml/test_document.rb @@ -444,6 +444,44 @@ def test_validate_no_internal_subset end end + def test_dup_should_not_copy_singleton_class + # https://github.com/sparklemotion/nokogiri/issues/316 + m = Module.new do + def foo; end + end + + doc = Nokogiri::XML::Document.parse("") + doc.extend(m) + + assert_respond_to(doc, :foo) + refute_respond_to(doc.dup, :foo) + end + + def test_clone_should_copy_singleton_class + # https://github.com/sparklemotion/nokogiri/issues/316 + m = Module.new do + def foo; end + end + + doc = Nokogiri::XML::Document.parse("") + doc.extend(m) + + assert_respond_to(doc, :foo) + assert_respond_to(doc.clone, :foo) + end + + def test_inspect_object_with_no_data_ptr + # test for the edge case when an exception is thrown during object construction/copy + doc = Nokogiri::XML("") + refute_includes(doc.inspect, "(no data)") + + if doc.respond_to?(:data_ptr?) + doc.stub(:data_ptr?, false) do + assert_includes(doc.inspect, "(no data)") + end + end + end + def test_document_should_not_have_default_ns doc = Nokogiri::XML::Document.new