From df74a0b60967de1b6e78fe8ad0fb58fb85192130 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sat, 28 Sep 2024 00:26:52 +0900 Subject: [PATCH 1/5] test: Add test for parsing attributes prefixed by "xml" namespace name --- test/parser/test_base_parser.rb | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/parser/test_base_parser.rb b/test/parser/test_base_parser.rb index 17d01979..246a8475 100644 --- a/test/parser/test_base_parser.rb +++ b/test/parser/test_base_parser.rb @@ -23,5 +23,40 @@ def test_large_xml parser.position < xml.bytesize end end + + def test_attribute_prefixed_by_xml + xml = <<-XML + + + + + XHTML Document + + +

XHTML Document

+

この段落は日本語です。

+ + + XML + + parser = REXML::Parsers::BaseParser.new(xml) + 5.times {parser.pull} + + html = parser.pull + assert_equal [:start_element, + "html", + {"xmlns" => "http://www.w3.org/1999/xhtml", + "xml:lang" => "en", + "lang" => "en"}], + html + + 15.times {parser.pull} + + p = parser.pull + assert_equal [:start_element, + "p", + {"xml:lang" => "ja", "lang" => "ja"}], + p + end end end From f98d418a18440e8f9dd9c5eaea7952e2c1ccacef Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sat, 28 Sep 2024 00:54:32 +0900 Subject: [PATCH 2/5] Set up "xml:" namespace on initialize --- lib/rexml/parsers/baseparser.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 89a9d0b6..849c21e8 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -185,7 +185,7 @@ def stream=( source ) @tags = [] @stack = [] @entities = [] - @namespaces = {} + @namespaces = {"xml" => "http://www.w3.org/XML/1998/namespace"} @namespaces_restore_stack = [] end From f531b709f4349df1c79c199fcad53ed10d3a2b05 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sat, 28 Sep 2024 00:54:55 +0900 Subject: [PATCH 3/5] Make "xml:" namespace a constant --- lib/rexml/parsers/baseparser.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 849c21e8..6101f7a9 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -140,6 +140,8 @@ class BaseParser "apos" => [/'/, "'", "'", /'/] } + XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace" + module Private PEREFERENCE_PATTERN = /#{PEREFERENCE}/um TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um @@ -185,7 +187,7 @@ def stream=( source ) @tags = [] @stack = [] @entities = [] - @namespaces = {"xml" => "http://www.w3.org/XML/1998/namespace"} + @namespaces = {"xml" => XML_PREFIXED_NAMESPACE} @namespaces_restore_stack = [] end @@ -790,7 
+792,7 @@ def parse_attributes(prefixes) @source.match(/\s*/um, true) if prefix == "xmlns" if local_part == "xml" - if value != "http://www.w3.org/XML/1998/namespace" + if value != XML_PREFIXED_NAMESPACE msg = "The 'xml' prefix must not be bound to any other namespace "+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" raise REXML::ParseException.new( msg, @source, self ) From 77913ffdefcd4221c6b95bb96d00dcaf8768a52e Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 29 Sep 2024 00:29:22 +0900 Subject: [PATCH 4/5] Complete parens for readability --- test/parser/test_base_parser.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/parser/test_base_parser.rb b/test/parser/test_base_parser.rb index 246a8475..da169a25 100644 --- a/test/parser/test_base_parser.rb +++ b/test/parser/test_base_parser.rb @@ -43,20 +43,20 @@ def test_attribute_prefixed_by_xml 5.times {parser.pull} html = parser.pull - assert_equal [:start_element, + assert_equal([:start_element, "html", {"xmlns" => "http://www.w3.org/1999/xhtml", "xml:lang" => "en", "lang" => "en"}], - html + html) 15.times {parser.pull} p = parser.pull - assert_equal [:start_element, + assert_equal([:start_element, "p", {"xml:lang" => "ja", "lang" => "ja"}], - p + p) end end end From 6d4a7cde168c2ab2e0e13b892b24e9435f074b79 Mon Sep 17 00:00:00 2001 From: Kitaiti Makoto Date: Sun, 29 Sep 2024 00:31:27 +0900 Subject: [PATCH 5/5] Move BaseParser::XML_PREFIXED_NAMESPACE under BaseParser::Private to mark it private --- lib/rexml/parsers/baseparser.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 6101f7a9..a567e045 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -140,8 +140,6 @@ class BaseParser "apos" => [/'/, "'", "'", /'/] } - XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace" - module Private PEREFERENCE_PATTERN = /#{PEREFERENCE}/um 
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um @@ -158,6 +156,7 @@ module Private default_entities.each do |term| DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/ end + XML_PREFIXED_NAMESPACE = "http://www.w3.org/XML/1998/namespace" end private_constant :Private @@ -187,7 +186,7 @@ def stream=( source ) @tags = [] @stack = [] @entities = [] - @namespaces = {"xml" => XML_PREFIXED_NAMESPACE} + @namespaces = {"xml" => Private::XML_PREFIXED_NAMESPACE} @namespaces_restore_stack = [] end @@ -792,7 +791,7 @@ def parse_attributes(prefixes) @source.match(/\s*/um, true) if prefix == "xmlns" if local_part == "xml" - if value != XML_PREFIXED_NAMESPACE + if value != Private::XML_PREFIXED_NAMESPACE msg = "The 'xml' prefix must not be bound to any other namespace "+ "(http://www.w3.org/TR/REC-xml-names/#ns-decl)" raise REXML::ParseException.new( msg, @source, self )