From 7b579169560d3b509f1fe5ee60696efe9f4e7728 Mon Sep 17 00:00:00 2001 From: Leonid Dubinsky Date: Sun, 2 Apr 2023 17:37:01 -0400 Subject: [PATCH] Demonstrate XInclude work - and fail --- build.sbt | 1 + .../resources/scala/xml/archive/books.xml | 3 + .../scala/xml/archive/books/book/author.xml | 3 + .../archive/books/book/author/volume/1.xml | 1 + jvm/src/test/resources/scala/xml/includee.xml | 3 + jvm/src/test/resources/scala/xml/includer.xml | 3 + jvm/src/test/resources/scala/xml/site.xml | 3 + jvm/src/test/scala/scala/xml/XMLTest.scala | 74 ++++++++++++++++++- 8 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 jvm/src/test/resources/scala/xml/archive/books.xml create mode 100644 jvm/src/test/resources/scala/xml/archive/books/book/author.xml create mode 100644 jvm/src/test/resources/scala/xml/archive/books/book/author/volume/1.xml create mode 100644 jvm/src/test/resources/scala/xml/includee.xml create mode 100644 jvm/src/test/resources/scala/xml/includer.xml create mode 100644 jvm/src/test/resources/scala/xml/site.xml diff --git a/build.sbt b/build.sbt index 257bc67f..3a0d6b68 100644 --- a/build.sbt +++ b/build.sbt @@ -113,6 +113,7 @@ lazy val xml = crossProject(JSPlatform, JVMPlatform, NativePlatform) libraryDependencies += "junit" % "junit" % "4.13.2" % Test, libraryDependencies += "com.github.sbt" % "junit-interface" % "0.13.3" % Test, libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.12.0" % Test, + libraryDependencies += "xerces" % "xercesImpl" % "2.12.2" % Test, libraryDependencies ++= (CrossVersion.partialVersion(scalaVersion.value) match { case Some((3, _)) => Seq() diff --git a/jvm/src/test/resources/scala/xml/archive/books.xml b/jvm/src/test/resources/scala/xml/archive/books.xml new file mode 100644 index 00000000..00a49111 --- /dev/null +++ b/jvm/src/test/resources/scala/xml/archive/books.xml @@ -0,0 +1,3 @@ + + + diff --git a/jvm/src/test/resources/scala/xml/archive/books/book/author.xml 
b/jvm/src/test/resources/scala/xml/archive/books/book/author.xml new file mode 100644 index 00000000..3f2d2a9c --- /dev/null +++ b/jvm/src/test/resources/scala/xml/archive/books/book/author.xml @@ -0,0 +1,3 @@ + + + diff --git a/jvm/src/test/resources/scala/xml/archive/books/book/author/volume/1.xml b/jvm/src/test/resources/scala/xml/archive/books/book/author/volume/1.xml new file mode 100644 index 00000000..7577b32c --- /dev/null +++ b/jvm/src/test/resources/scala/xml/archive/books/book/author/volume/1.xml @@ -0,0 +1 @@ + diff --git a/jvm/src/test/resources/scala/xml/includee.xml b/jvm/src/test/resources/scala/xml/includee.xml new file mode 100644 index 00000000..151eda26 --- /dev/null +++ b/jvm/src/test/resources/scala/xml/includee.xml @@ -0,0 +1,3 @@ + + Blah! + diff --git a/jvm/src/test/resources/scala/xml/includer.xml b/jvm/src/test/resources/scala/xml/includer.xml new file mode 100644 index 00000000..52195478 --- /dev/null +++ b/jvm/src/test/resources/scala/xml/includer.xml @@ -0,0 +1,3 @@ + + + diff --git a/jvm/src/test/resources/scala/xml/site.xml b/jvm/src/test/resources/scala/xml/site.xml new file mode 100644 index 00000000..9c77a297 --- /dev/null +++ b/jvm/src/test/resources/scala/xml/site.xml @@ -0,0 +1,3 @@ + + + diff --git a/jvm/src/test/scala/scala/xml/XMLTest.scala b/jvm/src/test/scala/scala/xml/XMLTest.scala index 196085f4..90e4a65c 100644 --- a/jvm/src/test/scala/scala/xml/XMLTest.scala +++ b/jvm/src/test/scala/scala/xml/XMLTest.scala @@ -510,8 +510,15 @@ class XMLTestJVM { } } + // With both internal and external Xerces now on the classpath, we explicitly disambiguate which one we want: + def xercesInternal: javax.xml.parsers.SAXParserFactory = + javax.xml.parsers.SAXParserFactory.newInstance("com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl", null) + + def xercesExternal: javax.xml.parsers.SAXParserFactory = + javax.xml.parsers.SAXParserFactory.newInstance("org.apache.xerces.jaxp.SAXParserFactoryImpl", null) + /** Default 
SAXParserFactory */ - val defaultParserFactory: javax.xml.parsers.SAXParserFactory = javax.xml.parsers.SAXParserFactory.newInstance + val defaultParserFactory: javax.xml.parsers.SAXParserFactory = xercesInternal @throws(classOf[org.xml.sax.SAXNotRecognizedException]) def issue17UnrecognizedFeature(): Unit = { @@ -629,7 +636,7 @@ class XMLTestJVM { // using namespace-aware parser, this works with FactoryAdapter enhanced to handle startPrefixMapping() events; // see https://github.com/scala/scala-xml/issues/506 def roundtrip(namespaceAware: Boolean, xml: String): Unit = { - val parserFactory: javax.xml.parsers.SAXParserFactory = javax.xml.parsers.SAXParserFactory.newInstance() + val parserFactory: javax.xml.parsers.SAXParserFactory = xercesInternal parserFactory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true) parserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false) parserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true) @@ -656,7 +663,7 @@ class XMLTestJVM { @UnitTest def useXMLReaderWithXMLFilter(): Unit = { - val parent: org.xml.sax.XMLReader = javax.xml.parsers.SAXParserFactory.newInstance.newSAXParser.getXMLReader + val parent: org.xml.sax.XMLReader = xercesInternal.newSAXParser.getXMLReader val filter: org.xml.sax.XMLFilter = new org.xml.sax.helpers.XMLFilterImpl(parent) { override def characters(ch: Array[Char], start: Int, length: Int): Unit = { for (i <- 0 until length) if (ch(start+i) == 'a') ch(start+i) = 'b' @@ -682,6 +689,67 @@ class XMLTestJVM { assertTrue(gotAnError) } + // Now that we can use an XML parser configured to be namespace-aware, + // we can also configure it to be XInclude-aware and process XML Includes: + def check( + parserFactory: javax.xml.parsers.SAXParserFactory, + resourceName: String, + expected: String + ): Unit = { + parserFactory.setNamespaceAware(true) + parserFactory.setXIncludeAware(true) + val actual: String = XML
.withSAXParser(parserFactory.newSAXParser) + .load(getClass.getResource(resourceName).toString) + .toString + + assertEquals(expected, actual) + } + + // Here we demonstrate that XInclude works with both the external and the built-in Xerces: + + val includerExpected: String = + s""" + | + | Blah! + | + |""".stripMargin + + @UnitTest def xIncludeWithExternalXerces(): Unit = check(xercesExternal, "includer.xml", includerExpected) + @UnitTest def xIncludeWithInternalXerces(): Unit = check(xercesInternal, "includer.xml", includerExpected) + + // And here we demonstrate that both external and built-in Xerces report incorrect `xml:base` + // when the XML file included contains its own include, and included files are not in the same directory: + // `xml:base` on the `` element is incorrect + // books/book/author/volume/1.xml instead of the correct + // archive/books/book/author/volume/1.xml! + val siteUnfortunatelyExpected: String = + s""" + | + | + | + | + | + |""".stripMargin + + // Turns out, this is a known Xerces bug https://issues.apache.org/jira/browse/XERCESJ-1102: + // - the bug was reported in October 2005 - more than seventeen years ago; + // - a patch fixing it (that I have not verified personally) was submitted many years ago; + // - the bug is still not fixed in the 2023 release of Xerces; + // - the bug was discussed by the Saxon users in https://saxonica.plan.io/issues/4664, + // and is allegedly fixed in SaxonC 11.1 - although how this can be, given that Saxon does not ship its own Xerces, is not clear. + // + // In my own application, I had to "fix up" incorrect values produced by Xerces, taking into account + // the specific directory layout being used. I can only speculate what others do, but none of the alternatives sound great: + // - avoid using nested includes altogether or flatten the directory hierarchy to appease the bug; + // - use a privately patched version of Xerces; + // - use Saxon DOM parsing instead of Xerces' SAX.
+ // + // I find it utterly incomprehensible that a foundational library shipped with the JDK and used everywhere + // has had a bug in its core functionality for years and it never gets fixed, but sadly, that is the state of affairs: + @UnitTest def xIncludeFailWithExternalXerces(): Unit = check(xercesExternal, "site.xml", siteUnfortunatelyExpected) + @UnitTest def xIncludeFailWithInternalXerces(): Unit = check(xercesInternal, "site.xml", siteUnfortunatelyExpected) + @UnitTest def nodeSeqNs(): Unit = { val x: NodeBuffer = {