Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Demonstrate XInclude work - and fail #658

Merged
merged 1 commit into from
Apr 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ lazy val xml = crossProject(JSPlatform, JVMPlatform, NativePlatform)
libraryDependencies += "junit" % "junit" % "4.13.2" % Test,
libraryDependencies += "com.github.sbt" % "junit-interface" % "0.13.3" % Test,
libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.12.0" % Test,
libraryDependencies += "xerces" % "xercesImpl" % "2.12.2" % Test,
libraryDependencies ++= (CrossVersion.partialVersion(scalaVersion.value) match {
case Some((3, _)) =>
Seq()
Expand Down
3 changes: 3 additions & 0 deletions jvm/src/test/resources/scala/xml/archive/books.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<store xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="books/book/author.xml"/>
</store>
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<store xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="author/volume/1.xml"/>
</store>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<collection n="1"/>
3 changes: 3 additions & 0 deletions jvm/src/test/resources/scala/xml/includee.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<includee>
<content>Blah!</content>
</includee>
3 changes: 3 additions & 0 deletions jvm/src/test/resources/scala/xml/includer.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<includer>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="includee.xml"/>
</includer>
3 changes: 3 additions & 0 deletions jvm/src/test/resources/scala/xml/site.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<site xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="archive/books.xml"/>
</site>
74 changes: 71 additions & 3 deletions jvm/src/test/scala/scala/xml/XMLTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -510,8 +510,15 @@ class XMLTestJVM {
}
}

// With both internal and external Xerces now on the classpath, we explicitly disambiguate which one we want:
/** Creates a new `SAXParserFactory` backed by the internal (`com.sun.org.apache.xerces.internal`) Xerces. */
def xercesInternal: javax.xml.parsers.SAXParserFactory = {
  val factoryClassName: String = "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl"
  // The factory class is named explicitly; null is passed as the class loader argument.
  javax.xml.parsers.SAXParserFactory.newInstance(factoryClassName, null)
}

/** Creates a new `SAXParserFactory` backed by the external (`org.apache.xerces`) Xerces. */
def xercesExternal: javax.xml.parsers.SAXParserFactory = {
  val factoryClassName: String = "org.apache.xerces.jaxp.SAXParserFactoryImpl"
  // The factory class is named explicitly; null is passed as the class loader argument.
  javax.xml.parsers.SAXParserFactory.newInstance(factoryClassName, null)
}

/** Default SAXParserFactory */
val defaultParserFactory: javax.xml.parsers.SAXParserFactory = javax.xml.parsers.SAXParserFactory.newInstance
val defaultParserFactory: javax.xml.parsers.SAXParserFactory = xercesInternal

@throws(classOf[org.xml.sax.SAXNotRecognizedException])
def issue17UnrecognizedFeature(): Unit = {
Expand Down Expand Up @@ -629,7 +636,7 @@ class XMLTestJVM {
// using namespace-aware parser, this works with FactoryAdapter enhanced to handle startPrefixMapping() events;
// see https://github.com/scala/scala-xml/issues/506
def roundtrip(namespaceAware: Boolean, xml: String): Unit = {
val parserFactory: javax.xml.parsers.SAXParserFactory = javax.xml.parsers.SAXParserFactory.newInstance()
val parserFactory: javax.xml.parsers.SAXParserFactory = xercesInternal
parserFactory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true)
parserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)
parserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true)
Expand All @@ -656,7 +663,7 @@ class XMLTestJVM {

@UnitTest
def useXMLReaderWithXMLFilter(): Unit = {
val parent: org.xml.sax.XMLReader = javax.xml.parsers.SAXParserFactory.newInstance.newSAXParser.getXMLReader
val parent: org.xml.sax.XMLReader = xercesInternal.newSAXParser.getXMLReader
val filter: org.xml.sax.XMLFilter = new org.xml.sax.helpers.XMLFilterImpl(parent) {
override def characters(ch: Array[Char], start: Int, length: Int): Unit = {
for (i <- 0 until length) if (ch(start+i) == 'a') ch(start+i) = 'b'
Expand All @@ -682,6 +689,67 @@ class XMLTestJVM {
assertTrue(gotAnError)
}

/**
 * Loads a resource with a namespace-aware and XInclude-aware SAX parser and asserts
 * that the serialized result equals `expected`.
 *
 * Now that we can use an XML parser configured to be namespace-aware,
 * we can also configure it to be XInclude-aware and process XML Includes.
 *
 * @param parserFactory factory the SAX parser is created from; it is mutated here
 *                      (namespace awareness and XInclude awareness are switched on)
 * @param resourceName  name of the resource, looked up with `getClass.getResource`
 * @param expected      expected `toString` of the loaded document
 * @throws java.io.FileNotFoundException if the resource cannot be found
 */
def check(
  parserFactory: javax.xml.parsers.SAXParserFactory,
  resourceName: String,
  expected: String
): Unit = {
  // getResource returns null for a missing resource; fail with the resource name
  // instead of an anonymous NullPointerException.
  val resource: java.net.URL = Option(getClass.getResource(resourceName)).getOrElse(
    throw new java.io.FileNotFoundException(s"Test resource not found: $resourceName")
  )

  parserFactory.setNamespaceAware(true)
  parserFactory.setXIncludeAware(true)
  val actual: String = XML
    .withSAXParser(parserFactory.newSAXParser)
    .load(resource.toString)
    .toString

  assertEquals(expected, actual)
}

// Here we demonstrate that XInclude works with both the external and the built-in Xerces:

/** Expected serialization of `includer.xml` once its `xi:include` has been processed. */
val includerExpected: String =
  """<includer>
    | <includee xml:base="includee.xml">
    | <content>Blah!</content>
    |</includee>
    |</includer>""".stripMargin

/** XInclude processing of `includer.xml` with the external Xerces. */
@UnitTest
def xIncludeWithExternalXerces(): Unit =
  check(parserFactory = xercesExternal, resourceName = "includer.xml", expected = includerExpected)
/** XInclude processing of `includer.xml` with the internal Xerces. */
@UnitTest
def xIncludeWithInternalXerces(): Unit =
  check(parserFactory = xercesInternal, resourceName = "includer.xml", expected = includerExpected)

/**
 * Output that both the external and the built-in Xerces unfortunately produce for `site.xml`.
 *
 * Both report an incorrect `xml:base` when the included XML file contains its own include
 * and the included files are not in the same directory: `xml:base` on the `<collection>`
 * element is
 *   books/book/author/volume/1.xml
 * instead of the correct
 *   archive/books/book/author/volume/1.xml
 */
val siteUnfortunatelyExpected: String =
  """<site xmlns:xi="http://www.w3.org/2001/XInclude">
    | <store xml:base="archive/books.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
    | <store xml:base="archive/books/book/author.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
    | <collection n="1" xml:base="books/book/author/volume/1.xml"/>
    |</store>
    |</store>
    |</site>""".stripMargin

// Turns out, this is a known Xerces bug https://issues.apache.org/jira/browse/XERCESJ-1102:
// - the bug was reported in October 2005 - more than seventeen years ago;
// - a patch fixing it (that I have not verified personally) was submitted many years ago;
// - the bug is still not fixed in the 2023 release of Xerces;
// - the bug was discussed by the Saxon users in https://saxonica.plan.io/issues/4664,
// and is allegedly fixed in SaxonC 11.1 - although it is not clear how that can be, since Saxon does not ship its own Xerces.
//
// In my own application, I had to "fix up" the incorrect values produced by Xerces, taking into account
// the specific directory layout being used. I can only speculate what others do, but none of the alternatives sound great:
// - avoid using nested includes altogether or flatten the directory hierarchy to appease the bug;
// - use a privately patched version of Xerces;
// - use Saxon DOM parsing instead of Xerces' SAX.
//
// I find it utterly incomprehensible that a foundational library shipped with the JDK and used everywhere
// has had a bug in its core functionality for years that never gets fixed, but sadly, that is the state of affairs:
/** Pins the incorrect nested-include output that the external Xerces produces for `site.xml`. */
@UnitTest
def xIncludeFailWithExternalXerces(): Unit =
  check(parserFactory = xercesExternal, resourceName = "site.xml", expected = siteUnfortunatelyExpected)
/** Pins the incorrect nested-include output that the internal Xerces produces for `site.xml`. */
@UnitTest
def xIncludeFailWithInternalXerces(): Unit =
  check(parserFactory = xercesInternal, resourceName = "site.xml", expected = siteUnfortunatelyExpected)

@UnitTest
def nodeSeqNs(): Unit = {
val x: NodeBuffer = {
Expand Down