Skip to content

Commit

Permalink
For #170 and #161 - Support for character entities.
Browse files Browse the repository at this point in the history
Created two DTDs: one containing only the character entities for XHTML, and
one combining the character entities for XHTML and MathML. Also:
+ Minor refactoring of entity resolver and catalog.
+ Documented the two new doctypes in author’s guide.
+ Made sure that other doctypes resolve to the empty string.
  • Loading branch information
danfickle committed Mar 12, 2018
1 parent c584289 commit 19828ff
Show file tree
Hide file tree
Showing 7 changed files with 3,052 additions and 89 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.io.InputStream;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
Expand All @@ -56,9 +57,6 @@
* @author Patrick Wright
*/
public class FSCatalog {
/**
* Default constructor
*/
public FSCatalog() {
}

Expand All @@ -68,17 +66,17 @@ public FSCatalog() {
*
* @param catalogURI A String URI to a catalog XML file on the classpath.
*/
public Map parseCatalog(String catalogURI) {
public Map<String, String> parseCatalog(String catalogURI) {
URL url;
Map map = null;
Map<String, String> map = null;
InputStream s = null;
try {
url = FSCatalog.class.getClassLoader().getResource(catalogURI);
s = new BufferedInputStream(url.openStream());
map = parseCatalog(new InputSource(s));
} catch (Exception ex) {
XRLog.xmlEntities(Level.WARNING, "Could not open XML catalog from URI '" + catalogURI + "'", ex);
map = new HashMap();
map = Collections.emptyMap();
} finally {
try {
if (s != null) {
Expand All @@ -97,7 +95,7 @@ public Map parseCatalog(String catalogURI) {
*
* @param inputSource A SAX InputSource to a catalog XML file on the classpath.
*/
public Map parseCatalog(InputSource inputSource) {
public Map<String, String> parseCatalog(InputSource inputSource) {
XMLReader xmlReader = XMLResource.newXMLReader();

CatalogContentHandler ch = new CatalogContentHandler();
Expand Down Expand Up @@ -151,16 +149,16 @@ public void warning(SAXParseException ex) {
* parse, then call getEntityMap().
*/
private static class CatalogContentHandler extends DefaultHandler {
private Map entityMap;
private final Map<String, String> entityMap;

public CatalogContentHandler() {
this.entityMap = new HashMap();
this.entityMap = new HashMap<String, String>();
}

/**
* Returns a Map of public Ids to local URIs
*/
public Map getEntityMap() {
public Map<String, String> getEntityMap() {
return entityMap;
}

Expand Down Expand Up @@ -197,5 +195,4 @@ private void setFeature(XMLReader xmlReader, String featureUri, boolean value) {
featureUri + ". Feature may be properly named, but not recognized by this parser.");
}
}
}// end class

}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
Expand Down Expand Up @@ -57,49 +59,26 @@ public class FSEntityResolver implements EntityResolver {
/**
* Singleton instance, use {@link #instance()} to retrieve.
*/
private static FSEntityResolver instance;
private static final FSEntityResolver instance = new FSEntityResolver();

private final Map entities = new HashMap();

// fill the list of URLs
private final Map<String, String> entities = new HashMap<String, String>();

/**
* Constructor for the FSEntityResolver object
* Constructor for the FSEntityResolver object, fill the map of public ids to local urls.
*/
private FSEntityResolver() {
FSCatalog catalog = new FSCatalog();

// The HTML 4.01 DTDs; includes entities. Load from catalog file.
entities.putAll(catalog.parseCatalog("resources/schema/html-4.01/catalog-html-4.01.xml"));

// XHTML common (shared declarations)
entities.putAll(catalog.parseCatalog("resources/schema/xhtml/catalog-xhtml-common.xml"));

// The XHTML 1.0 DTDs
entities.putAll(catalog.parseCatalog("resources/schema/xhtml/catalog-xhtml-1.0.xml"));

// The XHTML 1.1 DTD
entities.putAll(catalog.parseCatalog("resources/schema/xhtml/catalog-xhtml-1.1.xml"));

// DocBook DTDs
entities.putAll(catalog.parseCatalog("resources/schema/docbook/catalog-docbook.xml"));

// Special character-entity DTDs created for OpenHTMLToPDF
entities.putAll(catalog.parseCatalog("resources/schema/openhtmltopdf/catalog-special.xml"));
}

/**
* Description of the Method
*
* @param publicID PARAM
* @param systemID PARAM
* @return Returns
* @throws SAXException Throws
*/
@Override
public InputSource resolveEntity(String publicID,
String systemID)
throws SAXException {

InputSource local = null;
String url = (String) getEntities().get(publicID);
String url = getEntities().get(publicID);

if (url != null) {
URL realUrl = GeneralUtil.getURLFromClasspath(this, url);
InputStream is = null;
Expand All @@ -123,63 +102,26 @@ public InputSource resolveEntity(String publicID,
XRLog.xmlEntities(Level.FINE, "Entity public: " + publicID + " -> " + url +
(local == null ? ", NOT FOUND" : " (local)"));
} else {
XRLog.xmlEntities("Entity public: " + publicID + ", no local mapping. Parser will probably pull from network.");
XRLog.xmlEntities("Entity public: " + publicID + ", no local mapping. Returning empty entity to avoid pulling from network.");
local = new InputSource(new StringReader(""));
}
return local;
}

/**
* Gets an instance of this class.
*
* @return An instance of .
* @return An instance of FSEntityResolver.
*/
public static synchronized FSEntityResolver instance() {
if (instance == null) {
instance = new FSEntityResolver();
}
public static FSEntityResolver instance() {
return instance;
}

/**
* Returns a map of entities parsed by this resolver.
* @return a map of entities parsed by this resolver.
* Returns an unmodifiable map of entities parsed by this resolver.
* @return an unmodifiable map of entities parsed by this resolver.
*/
public Map getEntities() {
return new HashMap(entities);
public Map<String, String> getEntities() {
return Collections.unmodifiableMap(entities);
}
}

/*
* $Id$
*
* $Log$
* Revision 1.8 2008/12/01 20:37:24 pdoubleya
* Expose copy of parsed entities from catalog.
*
* Revision 1.7 2007/05/21 22:13:02 peterbrant
* Code cleanup (patch from Sean Bright)
*
* Revision 1.6 2007/05/20 23:25:34 peterbrant
* Various code cleanups (e.g. remove unused imports)
*
* Patch from Sean Bright
*
* Revision 1.5 2005/06/13 06:50:15 tobega
* Fixed a bug in table content resolution.
* Various "tweaks" in other stuff.
*
* Revision 1.4 2005/03/28 14:24:48 pdoubleya
* Changed to resolve all entities using simple catalog files.
*
* Revision 1.3 2005/03/27 18:36:26 pdoubleya
* Added separate logging for entity resolution.
*
* Revision 1.2 2005/03/21 09:13:50 pdoubleya
* Added XHTML 1.1 references (Kevin).
*
* Revision 1.1 2005/02/03 20:39:34 pdoubleya
* Added to CVS.
*
*
*/

Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ private void setXmlReaderSecurityFeatures(XMLReader xmlReader) {
xmlReader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", false);
xmlReader.setFeature("http://xml.org/sax/features/external-general-entities", false);
xmlReader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
xmlReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
xmlReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", true);
xmlReader.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
} catch (SAXNotSupportedException e) {
XRLog.load(Level.SEVERE, "Unable to disable XML External Entities, which might put you at risk to XXE attacks", e);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version='1.0'?>
<catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog" prefer="public">

<!-- ...................................................................... -->
<!-- XML Catalog data for the special character-entity DTDs created for OpenHTMLToPDF ... -->
<!-- ...................................................................... -->

<public publicId="-//OPENHTMLTOPDF//DOC XHTML Character Entities Only 1.0//EN" uri="resources/schema/openhtmltopdf/char-entities-xhtml-only.ent" />
<public publicId="-//OPENHTMLTOPDF//MATH XHTML Character Entities With MathML 1.0//EN" uri="resources/schema/openhtmltopdf/char-entities-xhtml-mathml.ent" />

<!-- ...................................................................... -->

</catalog>
Loading

0 comments on commit 19828ff

Please sign in to comment.