Skip to content

Commit

Permalink
Merge pull request #664 from qligier/fix_pdfa_ua
Browse files Browse the repository at this point in the history
Improve support for PDF/A and PDF/UA
  • Loading branch information
danfickle authored Mar 8, 2021
2 parents c739738 + 19e9ec7 commit 138b5b9
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 104 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ enum LogMessageId0Param implements LogMessageId {
GENERAL_PDF_SPECIFIED_FONTS_DONT_CONTAIN_A_SPACE_CHARACTER(XRLog.GENERAL, "Specified fonts don't contain a space character!"),
GENERAL_PDF_USING_FAST_MODE(XRLog.GENERAL, "Using fast-mode renderer. Prepare to fly."),
GENERAL_PDF_ACCESSIBILITY_NO_DOCUMENT_TITLE_PROVIDED(XRLog.GENERAL, "No document title provided. Document will not be PDF/UA compliant."),
GENERAL_PDF_ACCESSIBILITY_NO_DOCUMENT_DESCRIPTION_PROVIDED(XRLog.GENERAL, "No document description provided. Document will not be PDF/UA compliant."),
GENERAL_PDF_USING_GET_REQUEST_FOR_FORM(XRLog.GENERAL, "Using GET request method for form. You probably meant to add a method=\"post\" attribute to your form"),
GENERAL_PDF_ACROBAT_READER_DOES_NOT_SUPPORT_FORMS_WITH_FILE_INPUT(XRLog.GENERAL, "Acrobat Reader does not support forms with file input controls"),

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import com.openhtmltopdf.outputdevice.helper.ExternalResourceControlPriority;
import com.openhtmltopdf.outputdevice.helper.ExternalResourceType;
import com.openhtmltopdf.extend.FSDOMMutator;
import com.openhtmltopdf.outputdevice.helper.NullUserInterface;
import com.openhtmltopdf.outputdevice.helper.PageDimensions;
import com.openhtmltopdf.outputdevice.helper.UnicodeImplementation;
import com.openhtmltopdf.pdfboxout.PdfBoxSlowOutputDevice.Metadata;
Expand Down Expand Up @@ -65,11 +66,7 @@
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent;
import org.apache.pdfbox.pdmodel.interactive.viewerpreferences.PDViewerPreferences;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.AdobePDFSchema;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.schema.XMPSchema;
import org.apache.xmpbox.schema.*;
import org.apache.xmpbox.type.BadFieldValueException;
import org.apache.xmpbox.xml.XmpSerializer;
import org.w3c.dom.Document;
Expand All @@ -84,6 +81,8 @@
import java.awt.*;
import java.awt.geom.Rectangle2D;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.logging.Level;
Expand Down Expand Up @@ -604,10 +603,8 @@ private void writePDFFast(List<PageBox> pages, RenderingContext c, Rectangle2D f
firePreWrite(pageCount); // opportunity to adjust meta data
setDidValues(doc); // set PDF header fields from meta data

if (_pdfUaConformance) {
addPdfUaXMPSchema(doc);
} else if (_pdfAConformance != PdfAConformance.NONE) {
addPdfASchema(doc, _pdfAConformance.getPart(), _pdfAConformance.getConformanceValue());
if (_pdfUaConformance || _pdfAConformance != PdfAConformance.NONE) {
addPdfASchema(doc, _pdfAConformance, _pdfUaConformance);
}

DisplayListCollector dlCollector = new DisplayListCollector(_root.getLayer().getPages());
Expand Down Expand Up @@ -685,8 +682,8 @@ private void writePDF(List<PageBox> pages, RenderingContext c, Rectangle2D first
firePreWrite(pageCount); // opportunity to adjust meta data
setDidValues(doc); // set PDF header fields from meta data

if (_pdfAConformance != PdfAConformance.NONE) {
addPdfASchema(doc, _pdfAConformance.getPart(), _pdfAConformance.getConformanceValue());
if (_pdfUaConformance || _pdfAConformance != PdfAConformance.NONE) {
addPdfASchema(doc, _pdfAConformance, _pdfUaConformance);
}

for (int i = 0; i < pageCount; i++) {
Expand All @@ -712,107 +709,105 @@ private void writePDF(List<PageBox> pages, RenderingContext c, Rectangle2D first

// Kindly provided by GurpusMaximus at:
// https://stackoverflow.com/questions/49682339/how-can-i-create-an-accessible-pdf-with-java-pdfbox-2-0-8-library-that-is-also-v
// Prepares the given PDF document for PDF/UA output: sets catalog-level
// accessibility entries (language, DisplayDocTitle viewer preference, the
// Marked flag) and attaches an XMP metadata stream holding the Dublin Core
// title/description plus a PDF/A extension schema entry describing the
// "pdfuaid" namespace and its "part" property.
//
// doc: the PDFBox document whose catalog and metadata are modified in place.
// Any IOException or TransformerException raised while building or
// serializing the XMP packet is rethrown wrapped in a RuntimeException.
private void addPdfUaXMPSchema(PDDocument doc) {
try
{
PDDocumentCatalog catalog = doc.getDocumentCatalog();
// Document language comes from the "lang" attribute of the source document's
// root element; "EN-US" is used when that attribute value is empty.
String lang = _doc.getDocumentElement().getAttribute("lang");
catalog.setLanguage(!lang.isEmpty() ? lang : "EN-US");
// Fresh viewer preferences dictionary with DisplayDocTitle switched on.
catalog.setViewerPreferences(new PDViewerPreferences(new COSDictionary()));
catalog.getViewerPreferences().setDisplayDocTitle(true);

// Flag the document as marked (tagged) in the catalog's mark info.
PDMarkInfo markInfo = new PDMarkInfo();
markInfo.setMarked(true);
catalog.setMarkInfo(markInfo);

// A missing title in the document information is treated as an empty string.
PDDocumentInformation info = doc.getDocumentInformation();
String title = info.getTitle() != null ? info.getTitle() : "";

// Only a warning is logged for an empty title; processing continues.
if (title.isEmpty()) {
XRLog.log(Level.WARNING, LogMessageId.LogMessageId0Param.GENERAL_PDF_ACCESSIBILITY_NO_DOCUMENT_TITLE_PROVIDED);
}

// Dublin Core: title as read above; description from the "description"
// metadata entry of the output device, falling back to the title when absent.
XMPMetadata xmp = XMPMetadata.createXMPMetadata();
xmp.createAndAddDublinCoreSchema();
xmp.getDublinCoreSchema().setTitle(title);
String metaDescription = _outputDevice.getMetadataByName("description");
xmp.getDublinCoreSchema().setDescription(metaDescription != null ? metaDescription : title);
// PDF/A extension schema: register the namespaces used by the schema
// description built below.
xmp.createAndAddPDFAExtensionSchemaWithDefaultNS();
xmp.getPDFExtensionSchema().addNamespace(
"http://www.aiim.org/pdfa/ns/schema#", "pdfaSchema");
xmp.getPDFExtensionSchema().addNamespace(
"http://www.aiim.org/pdfa/ns/property#", "pdfaProperty");
xmp.getPDFExtensionSchema().addNamespace(
"http://www.aiim.org/pdfua/ns/id/", "pdfuaid");
// Schema description entry for the PDF/UA identification namespace.
XMPSchema uaSchema = new XMPSchema(XMPMetadata.createXMPMetadata(),
"pdfaSchema", "pdfaSchema", "pdfaSchema");
uaSchema.setTextPropertyValue("schema",
"PDF/UA Universal Accessibility Schema");
uaSchema.setTextPropertyValue("namespaceURI",
"http://www.aiim.org/pdfua/ns/id/");
uaSchema.setTextPropertyValue("prefix", "pdfuaid");
// Property description for "part", added to the schema's "property" sequence.
XMPSchema uaProp = new XMPSchema(XMPMetadata.createXMPMetadata(),
"pdfaProperty", "pdfaProperty", "pdfaProperty");
uaProp.setTextPropertyValue("name", "part");
uaProp.setTextPropertyValue("valueType", "Integer");
uaProp.setTextPropertyValue("category", "internal");
uaProp.setTextPropertyValue("description",
"Indicates, which part of ISO 14289 standard is followed");
uaSchema.addUnqualifiedSequenceValue("property", uaProp);
xmp.getPDFExtensionSchema().addBagValue("schemas", uaSchema);
// NOTE(review): the prefix is switched to "pdfuaid" before "part" is set,
// presumably so the value is serialized as pdfuaid:part = 1 - confirm
// against xmpbox serialization behavior.
xmp.getPDFExtensionSchema().setPrefix("pdfuaid");
xmp.getPDFExtensionSchema().setTextPropertyValue("part", "1");
// Serialize the XMP packet to bytes and install it as the catalog metadata.
XmpSerializer serializer = new XmpSerializer();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
serializer.serialize(xmp, baos, true);
PDMetadata metadata = new PDMetadata(doc);
metadata.importXMPMetadata(baos.toByteArray());
doc.getDocumentCatalog().setMetadata(metadata);
} catch (IOException|TransformerException e) {
throw new RuntimeException(e);
}
}

private void addPdfASchema(PDDocument document, int part, String conformance) {
private void addPdfASchema(PDDocument document, PdfAConformance pdfAConformance, boolean isPdfUa) {
PDDocumentInformation information = document.getDocumentInformation();
XMPMetadata metadata = XMPMetadata.createXMPMetadata();

try {
// NOTE: These XMP metadata MUST match up with the document information dictionary
// to be a valid PDF/A document, as per ISO 19005-1:2005/Cor.1:2007, 6.7.2
String title = information.getTitle();
String author = information.getAuthor();
String subject = information.getSubject();
String keywords = information.getKeywords();
String creator = information.getCreator();
String producer = information.getProducer();

// NOTE: The XMP metadata MUST match up with the document information dictionary
// to be a valid PDF/A document.

PDFAIdentificationSchema pdfaid = metadata.createAndAddPFAIdentificationSchema();
pdfaid.setConformance(conformance);
pdfaid.setPart(part);
Calendar creationDate = information.getCreationDate();
Calendar modDate = information.getModificationDate();

AdobePDFSchema pdfSchema = metadata.createAndAddAdobePDFSchema();
if (keywords != null) {
pdfSchema.setKeywords(keywords);
}
if (producer != null) {
pdfSchema.setProducer(producer);
if (isPdfUa && (title == null || title.isEmpty())) {
XRLog.log(Level.WARNING, LogMessageId.LogMessageId0Param.GENERAL_PDF_ACCESSIBILITY_NO_DOCUMENT_TITLE_PROVIDED);
}

XMPBasicSchema xmpBasicSchema = metadata.createAndAddXMPBasicSchema();
xmpBasicSchema.setCreateDate(information.getCreationDate());

DublinCoreSchema dc = metadata.createAndAddDublinCoreSchema();
if (author != null) {
dc.addCreator(author);

if (pdfAConformance != PdfAConformance.NONE) {
PDFAIdentificationSchema pdfaid = metadata.createAndAddPFAIdentificationSchema();
pdfaid.setConformance(pdfAConformance.getConformanceValue());
pdfaid.setPart(pdfAConformance.getPart());

AdobePDFSchema pdfSchema = metadata.createAndAddAdobePDFSchema();
pdfSchema.setPDFVersion(String.valueOf(pdfAConformance.getPdfVersion()));
if (keywords != null) {
pdfSchema.setKeywords(keywords);
}
if (producer != null) {
pdfSchema.setProducer(producer);
}

XMPBasicSchema xmpBasicSchema = metadata.createAndAddXMPBasicSchema();
if (creator != null) {
xmpBasicSchema.setCreatorTool(creator);
}
if (creationDate != null) {
xmpBasicSchema.setCreateDate(creationDate);
}
if (modDate != null) {
xmpBasicSchema.setModifyDate(modDate);
}


DublinCoreSchema dc = metadata.createAndAddDublinCoreSchema();
dc.setFormat("application/pdf");
if (author != null) {
dc.addCreator(author);
}
if (title != null) {
dc.setTitle(title);
}
if (subject != null) {
dc.setDescription(subject);
} else if (isPdfUa) {
XRLog.log(Level.WARNING, LogMessageId.LogMessageId0Param.GENERAL_PDF_ACCESSIBILITY_NO_DOCUMENT_DESCRIPTION_PROVIDED);
}
}
if (title != null) {
dc.setTitle(title);

PDFAExtensionSchema pdfAExt = metadata.createAndAddPDFAExtensionSchemaWithDefaultNS();
pdfAExt.addNamespace("http://www.aiim.org/pdfa/ns/extension/", "pdfaExtension");
pdfAExt.addNamespace("http://www.aiim.org/pdfa/ns/schema#", "pdfaSchema");
pdfAExt.addNamespace("http://www.aiim.org/pdfa/ns/property#", "pdfaProperty");

if (pdfAConformance != PdfAConformance.NONE) {
// Description of Adobe PDF Schema
List<XMPSchema> pdfProperties = new ArrayList<>(3);
pdfProperties.add(
createPdfaProperty("internal", "The PDF file version.", "PDFVersion", "Text"));
pdfProperties.add(
createPdfaProperty("external", "Keywords.", "Keywords", "Text"));
pdfProperties.add(
createPdfaProperty("internal", "The name of the tool that created the PDF document.", "Producer", "AgentName"));
pdfAExt.addBagValue("schemas",
createPdfaSchema("Adobe PDF Schema", "http://ns.adobe.com/pdf/1.3/", "pdf", pdfProperties));

// Description of PDF/A ID Schema
List<XMPSchema> pdfaidProperties = new ArrayList<>(2);
pdfaidProperties.add(
createPdfaProperty("internal", "Part of PDF/A standard", "part", "Integer"));
pdfaidProperties.add(
createPdfaProperty("internal", "Conformance level of PDF/A standard", "conformance", "Text"));
pdfAExt.addBagValue("schemas",
createPdfaSchema("PDF/A ID Schema", "http://www.aiim.org/pdfa/ns/id/", "pdfaid", pdfaidProperties));
}
if (subject != null) {
dc.setDescription(subject);
if (isPdfUa) {
// Description of PDF/UA
List<XMPSchema> pdfUaProperties = new ArrayList<>(1);
pdfUaProperties.add(
createPdfaProperty("internal", "Indicates, which part of ISO 14289 standard is followed", "part", "Integer"));
XMPSchema pdfUa = createPdfaSchema("PDF/UA Universal Accessibility Schema", "http://www.aiim.org/pdfua/ns/id/", "pdfuaid" , pdfUaProperties);
pdfAExt.addBagValue("schemas", pdfUa);
pdfAExt.addNamespace("http://www.aiim.org/pdfua/ns/id/", "pdfuaid");
pdfAExt.setPrefix("pdfuaid");
pdfAExt.setTextPropertyValue("part", "1");
}

PDMetadata metadataStream = new PDMetadata(document);
PDMarkInfo markInfo = new PDMarkInfo();
markInfo.setMarked(true);
Expand All @@ -830,7 +825,10 @@ private void addPdfASchema(PDDocument document, int part, String conformance) {
XmpSerializer serializer = new XmpSerializer();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
serializer.serialize(metadata, baos, true);
metadataStream.importXMPMetadata( baos.toByteArray() );
String xmp = baos.toString("UTF-8");
// Fix for bad XML generation by some transformers
xmp = xmp.replace(" lang=\"x-default\"", " xml:lang=\"x-default\"");
metadataStream.importXMPMetadata(xmp.getBytes(StandardCharsets.UTF_8));

if (_colorProfile != null) {
ByteArrayInputStream colorProfile = new ByteArrayInputStream(_colorProfile);
Expand All @@ -846,6 +844,30 @@ private void addPdfASchema(PDDocument document, int part, String conformance) {
}
}

// Builds one schema description entry for the PDF/A extension schema.
// The entry carries the human-readable schema name, the namespace URI and the
// preferred prefix as text properties; every element of 'properties' is then
// appended, in list order, to the entry's "property" sequence.
private XMPSchema createPdfaSchema(String schema, String namespace, String prefix, List<XMPSchema> properties) {
    // The same identifier is passed for all three string arguments of the
    // XMPSchema constructor.
    final String container = "pdfaSchema";
    XMPSchema description = new XMPSchema(XMPMetadata.createXMPMetadata(), container, container, container);

    description.setTextPropertyValue("schema", schema);
    description.setTextPropertyValue("namespaceURI", namespace);
    description.setTextPropertyValue("prefix", prefix);

    for (XMPSchema entry : properties) {
        description.addUnqualifiedSequenceValue("property", entry);
    }

    return description;
}

// Builds one property description entry for use inside a PDF/A extension
// schema description. The four arguments are stored as the text properties
// "name", "valueType", "category" and "description", in that order.
private XMPSchema createPdfaProperty(String category, String description, String name, String valueType) {
    // The same identifier is passed for all three string arguments of the
    // XMPSchema constructor.
    final String container = "pdfaProperty";
    XMPSchema property = new XMPSchema(XMPMetadata.createXMPMetadata(), container, container, container);

    property.setTextPropertyValue("name", name);
    property.setTextPropertyValue("valueType", valueType);
    property.setTextPropertyValue("category", category);
    property.setTextPropertyValue("description", description);

    return property;
}

// Sets the document information dictionary values from html metadata
private void setDidValues(PDDocument doc) {
PDDocumentInformation info = new PDDocumentInformation();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ public PdfRendererBuilder usePdfVersion(float version) {
*/
public PdfRendererBuilder usePdfAConformance(PdfAConformance pdfAConformance) {
    this.state._pdfAConformance = pdfAConformance;
    // Each real PDF/A level carries the PDF version it requires, so selecting
    // one also selects that version. PdfAConformance.NONE is declared with a
    // placeholder version of 0f; copying it would overwrite the PDF version
    // already configured on the builder with an invalid value, so the
    // version is left untouched in that case.
    if (pdfAConformance != PdfAConformance.NONE) {
        this.state._pdfVersion = pdfAConformance.getPdfVersion();
    }
    return this;
}

Expand Down Expand Up @@ -301,18 +302,20 @@ public PdfRendererBuilder useSlowMode() {
* PDF/A-1, PDF/A-2 and PDF/A-3
*/
public enum PdfAConformance {
NONE(-1, ""),
PDFA_1_A(1, "A"), PDFA_1_B(1, "B"),
PDFA_2_A(2, "A"), PDFA_2_B(2, "B"), PDFA_2_U(2, "U"),
PDFA_3_A(3, "A"), PDFA_3_B(3, "B"), PDFA_3_U(3, "U");
NONE(-1, "", 0f),
PDFA_1_A(1, "A", 1.4f), PDFA_1_B(1, "B", 1.4f),
PDFA_2_A(2, "A", 1.7f), PDFA_2_B(2, "B", 1.7f), PDFA_2_U(2, "U", 1.7f),
PDFA_3_A(3, "A", 1.7f), PDFA_3_B(3, "B", 1.7f), PDFA_3_U(3, "U", 1.7f);

PdfAConformance(int part, String value) {
PdfAConformance(int part, String value, float pdfVersion) {
this.part = part;
this.value = value;
this.pdfVersion = pdfVersion;
}

private final int part;
private final String value;
private final float pdfVersion;

public String getConformanceValue() {
return this.value;
Expand All @@ -321,6 +324,10 @@ public String getConformanceValue() {
/**
 * Returns the part number this constant was constructed with:
 * {@code -1} for {@code NONE}, otherwise 1, 2 or 3 for the PDF/A-1,
 * PDF/A-2 and PDF/A-3 constants respectively.
 *
 * @return the part number of this conformance level
 */
public int getPart() {
    return part;
}

/**
 * Returns the PDF version this constant was constructed with:
 * {@code 0f} for {@code NONE}, {@code 1.4f} for the PDF/A-1 constants and
 * {@code 1.7f} for the PDF/A-2 and PDF/A-3 constants.
 *
 * @return the PDF version associated with this conformance level
 */
public float getPdfVersion() {
    return pdfVersion;
}
}
}

0 comments on commit 138b5b9

Please sign in to comment.