Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/annotation #307

Merged
merged 9 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ SHELL ?= /bin/bash
endif

#JAR_VERSION := $(shell mvn -q -Dexec.executable="echo" -Dexec.args='$${project.version}' --non-recursive exec:exec -DforceStdout)
JAR_VERSION := 2.06
JAR_VERSION := 2.07
JAR_FILE := mn2pdf-$(JAR_VERSION).jar

all: target/$(JAR_FILE)
Expand Down
10 changes: 5 additions & 5 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ You will need the Java Development Kit (JDK) version 8, Update 241 (8u241) or hi

[source,sh]
----
java -Xss5m -Xmx2048m -jar target/mn2pdf-2.06.jar --xml-file <XML-FileName> --xsl-file <XSLT-FileName> --pdf-file <Output-PDF-FileName> [--syntax-highlight]
java -Xss5m -Xmx2048m -jar target/mn2pdf-2.07.jar --xml-file <XML-FileName> --xsl-file <XSLT-FileName> --pdf-file <Output-PDF-FileName> [--syntax-highlight]
----

e.g.

[source,sh]
----
java -Xss5m -Xmx2048m -jar target/mn2pdf-2.06.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf
java -Xss5m -Xmx2048m -jar target/mn2pdf-2.07.jar --xml-file tests/G.191.xml --xsl-file tests/itu.recommendation.xsl --pdf-file tests/G.191.pdf
----

=== PDF encryption features
Expand Down Expand Up @@ -100,7 +100,7 @@ Update version in `pom.xml`, e.g.:
----
<groupId>org.metanorma.fop</groupId>
<artifactId>mn2pdf</artifactId>
<version>2.06</version>
<version>2.07</version>
<name>Metanorma XML to PDF converter</name>
----

Expand All @@ -111,8 +111,8 @@ Tag the same version in Git:

[source,sh]
----
git tag v2.06
git push origin v2.06
git tag v2.07
git push origin v2.07
----

Then the corresponding GitHub release will be automatically created at:
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.metanorma.fop</groupId>
<artifactId>mn2pdf</artifactId>
<version>2.06</version>
<version>2.07</version>
<name>Metanorma XML to PDF converter</name>
<packaging>jar</packaging>
<url>https://www.metanorma.org</url>
Expand Down
249 changes: 224 additions & 25 deletions src/main/java/org/metanorma/fop/annotations/Annotation.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,12 @@
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
import java.util.ArrayList;
import java.util.*;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
Expand All @@ -31,7 +27,20 @@
import javax.xml.xpath.XPathException;
import javax.xml.xpath.XPathFactory;

import org.apache.fop.pdf.PDFObject;
import org.apache.pdfbox.cos.*;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.COSObjectable;
import org.apache.pdfbox.pdmodel.common.PDNumberTreeNode;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDObjectReference;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDParentTreeValue;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDStructureTreeRoot;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList;
import org.apache.pdfbox.pdmodel.documentinterchange.taggedpdf.StandardStructureTypes;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.xml.sax.InputSource;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.fdf.FDFAnnotation;
Expand All @@ -56,7 +65,12 @@ public class Annotation {
protected static final Logger logger = Logger.getLogger(LoggerHelper.LOGGER_NAME);

private boolean DEBUG = false;


private final String ANNOT_PREFIX = "Annot___";
private HashMap<String,PDAnnotation> hashMapDocumentAnnotations = new HashMap<>();

private PDStructureTreeRoot structureTreeRoot;

public void process(File pdf, String xmlReview) throws IOException {
PDDocument document = null;

Expand Down Expand Up @@ -114,9 +128,9 @@ public void process(File pdf, String xmlReview) throws IOException {


if (DEBUG) {
System.out.println("page=" + page);
/*System.out.println("page=" + page);
System.out.println("x=" + x);
System.out.println("y=" + y);
System.out.println("y=" + y);*/
}

AnnotationArea annotationArea = new AnnotationArea();
Expand Down Expand Up @@ -225,7 +239,7 @@ public void process(File pdf, String xmlReview) throws IOException {
}

if (DEBUG) {
System.out.println("postItPopup position=" + Arrays.toString(annotationArea.getPosition()));
//System.out.println("postItPopup position=" + Arrays.toString(annotationArea.getPosition()));
}

Node node_popup = ((Element)node_annotation).getElementsByTagName("popup").item(0);
Expand Down Expand Up @@ -253,55 +267,240 @@ public void process(File pdf, String xmlReview) throws IOException {
xmlwriter.write(updatedXMLReview);
}
}



// import XFDF annotation xml

FDFDocument fdfDoc = FDFDocument.loadXFDF(new ByteArrayInputStream(updatedXMLReview.getBytes(StandardCharsets.UTF_8)));
List<FDFAnnotation> fdfAnnots = fdfDoc.getCatalog().getFDF().getAnnotations();

// group the annotations by page, then add each group to its page
HashMap<Integer,List<PDAnnotation>> map_pdfannots = new HashMap<>();

HashMap<Integer,List<PDAnnotation>> mapPDFannots = new HashMap<>();
for (int i=0; i<fdfDoc.getCatalog().getFDF().getAnnotations().size(); i++) {
FDFAnnotation fdfannot = fdfAnnots.get(i);
int page = fdfannot.getPage();

PDAnnotation pdfannot = PDAnnotation.createAnnotation(fdfannot.getCOSObject());

pdfannot.constructAppearances(); // requires for PDF/A

if (map_pdfannots.get(page) == null) {
map_pdfannots.put(page, new ArrayList<PDAnnotation>());
if (mapPDFannots.get(page) == null) {
mapPDFannots.put(page, new ArrayList<PDAnnotation>());
}
map_pdfannots.get(page).add(pdfannot);
mapPDFannots.get(page).add(pdfannot);
}
for (Map.Entry<Integer,List<PDAnnotation>> set: map_pdfannots.entrySet()) {

for (Map.Entry<Integer,List<PDAnnotation>> set: mapPDFannots.entrySet()) {
PDPage page = document.getPage(set.getKey());
List<PDAnnotation> pageAnotations = page.getAnnotations();
// merge existing annotations (including hyperlinks) and new annotations
pageAnotations.addAll(set.getValue());
//document.getPage(set.getKey()).setAnnotations(set.getValue());
document.getPage(set.getKey()).setAnnotations(pageAnotations);
}

fdfDoc.close();

document.save(pdf);

} catch (IOException | NumberFormatException | ParserConfigurationException | DOMException | TransformerException | SAXException | XPathException ex) {
logger.severe("Can't read annotation data from xml.");
ex.printStackTrace();
}



// add Annot tag for the text annotation
try {
document = PDDocument.load(pdf); // important
hashMapDocumentAnnotations = getAnnotationIDmap(document);

structureTreeRoot = document.getDocumentCatalog().getStructureTreeRoot();
COSArray aDocument = (COSArray) structureTreeRoot.getK();
fixAnnotationTags(aDocument, null, 0);

clearEmptyAnnotations(document);

document.save(pdf);
} catch (IOException ex) {
logger.severe("Can't enclose the annotation into the Annot tag.");
ex.printStackTrace();
}
// END Annot tag adding

} finally {
if( document != null ) {
document.close();
}
}

}


/**
 * Collects the annotations of the document whose Subject entry carries an annotation id.
 * The Subject value starts with the 'Annot___' prefix (it is written by xfdf_simple.xsl,
 * attribute 'subject') and is used as the map key.
 *
 * @param document the PDF document to scan, page by page
 * @return map from the Subject value (including the prefix) to the annotation
 * @throws IOException if the annotations of a page cannot be read
 */
private HashMap<String,PDAnnotation> getAnnotationIDmap(PDDocument document) throws IOException {
    HashMap<String,PDAnnotation> annotationsById = new HashMap<>();
    for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) {
        List<PDAnnotation> annotationsOnPage = document.getPage(pageIndex).getAnnotations();
        for (PDAnnotation candidate : annotationsOnPage) {
            COSDictionary candidateDict = candidate.getCOSObject();
            if (candidateDict == null) {
                continue;
            }
            String subject = candidateDict.getString(COSName.SUBJ);
            if (subject == null || !subject.startsWith(ANNOT_PREFIX)) {
                // not one of the imported review annotations
                continue;
            }
            annotationsById.put(subject, candidate);
        }
    }
    return annotationsById;
}

/**
 * Walks a kids array of the structure tree depth-first and replaces every placeholder
 * 'Annot' tag (a tag whose alt-text starts with the 'Annot___' prefix) with a new Annot
 * structure element that references the annotation found under the same id.
 *
 * Entries that are not indirect objects, or whose S / Alt / K entries have an unexpected
 * type, are skipped individually, so the remaining siblings are still processed.
 *
 * @param oArray       kids array to process, may be null
 * @param parentObject structure element that owns {@code oArray}, null for the top level
 * @param level        nesting depth, used only to indent the debug output
 * @throws IOException declared for the callers; parent tree errors are logged here
 */
private void fixAnnotationTags(COSArray oArray, COSObject parentObject, int level) throws IOException {
    if (oArray == null) {
        return;
    }

    // the indentation is needed for the debug output only
    String levelPrefix = DEBUG ? String.join("", Collections.nCopies(level, " ")) : "";

    for (int i = 0; i < oArray.size(); i++) {
        COSBase arrayEntry = oArray.get(i);
        if (!(arrayEntry instanceof COSObject)) {
            // not an indirect object (e.g. a number), nothing to replace or to descend into
            continue;
        }
        COSObject oArrayItem = (COSObject) arrayEntry;

        COSBase structureType = oArrayItem.getItem(COSName.S);
        if (structureType instanceof COSName) {
            String tagName = ((COSName) structureType).getName();
            if (DEBUG) {
                System.out.println(levelPrefix + tagName);
            }

            if ("Annot".equals(tagName)) {
                COSBase cbAlt = oArrayItem.getItem(COSName.ALT);
                if (cbAlt instanceof COSString) {
                    // read the text directly instead of parsing the toString() representation
                    String annotationId = ((COSString) cbAlt).getString();
                    if (annotationId != null && annotationId.startsWith(ANNOT_PREFIX)) {
                        if (DEBUG) {
                            System.out.println(levelPrefix + "id=" + cbAlt);
                        }
                        // here replace existing tag Annot with new tag Annot
                        replaceAnnotTag(oArrayItem, parentObject, annotationId);
                    }
                }
            }
        }

        // descend only when the kids entry really is an array
        // (after a replacement it is a single object reference dictionary)
        COSBase kids = oArrayItem.getItem(COSName.K);
        if (kids instanceof COSArray) {
            try {
                // level + 1: the depth must not grow with every sibling
                fixAnnotationTags((COSArray) kids, oArrayItem, level + 1);
            } catch (RuntimeException e) {
                // best effort: a broken subtree must not stop the remaining siblings
                logger.warning("Can't process the structure element kids: " + e.toString());
            }
        }
    }
}

/**
 * Replaces the content of one placeholder Annot tag with a new Annot structure element
 * and registers that element in the parent tree of the structure tree root.
 * Nothing is changed when no annotation is registered for {@code annotationId}
 * or when the parent tree cannot be read.
 *
 * @param annotTag     the indirect object of the placeholder tag
 * @param parentObject the parent structure element, may be null
 * @param annotationId the alt-text of the placeholder, key of the annotation map
 */
private void replaceAnnotTag(COSObject annotTag, COSObject parentObject, String annotationId) {
    PDAnnotation foundAnnotation = hashMapDocumentAnnotations.get(annotationId);
    if (foundAnnotation == null) {
        logger.warning("Annotation '" + annotationId + "' isn't found, the Annot tag is left unchanged.");
        return;
    }

    // add the annotation element
    COSDictionary anDict = new COSDictionary();
    // set Tag name (S)
    anDict.setItem(COSName.S, COSName.ANNOT);
    // set Parent (P)
    anDict.setItem(COSName.P, parentObject);
    // set Page (PG), taken from the first kid of the placeholder when it is available
    COSBase placeholderKids = annotTag.getItem(COSName.K);
    if (placeholderKids instanceof COSArray && ((COSArray) placeholderKids).size() > 0) {
        COSBase firstKid = ((COSArray) placeholderKids).get(0);
        if (firstKid instanceof COSObject) {
            COSBase page = ((COSObject) firstKid).getItem(COSName.PG);
            if (page != null) {
                anDict.setItem(COSName.PG, page);
            }
        }
    }

    PDObjectReference objRef = new PDObjectReference();
    anDict.setItem(COSName.K, objRef);
    objRef.setReferencedObject(foundAnnotation);

    if (DEBUG) {
        System.out.println(annotTag.getItem(COSName.K));
    }

    try {
        // from https://stackoverflow.com/questions/79083813/how-to-add-the-annotation-tag-in-tagged-pdf-using-pdfbox

        // read the parent tree first, so that a failure leaves the tag untouched
        PDNumberTreeNode parentTree = structureTreeRoot.getParentTree();
        Map<Integer, COSObjectable> numberTreeAsMap = getNumberTreeAsMap(parentTree);

        int parentTreeNextKey = structureTreeRoot.getParentTreeNextKey();
        if (parentTreeNextKey == -1) {
            // no ParentTreeNextKey: continue after the greatest used key (0 for an empty tree)
            parentTreeNextKey = numberTreeAsMap.isEmpty()
                    ? 0
                    : Collections.max(numberTreeAsMap.keySet()) + 1;
        }

        annotTag.setObject(anDict);

        // assign a number to the annotation and insert the annotation element into the parent tree, and set ParentTreeNextKey
        foundAnnotation.setStructParent(parentTreeNextKey);
        structureTreeRoot.setParentTreeNextKey(parentTreeNextKey + 1);
        numberTreeAsMap.put(parentTreeNextKey, anDict);
        parentTree = new PDNumberTreeNode(PDParentTreeValue.class);
        parentTree.setNumbers(numberTreeAsMap);
        structureTreeRoot.setParentTree(parentTree);
        // END from stackoverflow
    } catch (IOException e) {
        logger.severe("ParentTreeKey update error:" + e.toString());
    }
}

/**
 * Rebuilds the annotation list of every page without the placeholder annotations
 * and removes the helper Subject value from the annotations that are kept.
 *
 * An annotation is dropped when its Contents starts with the 'Annot___' prefix, or when
 * the difference between the third and the first value of its Rect is below 0.07.
 * Rect values may be stored as integer or real numbers, directly or as references.
 * An annotation whose Rect is missing, too short or not numeric is kept.
 *
 * @param document the PDF document whose pages are updated in place
 * @throws IOException if the annotations of a page cannot be read
 */
private void clearEmptyAnnotations(PDDocument document) throws IOException {
    // smallest difference between the x coordinates of an annotation that is kept
    final float minWidth = 0.07f;

    for (int i = 0; i < document.getNumberOfPages(); i++) {
        PDPage page = document.getPage(i);
        List<PDAnnotation> pageAnnotations = new ArrayList<>();

        for (PDAnnotation pageAnnotation : page.getAnnotations()) {
            String contents = pageAnnotation.getContents();
            if (contents != null && contents.startsWith(ANNOT_PREFIX)) {
                continue;
            }

            COSDictionary annotationDict = pageAnnotation.getCOSObject();

            // if link with alt-text Annot___ placed near the clause block, then the Contents changed to something like '1 Scope'
            // therefore need remove links with small difference between coordinates
            COSArray rect = annotationDict.getCOSArray(COSName.RECT);
            if (rect != null && rect.size() > 2) {
                // getObject resolves references; COSNumber covers both integer and real values
                COSBase x1 = rect.getObject(0);
                COSBase x2 = rect.getObject(2);
                if (x1 instanceof COSNumber && x2 instanceof COSNumber
                        && ((COSNumber) x2).floatValue() - ((COSNumber) x1).floatValue() < minWidth) {
                    continue;
                }
            }

            // clear Subject field with 'Annot___', see xfdf_simple.xsl, attribute 'subject'
            String subj = annotationDict.getString(COSName.SUBJ);
            if (subj != null && subj.startsWith(ANNOT_PREFIX)) {
                annotationDict.setItem(COSName.SUBJ, null);
            }
            pageAnnotations.add(pageAnnotation);
        }
        page.setAnnotations(pageAnnotations);
    }
}

/**
 * Flattens a number tree into one modifiable map: the entries of the node itself
 * come first, followed by the entries of its kids, collected recursively.
 *
 * @param tree the node to flatten, may be null
 * @return a new map with all entries, empty when {@code tree} is null
 * @throws IOException if the numbers of a node cannot be read
 */
private Map<Integer, COSObjectable> getNumberTreeAsMap(PDNumberTreeNode tree) throws IOException {
    // a fresh map is always returned because the map of a node is read only
    Map<Integer, COSObjectable> collected = new LinkedHashMap<>();
    if (tree == null) {
        return collected;
    }

    Map<Integer, COSObjectable> ownEntries = tree.getNumbers();
    if (ownEntries != null) {
        collected.putAll(ownEntries);
    }

    List<PDNumberTreeNode> children = tree.getKids();
    if (children == null) {
        return collected;
    }
    for (PDNumberTreeNode child : children) {
        collected.putAll(getNumberTreeAsMap(child));
    }
    return collected;
}

}
2 changes: 2 additions & 0 deletions src/main/resources/xfdf_simple.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@
<xsl:attribute name="page"><xsl:value-of select="$page - 1"/></xsl:attribute>
<xsl:attribute name="rect"><xsl:value-of select="concat($element_from/@x,',',$element_from/@y)"/></xsl:attribute>
<xsl:attribute name="title"><xsl:value-of select="$reviewer"/></xsl:attribute>
<!-- for relationship between common.xsl alt-text Annot___@id and this annotation -->
<xsl:attribute name="subject">Annot___<xsl:value-of select="@id"/></xsl:attribute>

<xsl:element name="contents-richtext">
<body xmlns="http://www.w3.org/1999/xhtml">
Expand Down
Loading