Skip to content

Commit

Permalink
Improve Dublin Core (#3710)
Browse files Browse the repository at this point in the history
This fixes #938

- Reading and writing multiple dublinCore entries works: XMPUtilWriter supports multiple metadata entries in dublinCore and a single entry in the PDDocumentInformation. If you want to test the reading of multiple entries, the PDF file JabRef_multipleMetaEntries.pdf contains three metadata entries in DublinCore for testing locally.
- Removed too much code when refactoring the XMPUtil. Non-XMP metadata are also relevant when retrieving org.apache.pdfbox.pdmodel.PDDocumentInformation
- Update pdfbox and fontbox from 1.8.13 to 2.0.8 and migrate from jempbox to xmpbox. See pull #1096.
- Refactor extraction from DublinCoreSchema
- The tests cover the most important use cases, which include reading and writing metadata from pdf files. Both formats, DublinCore and PDMetadata (which is not XMP metadata), are tested.
- Separated XMPUtils into a reader and a writer utility class.
- add meaningful names in DublinCoreExtractor and use StringUtils.isNullOrEmpty
- Log exception in XMPUtilShared
  • Loading branch information
johannes-manner authored and koppor committed Feb 20, 2018
1 parent e9e7bcc commit ea8ccb3
Show file tree
Hide file tree
Showing 39 changed files with 1,389 additions and 3,268 deletions.
17 changes: 3 additions & 14 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,9 @@ dependencies {
compile 'com.jgoodies:jgoodies-common:1.8.1'
compile 'com.jgoodies:jgoodies-forms:1.9.0'

// update to 2.0.x is not possible - see https://github.com/JabRef/jabref/pull/1096#issuecomment-208857517
compile 'org.apache.pdfbox:pdfbox:1.8.13'
compile 'org.apache.pdfbox:fontbox:1.8.13'
compile 'org.apache.pdfbox:jempbox:1.8.13'
compile 'org.apache.pdfbox:pdfbox:2.0.8'
compile 'org.apache.pdfbox:fontbox:2.0.8'
compile 'org.apache.pdfbox:xmpbox:2.0.8'

// required for reading write-protected PDFs - see https://github.com/JabRef/jabref/pull/942#issuecomment-209252635
compile 'org.bouncycastle:bcprov-jdk15on:1.59'
Expand Down Expand Up @@ -216,16 +215,6 @@ dependencyUpdates.resolutionStrategy = {
selection.reject("Cannot be upgraded to version 2")
}
}
withModule("org.apache.pdfbox:fontbox") { ComponentSelection selection ->
if (selection.candidate.version ==~ /2.*/) {
selection.reject("update to 2.0.x is not possible - see https://github.com/JabRef/jabref/pull/1096#issuecomment-208857517")
}
}
withModule("org.apache.pdfbox:pdfbox") { ComponentSelection selection ->
if (selection.candidate.version ==~ /2.*/) {
selection.reject("update to 2.0.x is not possible - see https://github.com/JabRef/jabref/pull/1096#issuecomment-208857517")
}
}
withModule("mysql:mysql-connector-java") { ComponentSelection selection ->
if (selection.candidate.version ==~ /[6-9].*/) {
selection.reject("http://dev.mysql.com/downloads/connector/j/ lists the version 5.* as last stable version.")
Expand Down
85 changes: 41 additions & 44 deletions src/main/java/org/jabref/cli/XMPUtilMain.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
package org.jabref.cli;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
Expand All @@ -17,14 +16,15 @@
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.xmp.XMPPreferences;
import org.jabref.logic.xmp.XMPUtil;
import org.jabref.logic.xmp.XmpPreferences;
import org.jabref.logic.xmp.XmpUtilReader;
import org.jabref.logic.xmp.XmpUtilWriter;
import org.jabref.model.database.BibDatabaseMode;
import org.jabref.model.entry.BibEntry;
import org.jabref.preferences.JabRefPreferences;

import org.apache.jempbox.impl.XMLUtil;
import org.apache.jempbox.xmp.XMPMetadata;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.xml.XmpSerializer;

public class XMPUtilMain {

Expand Down Expand Up @@ -62,18 +62,16 @@ public static void main(String[] args) throws IOException, TransformerException
Globals.prefs = JabRefPreferences.getInstance();
}

XMPPreferences xmpPreferences = Globals.prefs.getXMPPreferences();
XmpPreferences xmpPreferences = Globals.prefs.getXMPPreferences();
ImportFormatPreferences importFormatPreferences = Globals.prefs.getImportFormatPreferences();

switch (args.length) {
case 0:
int argsLength = args.length;
if (argsLength == 0) {
usage();
break;
case 1:

} else if (argsLength == 1) {
if (args[0].endsWith(".pdf")) {
// Read from pdf and write as BibTex
List<BibEntry> l = XMPUtil.readXMP(new File(args[0]), xmpPreferences);
List<BibEntry> l = XmpUtilReader.readXmp(args[0], xmpPreferences);

BibEntryWriter bibtexEntryWriter = new BibEntryWriter(
new LatexFieldFormatter(Globals.prefs.getLatexFieldFormatterPreferences()), false);
Expand All @@ -92,63 +90,62 @@ public static void main(String[] args) throws IOException, TransformerException

if (entries.isEmpty()) {
System.err.println("Could not find BibEntry in " + args[0]);
} else {
System.out.println(XMPUtil.toXMP(entries, result.getDatabase(), xmpPreferences));
}
}
} else {
usage();
}
break;
case 2:
} else if (argsLength == 2) {
if ("-x".equals(args[0]) && args[1].endsWith(".pdf")) {
// Read from pdf and write as BibTex
Optional<XMPMetadata> meta = XMPUtil.readRawXMP(new File(args[1]));
List<XMPMetadata> meta = XmpUtilReader.readRawXmp(Paths.get(args[1]));

if (meta.isPresent()) {
XMLUtil.save(meta.get().getXMPDocument(), System.out, StandardCharsets.UTF_8.name());
if (!meta.isEmpty()) {
XmpSerializer serializer = new XmpSerializer();
serializer.serialize(meta.get(0), System.out, true);
} else {
System.err.println("The given pdf does not contain any XMP-metadata.");
}
break;
return;
}

if (args[0].endsWith(".bib") && args[1].endsWith(".pdf")) {
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(new FileReader(args[0]));
try (FileReader reader = new FileReader(args[0])) {
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(reader);

Collection<BibEntry> entries = result.getDatabase().getEntries();
List<BibEntry> entries = result.getDatabase().getEntries();

if (entries.isEmpty()) {
System.err.println("Could not find BibEntry in " + args[0]);
} else {
XMPUtil.writeXMP(new File(args[1]), entries, result.getDatabase(), false, xmpPreferences);
System.out.println("XMP written.");
if (entries.isEmpty()) {
System.err.println("Could not find BibEntry in " + args[0]);
} else {
XmpUtilWriter.writeXmp(Paths.get(args[1]), entries, result.getDatabase(), xmpPreferences);
System.out.println("XMP written.");
}
}
break;
return;
}

usage();
break;
case 3:
} else if (argsLength == 3) {
if (!args[1].endsWith(".bib") && !args[2].endsWith(".pdf")) {
usage();
break;
return;
}

ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(new FileReader(args[1]));
try (FileReader reader = new FileReader(args[1])) {
ParserResult result = new BibtexParser(importFormatPreferences, Globals.getFileUpdateMonitor()).parse(reader);

Optional<BibEntry> bibEntry = result.getDatabase().getEntryByKey(args[0]);
Optional<BibEntry> bibEntry = result.getDatabase().getEntryByKey(args[0]);

if (bibEntry.isPresent()) {
XMPUtil.writeXMP(new File(args[2]), bibEntry.get(), result.getDatabase(), xmpPreferences);
if (bibEntry.isPresent()) {
XmpUtilWriter.writeXmp(Paths.get(args[2]), bibEntry.get(), result.getDatabase(), xmpPreferences);

System.out.println("XMP written.");
} else {
System.err.println("Could not find BibEntry " + args[0] + " in " + args[0]);
System.out.println("XMP written.");
} else {
System.err.println("Could not find BibEntry " + args[0] + " in " + args[0]);
}
}
break;

default:
} else {
usage();
}
}
Expand All @@ -167,13 +164,13 @@ private static void usage() {
System.out.println("Read from PDF and print raw XMP:");
System.out.println(" xmpUtil -x <pdf>");
System.out
.println("Write the entry in <bib> given by <key> to the PDF:");
.println("Write the entry in <bib> given by <key> to the PDF:");
System.out.println(" xmpUtil <key> <bib> <pdf>");
System.out.println("Write all entries in <bib> to the PDF:");
System.out.println(" xmpUtil <bib> <pdf>");
System.out.println("");
System.out
.println("To report bugs visit https://issues.jabref.org");
.println("To report bugs visit https://issues.jabref.org");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
import javafx.embed.swing.SwingFXUtils;
import javafx.scene.image.Image;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;

/**
* Represents the view model of a pdf page backed by a {@link PDPage}.
Expand All @@ -18,10 +21,12 @@ public class PdfDocumentPageViewModel extends DocumentPageViewModel {

private final PDPage page;
private final int pageNumber;
private final PDDocument document;

public PdfDocumentPageViewModel(PDPage page, int pageNumber) {
public PdfDocumentPageViewModel(PDPage page, int pageNumber, PDDocument document) {
this.page = Objects.requireNonNull(page);
this.pageNumber = pageNumber;
this.document = document;
}

// Taken from http://stackoverflow.com/a/9417836/873661
Expand All @@ -37,10 +42,12 @@ private static BufferedImage resize(BufferedImage img, int newWidth, int newHeig
}

@Override
// Taken from https://stackoverflow.com/questions/23326562/apache-pdfbox-convert-pdf-to-images
public Image render(int width, int height) {
PDFRenderer renderer = new PDFRenderer(document);
try {
int resolution = 96;
BufferedImage image = page.convertToImage(BufferedImage.TYPE_INT_RGB, 2 * resolution);
BufferedImage image = renderer.renderImageWithDPI(pageNumber, 2 * resolution, ImageType.RGB);
return SwingFXUtils.toFXImage(resize(image, width, height), null);
} catch (IOException e) {
// TODO: LOG
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import javafx.collections.ObservableList;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;

public class PdfDocumentViewModel extends DocumentViewModel {

Expand All @@ -21,13 +21,12 @@ public PdfDocumentViewModel(PDDocument document) {

@Override
public ObservableList<DocumentPageViewModel> getPages() {
@SuppressWarnings("unchecked")
List<PDPage> pages = document.getDocumentCatalog().getAllPages();
PDPageTree pages = document.getDocumentCatalog().getPages();

// There is apparently no neat way to get the page number from a PDPage...thus this old-style for loop
List<PdfDocumentPageViewModel> pdfPages = new ArrayList<>();
for (int i = 0; i < pages.size(); i++) {
pdfPages.add(new PdfDocumentPageViewModel(pages.get(i), i + 1));
// There is apparently no neat way to get the page number from a PDPage...thus this old-style for loop
for (int i = 0; i < pages.getCount(); i++) {
pdfPages.add(new PdfDocumentPageViewModel(pages.get(i), i + 1, document));
}
return FXCollections.observableArrayList(pdfPages);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
import org.jabref.gui.util.DefaultTaskExecutor;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.io.FileUtil;
import org.jabref.logic.xmp.XMPUtil;
import org.jabref.logic.xmp.XmpUtilReader;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;
Expand Down Expand Up @@ -228,7 +228,7 @@ private boolean tryXmpImport(String fileName, ExternalFileType fileType, NamedCo

List<BibEntry> xmpEntriesInFile;
try {
xmpEntriesInFile = XMPUtil.readXMP(fileName, Globals.prefs.getXMPPreferences());
xmpEntriesInFile = XmpUtilReader.readXmp(fileName, Globals.prefs.getXMPPreferences());
} catch (IOException e) {
LOGGER.warn("Problem reading XMP", e);
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import org.jabref.gui.keyboard.KeyBinding;
import org.jabref.gui.worker.AbstractWorker;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.xmp.XMPUtil;
import org.jabref.logic.xmp.XmpUtilWriter;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;

Expand Down Expand Up @@ -133,7 +133,7 @@ public void run() {
for (Path file : files) {
if (Files.exists(file)) {
try {
XMPUtil.writeXMP(file.toFile(), entry, database, Globals.prefs.getXMPPreferences());
XmpUtilWriter.writeXmp(file, entry, database, Globals.prefs.getXMPPreferences());
SwingUtilities.invokeLater(
() -> optDiag.getProgressArea().append(" " + Localization.lang("OK") + ".\n"));
entriesChanged++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
import org.jabref.logic.cleanup.RenamePdfCleanup;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.util.io.FileUtil;
import org.jabref.logic.xmp.XMPUtil;
import org.jabref.logic.xmp.XmpUtilWriter;
import org.jabref.model.database.BibDatabaseContext;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.LinkedFile;
Expand Down Expand Up @@ -331,7 +331,7 @@ public void writeXMPMetadata() {
// Localization.lang("PDF does not exist");
} else {
try {
XMPUtil.writeXMP(file.get(), entry, databaseContext.getDatabase(), Globals.prefs.getXMPPreferences());
XmpUtilWriter.writeXmp(file.get(), entry, databaseContext.getDatabase(), Globals.prefs.getXMPPreferences());
} catch (IOException | TransformerException ex) {
// TODO: Print error message
// Localization.lang("Error while writing") + " '" + file.toString() + "': " + ex;
Expand Down
Loading

0 comments on commit ea8ccb3

Please sign in to comment.