diff --git a/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/font/PDFontTest.java b/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/font/PDFontTest.java index 66ccb5de..0b57f1c7 100644 --- a/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/font/PDFontTest.java +++ b/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/font/PDFontTest.java @@ -18,6 +18,7 @@ package com.tom_roush.pdfbox.pdmodel.font; import android.content.Context; +import android.util.Log; import androidx.test.platform.app.InstrumentationRegistry; @@ -28,10 +29,15 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.URI; import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; import com.tom_roush.fontbox.ttf.TTFParser; +import com.tom_roush.fontbox.ttf.TrueTypeCollection; import com.tom_roush.fontbox.ttf.TrueTypeFont; +import com.tom_roush.fontbox.util.autodetect.FontFileFinder; import com.tom_roush.pdfbox.android.PDFBoxResourceLoader; import com.tom_roush.pdfbox.android.TestResourceGenerator; import com.tom_roush.pdfbox.cos.COSName; @@ -205,6 +211,53 @@ public void testPDFox4318() throws IOException } } + @Test + public void testFullEmbeddingTTC() throws IOException + { + FontFileFinder fff = new FontFileFinder(); + TrueTypeCollection ttc = null; + for (URI uri : fff.find()) + { + if (uri.getPath().endsWith(".ttc")) + { + File file = new File(uri); + Log.i("PdfBox-Android", "TrueType collection file: " + file); + ttc = new TrueTypeCollection(file); + break; + } + } + if (ttc == null) + { + Log.i("PdfBox-Android", "testFullEmbeddingTTC skipped, no .ttc files available"); + return; + } + + final List names = new ArrayList(); + ttc.processAllFonts(new TrueTypeCollection.TrueTypeFontProcessor() + { + @Override + public void process(TrueTypeFont ttf) throws IOException + { + Log.i("PdfBox-Android", "TrueType font in collection: " + ttf.getName()); + names.add(ttf.getName()); + } + }); + 
+ TrueTypeFont ttf = ttc.getFontByName(names.get(0)); // take the first one + Log.i("PdfBox-Android", "TrueType font used for test: " + ttf.getName()); + + try + { + PDType0Font.load(new PDDocument(), ttf, false); + } + catch (IOException ex) + { + Assert.assertEquals("Full embedding of TrueType font collections not supported", ex.getMessage()); + return; + } + Assert.fail("should have thrown IOException"); + } + private void testPDFBox3826checkFonts(byte[] byteArray, File fontFile) throws IOException { PDDocument doc = PDDocument.load(byteArray); diff --git a/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/MultilineFieldsTest.java b/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/MultilineFieldsInstrumentationTest.java similarity index 91% rename from library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/MultilineFieldsTest.java rename to library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/MultilineFieldsInstrumentationTest.java index 8b6f9b7c..10594d98 100644 --- a/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/MultilineFieldsTest.java +++ b/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/MultilineFieldsInstrumentationTest.java @@ -24,24 +24,23 @@ import java.io.File; import java.io.IOException; +import com.tom_roush.pdfbox.android.PDFBoxResourceLoader; import com.tom_roush.pdfbox.pdmodel.PDDocument; import com.tom_roush.pdfbox.rendering.TestRendering; -import com.tom_roush.pdfbox.android.PDFBoxResourceLoader; import org.junit.After; import org.junit.Before; import org.junit.Test; -public class MultilineFieldsTest +public class MultilineFieldsInstrumentationTest { private static File OUT_DIR; private static final String IN_DIR = "pdfbox/com/tom_roush/pdfbox/pdmodel/interactive/form"; private static final String NAME_OF_PDF = "MultilineFields.pdf"; - private static final String TEST_VALUE = - "Lorem ipsum dolor 
sit amet, consetetur sadipscing elitr, " + - "sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam"; + private static final String TEST_VALUE = "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, " + + "sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam"; - Context testContext; + private Context testContext; private PDDocument document; private PDAcroForm acroForm; @@ -51,7 +50,6 @@ public void setUp() throws IOException { testContext = InstrumentationRegistry.getInstrumentation().getContext(); PDFBoxResourceLoader.init(testContext); - System.out.println("Working Directory = " + System.getProperty("user.dir")); document = PDDocument.load(testContext.getAssets().open(IN_DIR + "/" + NAME_OF_PDF)); acroForm = document.getDocumentCatalog().getAcroForm(); OUT_DIR = new File(testContext.getCacheDir(), "pdfbox-test-output"); @@ -110,4 +108,5 @@ public void tearDown() throws IOException { document.close(); } + } diff --git a/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java b/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java index 3022256f..9b0117e0 100644 --- a/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java +++ b/library/src/androidTest/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDAcroFormFlattenTest.java @@ -284,6 +284,20 @@ public void testFlattenPDFBox4788() throws IOException flattenAndCompare(sourceUrl, targetFileName); } + /** + * PDFBOX-4889: appearance streams with empty /BBox. + * + * @throws IOException + */ + @Test + public void testFlattenPDFBox4889() throws IOException + { + String sourceUrl = "https://issues.apache.org/jira/secure/attachment/13005793/f1040sb%20test.pdf"; + String targetFileName = "PDFBOX-4889.pdf"; + + flattenAndCompare(sourceUrl, targetFileName); + } + /* * Flatten and compare with generated image samples. 
*/ diff --git a/library/src/main/java/com/tom_roush/fontbox/cmap/CMapParser.java b/library/src/main/java/com/tom_roush/fontbox/cmap/CMapParser.java index f4b3fda5..ac182c48 100644 --- a/library/src/main/java/com/tom_roush/fontbox/cmap/CMapParser.java +++ b/library/src/main/java/com/tom_roush/fontbox/cmap/CMapParser.java @@ -16,6 +16,7 @@ */ package com.tom_roush.fontbox.cmap; +import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -122,27 +123,27 @@ public CMap parse(InputStream input) throws IOException if (previousToken != null) { - if (op.op.equals("usecmap")) + if (op.op.equals("usecmap") && previousToken instanceof LiteralName) { parseUsecmap((LiteralName) previousToken, result); } - else if (op.op.equals("begincodespacerange")) + else if (op.op.equals("begincodespacerange") && previousToken instanceof Number) { parseBegincodespacerange((Number) previousToken, cmapStream, result); } - else if (op.op.equals("beginbfchar")) + else if (op.op.equals("beginbfchar") && previousToken instanceof Number) { parseBeginbfchar((Number) previousToken, cmapStream, result); } - else if (op.op.equals("beginbfrange")) + else if (op.op.equals("beginbfrange") && previousToken instanceof Number) { parseBeginbfrange((Number) previousToken, cmapStream, result); } - else if (op.op.equals("begincidchar")) + else if (op.op.equals("begincidchar") && previousToken instanceof Number) { parseBegincidchar((Number) previousToken, cmapStream, result); } - else if (op.op.equals("begincidrange")) + else if (op.op.equals("begincidrange") && previousToken instanceof Integer) { parseBegincidrange((Integer) previousToken, cmapStream, result); } @@ -452,12 +453,7 @@ protected InputStream getExternalCMap(String name) throws IOException return PDFBoxResourceLoader.getStream("com/tom_roush/fontbox/resources/cmap/" + name); } - InputStream is = getClass().getResourceAsStream("/com/tom_roush/fontbox/resources/cmap/" + name); - if (is == 
null) - { - throw new IOException("Error: Could not find referenced cmap stream " + name); - } - return is; + return new BufferedInputStream(getClass().getResourceAsStream("/com/tom_roush/fontbox/resources/cmap/" + name)); } private Object parseNextToken(PushbackInputStream is) throws IOException diff --git a/library/src/main/java/com/tom_roush/fontbox/cmap/CodespaceRange.java b/library/src/main/java/com/tom_roush/fontbox/cmap/CodespaceRange.java index 4e394fb5..65f47dc5 100644 --- a/library/src/main/java/com/tom_roush/fontbox/cmap/CodespaceRange.java +++ b/library/src/main/java/com/tom_roush/fontbox/cmap/CodespaceRange.java @@ -37,19 +37,26 @@ public class CodespaceRange * <8140> to <9FFC> defines a rectangular range. The high byte has to be within 0x81 and 0x9F and the * low byte has to be within 0x40 and 0xFC * + * @param startBytes + * @param endBytes */ public CodespaceRange(byte[] startBytes, byte[] endBytes) { - if (startBytes.length != endBytes.length) + byte[] correctedStartBytes = startBytes; + if (startBytes.length != endBytes.length && startBytes.length == 1 && startBytes[0] == 0) + { + correctedStartBytes = new byte[endBytes.length]; + } + else if (startBytes.length != endBytes.length) { throw new IllegalArgumentException( "The start and the end values must not have different lengths."); } - start = new int[startBytes.length]; + start = new int[correctedStartBytes.length]; end = new int[endBytes.length]; - for (int i = 0; i < startBytes.length; i++) + for (int i = 0; i < correctedStartBytes.length; i++) { - start[i] = startBytes[i] & 0xFF; + start[i] = correctedStartBytes[i] & 0xFF; end[i] = endBytes[i] & 0xFF; } codeLength = endBytes.length; diff --git a/library/src/main/java/com/tom_roush/fontbox/pfb/PfbParser.java b/library/src/main/java/com/tom_roush/fontbox/pfb/PfbParser.java index be66df1d..6c99e94f 100644 --- a/library/src/main/java/com/tom_roush/fontbox/pfb/PfbParser.java +++ b/library/src/main/java/com/tom_roush/fontbox/pfb/PfbParser.java @@ 
-140,11 +140,21 @@ private void parsePfb(final byte[] pfb) throws IOException size += in.read() << 8; size += in.read() << 16; size += in.read() << 24; + if (size < 0) + { + throw new IOException("PFB record size is negative: " + size); + } lengths[records] = size; if (pointer >= pfbdata.length) { throw new EOFException("attempted to read past EOF"); } + if (size > pfbdata.length - pointer) + { + throw new IOException("PFB record size (" + size + + ") doesn't fit in buffer, position: " + pointer + + ", total length: " + pfbdata.length); + } int got = in.read(pfbdata, pointer, size); if (got < 0) { diff --git a/library/src/main/java/com/tom_roush/fontbox/ttf/BufferedRandomAccessFile.java b/library/src/main/java/com/tom_roush/fontbox/ttf/BufferedRandomAccessFile.java index b9cab5c4..00ba94aa 100644 --- a/library/src/main/java/com/tom_roush/fontbox/ttf/BufferedRandomAccessFile.java +++ b/library/src/main/java/com/tom_roush/fontbox/ttf/BufferedRandomAccessFile.java @@ -148,24 +148,37 @@ private void invalidate() throws IOException @Override public int read(byte[] b, int off, int len) throws IOException { - int leftover = bufend - bufpos; - if (len <= leftover) - { - System.arraycopy(buffer, bufpos, b, off, len); - bufpos += len; - return len; - } - System.arraycopy(buffer, bufpos, b, off, leftover); - bufpos += leftover; - if (fillBuffer() > 0) + int curLen = len; // length of what is left to read (shrinks) + int curOff = off; // offset where to put read data (grows) + int totalRead = 0; + + while (true) { - int bytesRead = read(b, off + leftover, len - leftover); - if (bytesRead > 0) + int leftover = bufend - bufpos; + if (curLen <= leftover) + { + System.arraycopy(buffer, bufpos, b, curOff, curLen); + bufpos += curLen; + return totalRead + curLen; + } + // curLen > leftover, we need to read more than what remains in buffer + System.arraycopy(buffer, bufpos, b, curOff, leftover); + totalRead += leftover; + bufpos += leftover; + if (fillBuffer() > 0) + { + curOff += 
leftover; + curLen -= leftover; + } + else { - leftover += bytesRead; + if (totalRead == 0) + { + return -1; + } + return totalRead; } } - return leftover > 0 ? leftover : -1; } /** diff --git a/library/src/main/java/com/tom_roush/fontbox/ttf/GlyphSubstitutionTable.java b/library/src/main/java/com/tom_roush/fontbox/ttf/GlyphSubstitutionTable.java index 012d0092..16675ceb 100644 --- a/library/src/main/java/com/tom_roush/fontbox/ttf/GlyphSubstitutionTable.java +++ b/library/src/main/java/com/tom_roush/fontbox/ttf/GlyphSubstitutionTable.java @@ -402,7 +402,7 @@ private Collection getLangSysTables(String scriptTag) * * @param langSysTables The {@code LangSysTable}s indicating {@code FeatureRecord}s to search * for - * @param enabledFeatures An optional whitelist of feature tags ({@code null} to allow all) + * @param enabledFeatures An optional list of feature tags ({@code null} to allow all) * @return The indicated {@code FeatureRecord}s */ private List getFeatureRecords(Collection langSysTables, @@ -510,8 +510,8 @@ private int doLookup(LookupTable lookupTable, int gid) /** * Apply glyph substitutions to the supplied gid. The applicable substitutions are determined by - * the {@code scriptTags} which indicate the language of the gid, and by the - * {@code enabledFeatures} which acts as a whitelist. + * the {@code scriptTags} which indicate the language of the gid, and by the list of + * {@code enabledFeatures}. 
* * To ensure that a single gid isn't mapped to multiple substitutions, subsequent invocations * with the same gid will return the same result as the first, regardless of script or enabled @@ -519,7 +519,7 @@ private int doLookup(LookupTable lookupTable, int gid) * * @param gid GID * @param scriptTags Script tags applicable to the gid (see {@link OpenTypeScript}) - * @param enabledFeatures Whitelist of features to apply + * @param enabledFeatures list of features to apply */ public int getSubstitution(int gid, String[] scriptTags, List enabledFeatures) { diff --git a/library/src/main/java/com/tom_roush/fontbox/ttf/OS2WindowsMetricsTable.java b/library/src/main/java/com/tom_roush/fontbox/ttf/OS2WindowsMetricsTable.java index 87f7758d..a3cd2707 100644 --- a/library/src/main/java/com/tom_roush/fontbox/ttf/OS2WindowsMetricsTable.java +++ b/library/src/main/java/com/tom_roush/fontbox/ttf/OS2WindowsMetricsTable.java @@ -152,7 +152,7 @@ public class OS2WindowsMetricsTable extends TTFTable *

For Restricted License embedding to take effect, it must be the only level of embedding * selected. */ - public static final short FSTYPE_RESTRICTED = 0x0001; + public static final short FSTYPE_RESTRICTED = 0x0002; /** * Preview and Print embedding: the font may be embedded, and temporarily loaded on the diff --git a/library/src/main/java/com/tom_roush/fontbox/ttf/OpenTypeScript.java b/library/src/main/java/com/tom_roush/fontbox/ttf/OpenTypeScript.java index d2bbef7b..feabbbeb 100644 --- a/library/src/main/java/com/tom_roush/fontbox/ttf/OpenTypeScript.java +++ b/library/src/main/java/com/tom_roush/fontbox/ttf/OpenTypeScript.java @@ -18,6 +18,7 @@ import android.util.Log; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -222,21 +223,13 @@ public final class OpenTypeScript { if (PDFBoxResourceLoader.isReady()) { - input = PDFBoxResourceLoader.getStream(path); + input = new BufferedInputStream(PDFBoxResourceLoader.getStream(path)); } else { - // Fallback - input = OpenTypeScript.class.getResourceAsStream(path); - } - if (input != null) - { - parseScriptsFile(input); - } - else - { - Log.w("PdfBox-Android", "Could not find '" + path + "', mirroring char map will be empty: "); + input = new BufferedInputStream(OpenTypeScript.class.getResourceAsStream(path)); } + parseScriptsFile(input); } catch (IOException e) { diff --git a/library/src/main/java/com/tom_roush/pdfbox/contentstream/PDFStreamEngine.java b/library/src/main/java/com/tom_roush/pdfbox/contentstream/PDFStreamEngine.java index 4015b9c9..a399e3de 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/contentstream/PDFStreamEngine.java +++ b/library/src/main/java/com/tom_roush/pdfbox/contentstream/PDFStreamEngine.java @@ -405,7 +405,12 @@ protected final void processTilingPattern(PDTilingPattern tilingPattern, PDColor // clip to bounding box clipToRect(tilingPattern.getBBox()); + // save text matrices (pattern stream may contain 
BT/ET, see PDFBOX-4896) + Matrix textMatrixSave = textMatrix; + Matrix textLineMatrixSave = textLineMatrix; processStreamOperators(tilingPattern); + textMatrix = textMatrixSave; + textLineMatrix = textLineMatrixSave; initialMatrix = parentMatrix; restoreGraphicsStack(savedStack); @@ -735,13 +740,7 @@ protected void showText(byte[] string) throws IOException Vector w = font.getDisplacement(code); // process the decoded glyph - saveGraphicsState(); - Matrix textMatrixOld = textMatrix; - Matrix textLineMatrixOld = textLineMatrix; showGlyph(textRenderingMatrix, font, code, w); - textMatrix = textMatrixOld; - textLineMatrix = textLineMatrixOld; - restoreGraphicsState(); // calculate the combined displacements float tx; @@ -802,8 +801,8 @@ protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, Vector displacement) throws IOException { - // call deprecated method to ensure binary compatibility - showGlyph(textRenderingMatrix, font, code, null, displacement); + // call deprecated method to ensure binary compatibility if not overridden + showGlyph(textRenderingMatrix, font, code, font.toUnicode(code), displacement); } /** @@ -840,7 +839,7 @@ protected void showFontGlyph(Matrix textRenderingMatrix, PDFont font, int code, { // overridden in subclasses // call deprecated method to ensure binary compatibility if not overridden - showFontGlyph(textRenderingMatrix, font, code, null, displacement); + showFontGlyph(textRenderingMatrix, font, code, font.toUnicode(code), displacement); } /** @@ -853,6 +852,8 @@ protected void showFontGlyph(Matrix textRenderingMatrix, PDFont font, int code, * @param unicode the Unicode text for this glyph, or null if the PDF does provide it * @param displacement the displacement (i.e. 
advance) of the glyph in text space * @throws IOException if the glyph cannot be processed + * + * @deprecated use {@link #showType3Glyph(Matrix, PDType3Font, int, Vector)} instead */ protected void showType3Glyph(Matrix textRenderingMatrix, PDType3Font font, int code, String unicode, Vector displacement) throws IOException @@ -878,7 +879,7 @@ protected void showType3Glyph(Matrix textRenderingMatrix, PDType3Font font, int Vector displacement) throws IOException { // call deprecated method to ensure binary compatibility if not overridden - showType3Glyph(textRenderingMatrix, font, code, null, displacement); + showType3Glyph(textRenderingMatrix, font, code, font.toUnicode(code), displacement); } /** diff --git a/library/src/main/java/com/tom_roush/pdfbox/cos/COSFloat.java b/library/src/main/java/com/tom_roush/pdfbox/cos/COSFloat.java index 68038897..94cfddb7 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/cos/COSFloat.java +++ b/library/src/main/java/com/tom_roush/pdfbox/cos/COSFloat.java @@ -107,13 +107,13 @@ private void checkMinMaxValues() // check for very small values else if (floatValue == 0 && doubleValue != 0 && Math.abs(doubleValue) < Float.MIN_NORMAL) { - floatValue = Float.MIN_NORMAL; - floatValue *= doubleValue >= 0 ? 1 : -1; + // values smaller than the smallest possible float value are converted to 0 + // see PDF spec, chapter 2 of Appendix C Implementation Limits valueReplaced = true; } if (valueReplaced) { - value = new BigDecimal(floatValue); + value = BigDecimal.valueOf(floatValue); valueAsString = removeNullDigits(value.toPlainString()); } } @@ -146,6 +146,8 @@ public float floatValue() * The value of the double object that this one wraps. * * @return The double of this object. 
+ * + * @deprecated will be removed in a future release */ @Override public double doubleValue() diff --git a/library/src/main/java/com/tom_roush/pdfbox/cos/COSInteger.java b/library/src/main/java/com/tom_roush/pdfbox/cos/COSInteger.java index 2ed92c7a..76e4e8d2 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/cos/COSInteger.java +++ b/library/src/main/java/com/tom_roush/pdfbox/cos/COSInteger.java @@ -143,6 +143,8 @@ public float floatValue() * polymorphic access to value as float. * * @return The double value of this object. + * + * @deprecated will be removed in a future release */ @Override public double doubleValue() diff --git a/library/src/main/java/com/tom_roush/pdfbox/cos/COSNumber.java b/library/src/main/java/com/tom_roush/pdfbox/cos/COSNumber.java index aca4fa48..101fba16 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/cos/COSNumber.java +++ b/library/src/main/java/com/tom_roush/pdfbox/cos/COSNumber.java @@ -49,6 +49,8 @@ public abstract class COSNumber extends COSBase * This will get the double value of this number. * * @return The double value of this number. 
+ * + * @deprecated will be removed in a future release */ public abstract double doubleValue(); @@ -82,7 +84,7 @@ public static COSNumber get( String number ) throws IOException char digit = number.charAt(0); if ('0' <= digit && digit <= '9') { - return COSInteger.get(digit - '0'); + return COSInteger.get((long) digit - '0'); } else if (digit == '-' || digit == '.') { @@ -94,25 +96,43 @@ else if (digit == '-' || digit == '.') throw new IOException("Not a number: " + number); } } - else if (number.indexOf('.') == -1 && (number.toLowerCase().indexOf('e') == -1)) + if (isFloat(number)) + { + return new COSFloat(number); + } + try { - try + if (number.charAt(0) == '+') { - if (number.charAt(0) == '+') - { - return COSInteger.get(Long.parseLong(number.substring(1))); - } - return COSInteger.get(Long.parseLong(number)); + // PDFBOX-2569: some numbers start with "+" + return COSInteger.get(Long.parseLong(number.substring(1))); } - catch( NumberFormatException e ) + return COSInteger.get(Long.parseLong(number)); + } + catch (NumberFormatException e) + { + // check if the given string could be a number at all + String numberString = number.startsWith("+") || number.startsWith("-") + ? number.substring(1) : number; + if (!numberString.matches("[0-9]*")) { - // might be a huge number, see PDFBOX-3116 - return new COSFloat(number); + throw new IOException("Not a number: " + number); } + return null; } - else + } + + private static boolean isFloat( String number ) + { + int length = number.length(); + for (int i = 0; i < length; i++) { - return new COSFloat(number); + char digit = number.charAt(i); + if (digit == '.' 
|| digit == 'e') + { + return true; + } } + return false; } } diff --git a/library/src/main/java/com/tom_roush/pdfbox/filter/Predictor.java b/library/src/main/java/com/tom_roush/pdfbox/filter/Predictor.java index 31d8b278..aedc6261 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/filter/Predictor.java +++ b/library/src/main/java/com/tom_roush/pdfbox/filter/Predictor.java @@ -17,13 +17,11 @@ import java.io.FilterOutputStream; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.util.Arrays; import com.tom_roush.pdfbox.cos.COSDictionary; import com.tom_roush.pdfbox.cos.COSName; -import com.tom_roush.pdfbox.io.IOUtils; /** * Helper class to contain predictor decoding used by Flate and LZW filter. @@ -203,53 +201,6 @@ else if (absb <= absc) } } - static void decodePredictor(int predictor, int colors, int bitsPerComponent, int columns, InputStream in, OutputStream out) - throws IOException - { - if (predictor == 1) - { - // no prediction - IOUtils.copy(in, out); - } - else - { - // calculate sizes - final int rowlength = calculateRowLength(colors, bitsPerComponent, columns); - byte[] actline = new byte[rowlength]; - byte[] lastline = new byte[rowlength]; - - int linepredictor = predictor; - - while (in.available() > 0) - { - // test for PNG predictor; each value >= 10 (not only 15) indicates usage of PNG predictor - if (predictor >= 10) - { - // PNG predictor; each row starts with predictor type (0, 1, 2, 3, 4) - // read per line predictor - linepredictor = in.read(); - if (linepredictor == -1) - { - return; - } - // add 10 to tread value 0 as 10, 1 as 11, ... 
- linepredictor += 10; - } - - // read line - int i, offset = 0; - while (offset < rowlength && ((i = in.read(actline, offset, rowlength - offset)) != -1)) - { - offset += i; - } - - decodePredictorRow(linepredictor, colors, bitsPerComponent, columns, actline, lastline); - System.arraycopy(actline, 0, lastline, 0, rowlength); - out.write(actline); - } - } - } - static int calculateRowLength(int colors, int bitsPerComponent, int columns) { final int bitsPerPixel = colors * bitsPerComponent; diff --git a/library/src/main/java/com/tom_roush/pdfbox/multipdf/Overlay.java b/library/src/main/java/com/tom_roush/pdfbox/multipdf/Overlay.java index 07965001..501179c5 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/multipdf/Overlay.java +++ b/library/src/main/java/com/tom_roush/pdfbox/multipdf/Overlay.java @@ -128,7 +128,8 @@ public PDDocument overlay(Map specificPageOverlayFile) throws I } /** - * This will add overlays documents to a document. + * This will add overlays documents to a document. If you created the overlay documents with + * subsetted fonts, you need to save them first so that the subsetting gets done. * * @param specificPageOverlayDocuments Optional map of overlay documents for specific pages. The * page numbers are 1-based. The map must be empty (but not null) if no specific mappings are @@ -633,7 +634,8 @@ public void setDefaultOverlayFile(String defaultOverlayFile) } /** - * Sets the default overlay PDF. + * Sets the default overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. * * @param defaultOverlayPDF the default overlay PDF */ @@ -663,7 +665,8 @@ public void setFirstPageOverlayFile(String firstPageOverlayFile) } /** - * Sets the first page overlay PDF. + * Sets the first page overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. 
* * @param firstPageOverlayPDF the first page overlay PDF */ @@ -683,7 +686,8 @@ public void setLastPageOverlayFile(String lastPageOverlayFile) } /** - * Sets the last page overlay PDF. + * Sets the last page overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. * * @param lastPageOverlayPDF the last page overlay PDF */ @@ -703,7 +707,8 @@ public void setAllPagesOverlayFile(String allPagesOverlayFile) } /** - * Sets the all pages overlay PDF. + * Sets the all pages overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. * * @param allPagesOverlayPDF the all pages overlay PDF. This should not be a PDDocument that you * created on the fly, it should be saved first, if it contains any fonts that are subset. @@ -724,7 +729,8 @@ public void setOddPageOverlayFile(String oddPageOverlayFile) } /** - * Sets the odd page overlay PDF. + * Sets the odd page overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. * * @param oddPageOverlayPDF the odd page overlay PDF */ @@ -744,7 +750,8 @@ public void setEvenPageOverlayFile(String evenPageOverlayFile) } /** - * Sets the even page overlay PDF. + * Sets the even page overlay PDF. If you created the overlay document with + * subsetted fonts, you need to save it first so that the subsetting gets done. 
* * @param evenPageOverlayPDF the even page overlay PDF */ diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdfparser/BaseParser.java b/library/src/main/java/com/tom_roush/pdfbox/pdfparser/BaseParser.java index 9fefb8d6..4b29c23c 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdfparser/BaseParser.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdfparser/BaseParser.java @@ -473,7 +473,6 @@ else if( ch == '\\' ) case '5': case '6': case '7': - { StringBuilder octal = new StringBuilder(); octal.append( next ); c = seqSource.read(); @@ -508,13 +507,10 @@ else if( ch == '\\' ) } out.write(character); break; - } default: - { // dropping the backslash // see 7.3.4.2 Literal Strings for further information out.write(next); - } } } else @@ -835,15 +831,11 @@ else if( c == 'f' ) */ protected COSBase parseDirObject() throws IOException { - COSBase retval = null; - skipSpaces(); - int nextByte = seqSource.peek(); - char c = (char)nextByte; + char c = (char)seqSource.peek(); switch(c) { case '<': - { // pull off first left bracket int leftBracket = seqSource.read(); // check for second left bracket @@ -852,92 +844,57 @@ protected COSBase parseDirObject() throws IOException if(c == '<') { - retval = parseCOSDictionary(); + COSDictionary retval = parseCOSDictionary(); skipSpaces(); + return retval; } else { - retval = parseCOSString(); + return parseCOSString(); } - break; - } case '[': - { // array - retval = parseCOSArray(); - break; - } + return parseCOSArray(); case '(': - retval = parseCOSString(); - break; + return parseCOSString(); case '/': // name - retval = parseCOSName(); - break; + return parseCOSName(); case 'n': - { // null readExpectedString(NULL); - retval = COSNull.NULL; - break; - } + return COSNull.NULL; case 't': - { String trueString = new String( seqSource.readFully(4), ISO_8859_1 ); if( trueString.equals( TRUE ) ) { - retval = COSBoolean.TRUE; + return COSBoolean.TRUE; } else { throw new IOException( "expected true actual='" + 
trueString + "' " + seqSource + "' at offset " + seqSource.getPosition()); } - break; - } case 'f': - { String falseString = new String( seqSource.readFully(5), ISO_8859_1 ); if( falseString.equals( FALSE ) ) { - retval = COSBoolean.FALSE; + return COSBoolean.FALSE; } else { throw new IOException( "expected false actual='" + falseString + "' " + seqSource + "' at offset " + seqSource.getPosition()); } - break; - } case 'R': seqSource.read(); - retval = new COSObject(null); - break; + return new COSObject(null); case (char)-1: return null; default: - { if( Character.isDigit(c) || c == '-' || c == '+' || c == '.') { - StringBuilder buf = new StringBuilder(); - int ic = seqSource.read(); - c = (char)ic; - while( Character.isDigit( c )|| - c == '-' || - c == '+' || - c == '.' || - c == 'E' || - c == 'e' ) - { - buf.append( c ); - ic = seqSource.read(); - c = (char)ic; - } - if( ic != -1 ) - { - seqSource.unread(ic); - } - retval = COSNumber.get( buf.toString() ); + return parseCOSNumber(); } else { @@ -960,9 +917,26 @@ protected COSBase parseDirObject() throws IOException seqSource.unread(badString.getBytes(ISO_8859_1)); } } - } } - return retval; + return null; + } + + private COSNumber parseCOSNumber() throws IOException + { + StringBuilder buf = new StringBuilder(); + int ic = seqSource.read(); + char c = (char) ic; + while (Character.isDigit(c) || c == '-' || c == '+' || c == '.' 
|| c == 'E' || c == 'e') + { + buf.append(c); + ic = seqSource.read(); + c = (char) ic; + } + if (ic != -1) + { + seqSource.unread(ic); + } + return COSNumber.get(buf.toString()); } /** diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdfparser/COSParser.java b/library/src/main/java/com/tom_roush/pdfbox/pdfparser/COSParser.java index 676a5592..5ae70e61 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdfparser/COSParser.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdfparser/COSParser.java @@ -2744,9 +2744,13 @@ protected boolean parseXrefTable(long startByteOffset) throws IOException try { long currOffset = Long.parseLong(splitString[0]); - int currGenID = Integer.parseInt(splitString[1]); - COSObjectKey objKey = new COSObjectKey(currObjID, currGenID); - xrefTrailerResolver.setXRef(objKey, currOffset); + // skip 0 offsets + if (currOffset > 0) + { + int currGenID = Integer.parseInt(splitString[1]); + COSObjectKey objKey = new COSObjectKey(currObjID, currGenID); + xrefTrailerResolver.setXRef(objKey, currOffset); + } } catch (NumberFormatException e) { diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdfparser/PDFStreamParser.java b/library/src/main/java/com/tom_roush/pdfbox/pdfparser/PDFStreamParser.java index 74a9e92e..c97597cd 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdfparser/PDFStreamParser.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdfparser/PDFStreamParser.java @@ -133,19 +133,15 @@ public List getTokens() */ public Object parseNextToken() throws IOException { - Object retval; - skipSpaces(); - int nextByte = seqSource.peek(); - if( ((byte)nextByte) == -1 ) + if (seqSource.isEOF()) { return null; } - char c = (char)nextByte; + char c = (char) seqSource.peek(); switch (c) { case '<': - { // pull off first left bracket int leftBracket = seqSource.read(); @@ -157,74 +153,57 @@ public Object parseNextToken() throws IOException if (c == '<') { - retval = parseCOSDictionary(); + return parseCOSDictionary(); } 
else { - retval = parseCOSString(); + return parseCOSString(); } - break; - } case '[': - { // array - retval = parseCOSArray(); - break; - } + return parseCOSArray(); case '(': // string - retval = parseCOSString(); - break; + return parseCOSString(); case '/': // name - retval = parseCOSName(); - break; + return parseCOSName(); case 'n': - { // null String nullString = readString(); if( nullString.equals( "null") ) { - retval = COSNull.NULL; + return COSNull.NULL; } else { - retval = Operator.getOperator(nullString); + return Operator.getOperator(nullString); } - break; - } case 't': case 'f': - { String next = readString(); if( next.equals( "true" ) ) { - retval = COSBoolean.TRUE; - break; + return COSBoolean.TRUE; } else if( next.equals( "false" ) ) { - retval = COSBoolean.FALSE; + return COSBoolean.FALSE; } else { - retval = Operator.getOperator(next); + return Operator.getOperator(next); } - break; - } case 'R': - { String line = readString(); if( line.equals( "R" ) ) { - retval = new COSObject( null ); + return new COSObject(null); } else { - retval = Operator.getOperator(line); + return Operator.getOperator(line); } - break; - } case '0': case '1': case '2': @@ -238,7 +217,6 @@ else if( next.equals( "false" ) ) case '-': case '+': case '.': - { /* We will be filling buf with the rest of the number. Only * allow 1 "." and "-" and "+" at start of number. 
*/ StringBuilder buf = new StringBuilder(); @@ -266,39 +244,35 @@ else if( next.equals( "false" ) ) dotNotRead = false; } } - retval = COSNumber.get( buf.toString() ); - break; - } + return COSNumber.get(buf.toString()); case 'B': - { - String next = readString(); - retval = Operator.getOperator(next); - if (next.equals(OperatorName.BEGIN_INLINE_IMAGE)) + String nextOperator = readString(); + Operator beginImageOP = Operator.getOperator(nextOperator); + if (nextOperator.equals(OperatorName.BEGIN_INLINE_IMAGE)) { - Operator beginImageOP = (Operator)retval; COSDictionary imageParams = new COSDictionary(); - beginImageOP.setImageParameters( imageParams ); + beginImageOP.setImageParameters(imageParams); Object nextToken = null; - while( (nextToken = parseNextToken()) instanceof COSName ) + while ((nextToken = parseNextToken()) instanceof COSName) { Object value = parseNextToken(); - imageParams.setItem( (COSName)nextToken, (COSBase)value ); + imageParams.setItem((COSName) nextToken, (COSBase) value); } - //final token will be the image data, maybe?? + // final token will be the image data, maybe?? 
if (nextToken instanceof Operator) { Operator imageData = (Operator) nextToken; - if (imageData.getImageData() == null || imageData.getImageData().length == 0) + if (imageData.getImageData() == null + || imageData.getImageData().length == 0) { - Log.w("PdfBox-Android", "empty inline image at stream offset " + seqSource.getPosition()); + Log.w("PdfBox-Android", "empty inline image at stream offset " + + seqSource.getPosition()); } beginImageOP.setImageData(imageData.getImageData()); } } - break; - } + return beginImageOP; case 'I': - { //Special case for ID operator String id = Character.toString((char) seqSource.read()) + (char) seqSource.read(); if (!id.equals(OperatorName.BEGIN_INLINE_IMAGE_DATA)) @@ -329,37 +303,26 @@ else if( next.equals( "false" ) ) currentByte = seqSource.read(); } // the EI operator isn't unread, as it won't be processed anyway - retval = Operator.getOperator(OperatorName.BEGIN_INLINE_IMAGE_DATA); + Operator beginImageDataOP = Operator + .getOperator(OperatorName.BEGIN_INLINE_IMAGE_DATA); // save the image data to the operator, so that it can be accessed later - ((Operator)retval).setImageData( imageData.toByteArray() ); - break; - } + beginImageDataOP.setImageData(imageData.toByteArray()); + return beginImageDataOP; case ']': - { // some ']' around without its previous '[' // this means a PDF is somewhat corrupt but we will continue to parse. seqSource.read(); - // must be a better solution than null... 
- retval = COSNull.NULL; - break; - } + return COSNull.NULL; default: - { - //we must be an operator + // we must be an operator String operator = readOperator(); - if( operator.trim().length() == 0 ) - { - //we have a corrupt stream, stop reading here - retval = null; - } - else + if (operator.trim().length() > 0) { - retval = Operator.getOperator(operator); + return Operator.getOperator(operator); } - } } - return retval; + return null; } /** diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/COSArrayList.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/COSArrayList.java index cc9953d6..8ad9f0d1 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/COSArrayList.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/COSArrayList.java @@ -60,7 +60,7 @@ public COSArrayList() } /** - * Create the COSArrayList specifing the List and the backing COSArray. + * Create the COSArrayList specifying the List and the backing COSArray. * *

User of this constructor need to ensure that the entries in the List and * the backing COSArray are matching i.e. the COSObject of the List entry is diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/PDNameTreeNode.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/PDNameTreeNode.java index 96f39417..752e1ed3 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/PDNameTreeNode.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/PDNameTreeNode.java @@ -112,7 +112,7 @@ public boolean isRootNode() public List> getKids() { List> retval = null; - COSArray kids = (COSArray)node.getDictionaryObject( COSName.KIDS ); + COSArray kids = node.getCOSArray(COSName.KIDS); if( kids != null ) { List> pdObjects = new ArrayList>(); @@ -255,13 +255,18 @@ public T getValue( String name ) throws IOException */ public Map getNames() throws IOException { - COSArray namesArray = (COSArray)node.getDictionaryObject( COSName.NAMES ); + COSArray namesArray = node.getCOSArray(COSName.NAMES); if( namesArray != null ) { Map names = new LinkedHashMap(); for( int i=0; i names ) public String getUpperLimit() { String retval = null; - COSArray arr = (COSArray)node.getDictionaryObject( COSName.LIMITS ); + COSArray arr = node.getCOSArray(COSName.LIMITS); if( arr != null ) { retval = arr.getString( 1 ); @@ -343,7 +348,7 @@ public String getUpperLimit() */ private void setUpperLimit( String upper ) { - COSArray arr = (COSArray)node.getDictionaryObject( COSName.LIMITS ); + COSArray arr = node.getCOSArray(COSName.LIMITS); if( arr == null ) { arr = new COSArray(); @@ -362,7 +367,7 @@ private void setUpperLimit( String upper ) public String getLowerLimit() { String retval = null; - COSArray arr = (COSArray)node.getDictionaryObject( COSName.LIMITS ); + COSArray arr = node.getCOSArray(COSName.LIMITS); if( arr != null ) { retval = arr.getString( 0 ); @@ -377,7 +382,7 @@ public String getLowerLimit() */ private void setLowerLimit( String lower ) 
{ - COSArray arr = (COSArray)node.getDictionaryObject( COSName.LIMITS ); + COSArray arr = node.getCOSArray(COSName.LIMITS); if( arr == null ) { arr = new COSArray(); diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/function/PDFunction.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/function/PDFunction.java index 83698f97..ddabe022 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/function/PDFunction.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/function/PDFunction.java @@ -303,7 +303,7 @@ protected float[] clipToRange(float[] inputValues) { COSArray rangesArray = getRangeValues(); float[] result; - if (rangesArray != null) + if (rangesArray != null && rangesArray.size() > 0) { float[] rangeValues = rangesArray.toFloatArray(); int numberOfRanges = rangeValues.length/2; diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/function/PDFunctionType0.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/function/PDFunctionType0.java index 52576045..d2a3272d 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/function/PDFunctionType0.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/common/function/PDFunctionType0.java @@ -19,6 +19,7 @@ import android.util.Log; import java.io.IOException; +import java.io.InputStream; import com.tom_roush.harmony.javax.imageio.stream.ImageInputStream; import com.tom_roush.harmony.javax.imageio.stream.MemoryCacheImageInputStream; @@ -389,7 +390,8 @@ private int[][] getSamples() // PDF spec 1.7 p.171: // Each sample value is represented as a sequence of BitsPerSample bits. // Successive values are adjacent in the bit stream; there is no padding at byte boundaries. 
- ImageInputStream mciis = new MemoryCacheImageInputStream(getPDStream().createInputStream()); + InputStream inputStream = getPDStream().createInputStream(); + ImageInputStream mciis = new MemoryCacheImageInputStream(inputStream); for (int i = 0; i < arraySize; i++) { for (int k = 0; k < nOut; k++) @@ -400,6 +402,7 @@ private int[][] getSamples() index++; } mciis.close(); + inputStream.close(); } catch (IOException exception) { diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/encryption/StandardSecurityHandler.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/encryption/StandardSecurityHandler.java index e8ce16fd..fa0c31c0 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/encryption/StandardSecurityHandler.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/encryption/StandardSecurityHandler.java @@ -1079,7 +1079,7 @@ private static byte[] computeHash2B(byte[] input, byte[] password, byte[] userKe { try { - MessageDigest md = MessageDigest.getInstance("SHA-256"); + MessageDigest md = MessageDigests.getSHA256(); byte[] k = md.digest(input); byte[] e = null; @@ -1149,19 +1149,11 @@ private static byte[] computeHash2B(byte[] input, byte[] password, byte[] userKe } private static byte[] computeSHA256(byte[] input, byte[] password, byte[] userKey) - throws IOException { - try - { - MessageDigest md = MessageDigest.getInstance("SHA-256"); - md.update(input); - md.update(password); - return userKey == null ? md.digest() : md.digest(userKey); - } - catch (NoSuchAlgorithmException e) - { - throw new IOException(e); - } + MessageDigest md = MessageDigests.getSHA256(); + md.update(input); + md.update(password); + return userKey == null ? 
md.digest() : md.digest(userKey); } private static byte[] concat(byte[] a, byte[] b) diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/FontMapperImpl.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/FontMapperImpl.java index 71ab6df0..33ad96ef 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/FontMapperImpl.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/FontMapperImpl.java @@ -18,6 +18,7 @@ import android.util.Log; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -115,21 +116,15 @@ final class FontMapperImpl implements FontMapper try { String ttfName = "com/tom_roush/pdfbox/resources/ttf/LiberationSans-Regular.ttf"; - InputStream ttfStream = null; + InputStream ttfStream; if (PDFBoxResourceLoader.isReady()) { ttfStream = PDFBoxResourceLoader.getStream(ttfName); } - - if (ttfStream == null) - { - // Fallback - ttfStream = FontMapper.class.getResourceAsStream("/" + ttfName); - } - - if (ttfStream == null) + else { - throw new IOException("Error loading resource: " + ttfName); + ttfStream = + new BufferedInputStream(FontMapper.class.getResourceAsStream("/" + ttfName)); } TTFParser ttfParser = new TTFParser(); lastResortFont = ttfParser.parse(ttfStream); diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDFont.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDFont.java index 911f7cb3..c90db8b7 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDFont.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDFont.java @@ -391,7 +391,7 @@ public float getAverageFontWidth() { float totalWidth = 0.0f; float characterCount = 0.0f; - COSArray widths = (COSArray) dict.getDictionaryObject(COSName.WIDTHS); + COSArray widths = dict.getCOSArray(COSName.WIDTHS); if (widths != null) { for (int i = 0; i < widths.size(); i++) @@ -501,7 +501,7 @@ protected final List getWidths() 
{ if (widths == null) { - COSArray array = (COSArray) dict.getDictionaryObject(COSName.WIDTHS); + COSArray array = dict.getCOSArray(COSName.WIDTHS); if (array != null) { widths = COSArrayList.convertFloatCOSArrayToList(array); diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDFontDescriptor.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDFontDescriptor.java index 7d2b6d0b..74a126e1 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDFontDescriptor.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDFontDescriptor.java @@ -429,7 +429,7 @@ public void setFlags( int flags ) */ public PDRectangle getFontBoundingBox() { - COSArray rect = (COSArray)dic.getDictionaryObject( COSName.FONT_BBOX ); + COSArray rect = dic.getCOSArray(COSName.FONT_BBOX); PDRectangle retval = null; if( rect != null ) { diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDType0Font.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDType0Font.java index f3fde873..dc1f5e97 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDType0Font.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDType0Font.java @@ -77,7 +77,7 @@ public static PDType0Font load(PDDocument doc, File file) throws IOException */ public static PDType0Font load(PDDocument doc, InputStream input) throws IOException { - return new PDType0Font(doc, new TTFParser().parse(input), true, true, false); + return load(doc, input, true); } /** diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDType1Font.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDType1Font.java index cdfe6d18..4b15affb 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDType1Font.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/PDType1Font.java @@ -172,15 +172,7 @@ else if ("Symbol".equals(baseFont)) */ public PDType1Font(PDDocument doc, InputStream pfbIn) throws 
IOException { - PDType1FontEmbedder embedder = new PDType1FontEmbedder(doc, dict, pfbIn, null); - encoding = embedder.getFontEncoding(); - glyphList = embedder.getGlyphList(); - type1font = embedder.getType1Font(); - genericFont = embedder.getType1Font(); - isEmbedded = true; - isDamaged = false; - fontMatrixTransform = new AffineTransform(); - codeToBytesMap = new HashMap(); + this(doc, pfbIn, null); } /** @@ -194,7 +186,7 @@ public PDType1Font(PDDocument doc, InputStream pfbIn) throws IOException public PDType1Font(PDDocument doc, InputStream pfbIn, Encoding encoding) throws IOException { PDType1FontEmbedder embedder = new PDType1FontEmbedder(doc, dict, pfbIn, encoding); - this.encoding = encoding; + this.encoding = encoding == null ? embedder.getFontEncoding() : encoding; glyphList = embedder.getGlyphList(); type1font = embedder.getType1Font(); genericFont = embedder.getType1Font(); diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/Standard14Fonts.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/Standard14Fonts.java index a0a8026b..ec7a815e 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/Standard14Fonts.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/Standard14Fonts.java @@ -17,11 +17,11 @@ package com.tom_roush.pdfbox.pdmodel.font; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -37,103 +37,99 @@ */ final class Standard14Fonts { - private static final Set STANDARD_14_NAMES = new HashSet(34); - private static final Map STANDARD_14_MAPPING = new HashMap(34); - private static final Map STANDARD14_AFM_MAP = new HashMap(34); + /** + * Contains all base names and alias names for the known fonts. + * For base fonts both the key and the value will be the base name. 
+ * For aliases, the key is an alias, and the value is a base name. + * We want a single lookup in the map to find the font both by a base name or an alias. + */ + private static final Map ALIASES = new HashMap(38); + + /** + * Contains the font metrics for the base fonts. + * The key is a base font name, value is a FontMetrics instance. + * Metrics are loaded into this map on demand, only if needed. + * @see #getAFM + */ + private static final Map FONTS = new HashMap(14); + static { - try - { - addAFM("Courier-Bold"); - addAFM("Courier-BoldOblique"); - addAFM("Courier"); - addAFM("Courier-Oblique"); - addAFM("Helvetica"); - addAFM("Helvetica-Bold"); - addAFM("Helvetica-BoldOblique"); - addAFM("Helvetica-Oblique"); - addAFM("Symbol"); - addAFM("Times-Bold"); - addAFM("Times-BoldItalic"); - addAFM("Times-Italic"); - addAFM("Times-Roman"); - addAFM("ZapfDingbats"); - - // alternative names from Adobe Supplement to the ISO 32000 - addAFM("CourierCourierNew", "Courier"); - addAFM("CourierNew", "Courier"); - addAFM("CourierNew,Italic", "Courier-Oblique"); - addAFM("CourierNew,Bold", "Courier-Bold"); - addAFM("CourierNew,BoldItalic", "Courier-BoldOblique"); - addAFM("Arial", "Helvetica"); - addAFM("Arial,Italic", "Helvetica-Oblique"); - addAFM("Arial,Bold", "Helvetica-Bold"); - addAFM("Arial,BoldItalic", "Helvetica-BoldOblique"); - addAFM("TimesNewRoman", "Times-Roman"); - addAFM("TimesNewRoman,Italic", "Times-Italic"); - addAFM("TimesNewRoman,Bold", "Times-Bold"); - addAFM("TimesNewRoman,BoldItalic", "Times-BoldItalic"); - - // Acrobat treats these fonts as "standard 14" too (at least Acrobat preflight says so) - addAFM("Symbol,Italic", "Symbol"); - addAFM("Symbol,Bold", "Symbol"); - addAFM("Symbol,BoldItalic", "Symbol"); - addAFM("Times", "Times-Roman"); - addAFM("Times,Italic", "Times-Italic"); - addAFM("Times,Bold", "Times-Bold"); - addAFM("Times,BoldItalic", "Times-BoldItalic"); - - // PDFBOX-3457: PDF.js file bug864847.pdf - addAFM("ArialMT", "Helvetica"); - 
addAFM("Arial-ItalicMT", "Helvetica-Oblique"); - addAFM("Arial-BoldMT", "Helvetica-Bold"); - addAFM("Arial-BoldItalicMT", "Helvetica-BoldOblique"); - } - catch (IOException e) - { - throw new RuntimeException(e); - } + // the 14 standard fonts + mapName("Courier-Bold"); + mapName("Courier-BoldOblique"); + mapName("Courier"); + mapName("Courier-Oblique"); + mapName("Helvetica"); + mapName("Helvetica-Bold"); + mapName("Helvetica-BoldOblique"); + mapName("Helvetica-Oblique"); + mapName("Symbol"); + mapName("Times-Bold"); + mapName("Times-BoldItalic"); + mapName("Times-Italic"); + mapName("Times-Roman"); + mapName("ZapfDingbats"); + + // alternative names from Adobe Supplement to the ISO 32000 + mapName("CourierCourierNew", "Courier"); + mapName("CourierNew", "Courier"); + mapName("CourierNew,Italic", "Courier-Oblique"); + mapName("CourierNew,Bold", "Courier-Bold"); + mapName("CourierNew,BoldItalic", "Courier-BoldOblique"); + mapName("Arial", "Helvetica"); + mapName("Arial,Italic", "Helvetica-Oblique"); + mapName("Arial,Bold", "Helvetica-Bold"); + mapName("Arial,BoldItalic", "Helvetica-BoldOblique"); + mapName("TimesNewRoman", "Times-Roman"); + mapName("TimesNewRoman,Italic", "Times-Italic"); + mapName("TimesNewRoman,Bold", "Times-Bold"); + mapName("TimesNewRoman,BoldItalic", "Times-BoldItalic"); + + // Acrobat treats these fonts as "standard 14" too (at least Acrobat preflight says so) + mapName("Symbol,Italic", "Symbol"); + mapName("Symbol,Bold", "Symbol"); + mapName("Symbol,BoldItalic", "Symbol"); + mapName("Times", "Times-Roman"); + mapName("Times,Italic", "Times-Italic"); + mapName("Times,Bold", "Times-Bold"); + mapName("Times,BoldItalic", "Times-BoldItalic"); + + // PDFBOX-3457: PDF.js file bug864847.pdf + mapName("ArialMT", "Helvetica"); + mapName("Arial-ItalicMT", "Helvetica-Oblique"); + mapName("Arial-BoldMT", "Helvetica-Bold"); + mapName("Arial-BoldItalicMT", "Helvetica-BoldOblique"); } private Standard14Fonts() { } - private static void addAFM(String 
fontName) throws IOException - { - addAFM(fontName, fontName); - } - - private static void addAFM(String fontName, String afmName) throws IOException + /** + * Loads the metrics for the base font specified by name. Metric file must exist in the pdfbox + * jar under /org/apache/pdfbox/resources/afm/ + * + * @param fontName one of the standard 14 font names for which to load the metrics. + * @throws IOException if no metrics exist for that font. + */ + private static void loadMetrics(String fontName) throws IOException { - STANDARD_14_NAMES.add(fontName); - STANDARD_14_MAPPING.put(fontName, afmName); - - if (STANDARD14_AFM_MAP.containsKey(afmName)) - { - STANDARD14_AFM_MAP.put(fontName, STANDARD14_AFM_MAP.get(afmName)); - } - - String resourceName = "com/tom_roush/pdfbox/resources/afm/" + afmName + ".afm"; + String resourceName = "com/tom_roush/pdfbox/resources/afm/" + fontName + ".afm"; InputStream afmStream; if (PDFBoxResourceLoader.isReady()) { - afmStream = PDFBoxResourceLoader.getStream(resourceName); + afmStream = new BufferedInputStream(PDFBoxResourceLoader.getStream(resourceName)); } else { - afmStream = PDType1Font.class.getResourceAsStream("/" + resourceName); - } - - if (afmStream == null) - { - throw new IOException(resourceName + " not found"); + afmStream = new BufferedInputStream(PDType1Font.class.getResourceAsStream("/" + resourceName)); } try { AFMParser parser = new AFMParser(afmStream); FontMetrics metric = parser.parse(true); - STANDARD14_AFM_MAP.put(fontName, metric); + FONTS.put(fontName, metric); } finally { @@ -142,37 +138,95 @@ private static void addAFM(String fontName, String afmName) throws IOException } /** - * Returns the AFM for the given font. - * @param baseName base name of font + * Adds a standard font name to the map of known aliases, to simplify the logic of finding + * font metrics by name. We want a single lookup in the map to find the font both by a base name or + * an alias. 
+ * + * @see #getAFM + * @param baseName the base name of the font; must be one of the 14 standard fonts + */ + private static void mapName(String baseName) + { + ALIASES.put(baseName, baseName); + } + + /** + * Adds an alias name for a standard font to the map of known aliases + * (alias as key, standard name as value). We want a single lookup in the map to find the font + * both by a base name or an alias. + * + * @param alias an alias for the font + * @param baseName the base name of the font; must be one of the 14 standard fonts */ - public static FontMetrics getAFM(String baseName) + private static void mapName(String alias, String baseName) { - return STANDARD14_AFM_MAP.get(baseName); + ALIASES.put(alias, baseName); + } + + /** + * Returns the metrics for font specified by fontName. Loads the font metrics if not already + * loaded. + * + * @param fontName name of font; either a base name or alias + * @return the font metrics or null if the name is not one of the known names + * @throws IllegalArgumentException if no metrics exist for that font. + */ + public static FontMetrics getAFM(String fontName) + { + String baseName = ALIASES.get(fontName); + if (baseName == null) + { + return null; + } + + if (FONTS.get(baseName) == null) + { + synchronized (FONTS) + { + if (FONTS.get(baseName) == null) + { + try + { + loadMetrics(baseName); + } + catch (IOException ex) + { + throw new IllegalArgumentException(ex); + } + } + } + } + + return FONTS.get(baseName); } /** - * Returns true if the given font name a Standard 14 font. - * @param baseName base name of font + * Returns true if the given font name is one of the known names, including alias. 
+ * + * @param fontName the name of font, either a base name or alias + * @return true if the name is one of the known names */ - public static boolean containsName(String baseName) + public static boolean containsName(String fontName) { - return STANDARD_14_NAMES.contains(baseName); + return ALIASES.containsKey(fontName); } /** - * Returns the set of Standard 14 font names, including additional names. + * Returns the set of known font names, including aliases. */ public static Set getNames() { - return Collections.unmodifiableSet(STANDARD_14_NAMES); + return Collections.unmodifiableSet(ALIASES.keySet()); } /** - * Returns the name of the actual font which the given font name maps to. - * @param baseName base name of font + * Returns the base name of the font which the given font name maps to. + * + * @param fontName name of font, either a base name or an alias + * @return the base name or null if this is not one of the known names */ - public static String getMappedFontName(String baseName) + public static String getMappedFontName(String fontName) { - return STANDARD_14_MAPPING.get(baseName); + return ALIASES.get(fontName); } } diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/TrueTypeEmbedder.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/TrueTypeEmbedder.java index b1db745b..82250d24 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/TrueTypeEmbedder.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/TrueTypeEmbedder.java @@ -93,7 +93,26 @@ abstract class TrueTypeEmbedder implements Subsetter if (!embedSubset) { // full embedding - PDStream stream = new PDStream(document, ttf.getOriginalData(), COSName.FLATE_DECODE); + + // TrueType collections are not supported + InputStream is = ttf.getOriginalData(); + byte[] b = new byte[4]; + is.mark(b.length); + if (is.read(b) == b.length && new String(b).equals("ttcf")) + { + is.close(); + throw new IOException("Full embedding of TrueType font 
collections not supported"); + } + if (is.markSupported()) + { + is.reset(); + } + else + { + is.close(); + is = ttf.getOriginalData(); + } + PDStream stream = new PDStream(document, is, COSName.FLATE_DECODE); stream.getCOSObject().setLong(COSName.LENGTH1, ttf.getOriginalDataSize()); fontDescriptor.setFontFile2(stream); } @@ -140,15 +159,13 @@ private boolean isEmbeddingPermitted(TrueTypeFont ttf) throws IOException if (ttf.getOS2Windows() != null) { int fsType = ttf.getOS2Windows().getFsType(); - int exclusive = fsType & 0x8; // bits 0-3 are a set of exclusive bits - - if ((exclusive & OS2WindowsMetricsTable.FSTYPE_RESTRICTED) == + if ((fsType & OS2WindowsMetricsTable.FSTYPE_RESTRICTED) == OS2WindowsMetricsTable.FSTYPE_RESTRICTED) { // restricted License embedding return false; } - else if ((exclusive & OS2WindowsMetricsTable.FSTYPE_BITMAP_ONLY) == + else if ((fsType & OS2WindowsMetricsTable.FSTYPE_BITMAP_ONLY) == OS2WindowsMetricsTable.FSTYPE_BITMAP_ONLY) { // bitmap embedding only @@ -184,7 +201,15 @@ private PDFontDescriptor createFontDescriptor(TrueTypeFont ttf) throws IOExcepti fd.setFontName(ttf.getName()); OS2WindowsMetricsTable os2 = ttf.getOS2Windows(); + if (os2 == null) + { + throw new IOException("os2 table is missing in font " + ttf.getName()); + } PostScriptTable post = ttf.getPostScript(); + if (post == null) + { + throw new IOException("post table is missing in font " + ttf.getName()); + } // Flags fd.setFixedPitch(post.getIsFixedPitch() > 0 || diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/DictionaryEncoding.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/DictionaryEncoding.java index 53fb23c1..57862ef7 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/DictionaryEncoding.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/DictionaryEncoding.java @@ -180,6 +180,11 @@ public COSBase getCOSObject() @Override public String getEncodingName() { + 
if (baseEncoding == null) + { + // In type 3 the /Differences array shall specify the complete character encoding + return "differences"; + } return baseEncoding.getEncodingName() + " with differences"; } } diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/MacRomanEncoding.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/MacRomanEncoding.java index 0f2f7eab..faf7a772 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/MacRomanEncoding.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/MacRomanEncoding.java @@ -242,7 +242,7 @@ public class MacRomanEncoding extends Encoding {0172, "z"}, {060, "zero"}, // adding an additional mapping as defined in Appendix D of the pdf spec - {0312, "space"} + {0312, "nbspace"} }; /** diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java index 31b93645..da6c888c 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/font/encoding/WinAnsiEncoding.java @@ -251,7 +251,7 @@ public class WinAnsiEncoding extends Encoding {0236, "zcaron"}, {060, "zero"}, // adding some additional mappings as defined in Appendix D of the pdf spec - {0240, "space"}, + {0240, "nbspace"}, {0255, "hyphen"} }; diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/graphics/image/PDImageXObject.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/graphics/image/PDImageXObject.java index 130c30df..3655ff3d 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/graphics/image/PDImageXObject.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/graphics/image/PDImageXObject.java @@ -528,6 +528,11 @@ private float[] extractMatte(PDImageXObject softMask) throws IOException // see PDF specification 1.7, 
11.6.5.3 Soft-Mask Images matte = ((COSArray) base).toFloatArray(); // convert to RGB + if (matte.length < getColorSpace().getNumberOfComponents()) + { + Log.e("PdfBox-Android", "Image /Matte entry not long enough for colorspace, skipped"); + return null; + } matte = getColorSpace().toRGB(matte); } return matte; diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java index 03513e30..5fc0b6e2 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java @@ -224,7 +224,11 @@ public void setBaseState(BaseState state) */ public String[] getGroupNames() { - COSArray ocgs = (COSArray)dict.getDictionaryObject(COSName.OCGS); + COSArray ocgs = dict.getCOSArray(COSName.OCGS); + if (ocgs == null) + { + return new String[0]; + } int size = ocgs.size(); String[] groups = new String[size]; for (int i = 0; i < size; i++) diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/AppearanceGeneratorHelper.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/AppearanceGeneratorHelper.java index 2259b31b..30cdf6fb 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/AppearanceGeneratorHelper.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/AppearanceGeneratorHelper.java @@ -797,7 +797,7 @@ private float calculateFontSize(PDFont font, PDRectangle contentRect) throws IOE { float width = contentRect.getWidth() - contentRect.getLowerLeftX(); float fs = MINIMUM_FONT_SIZE; - while (fs <= MAXIMUM_FONT_SIZE) + while (fs <= DEFAULT_FONT_SIZE) { // determine the number of lines needed for this font and contentRect int numLines = 0; @@ -817,7 
+817,7 @@ private float calculateFontSize(PDFont font, PDRectangle contentRect) throws IOE } fs++; } - return Math.min(fs, MAXIMUM_FONT_SIZE); + return Math.min(fs, DEFAULT_FONT_SIZE); } // Acrobat defaults to 12 for multiline text with size 0 diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDAcroForm.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDAcroForm.java index 710cba6d..07f93e93 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDAcroForm.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDAcroForm.java @@ -316,9 +316,7 @@ public void flatten(List fields, boolean refreshAppearances) throws IOE { annotations.add(annotation); } - else if (!annotation.isInvisible() && !annotation.isHidden() && - annotation.getNormalAppearanceStream() != null && - annotation.getNormalAppearanceStream().getBBox() != null) + else if (isVisibleAnnotation(annotation)) { contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, true, !isContentStreamWrapped); isContentStreamWrapped = true; @@ -392,7 +390,21 @@ else if (!annotation.isInvisible() && !annotation.isHidden() && // remove XFA for hybrid forms dictionary.removeItem(COSName.XFA); + } + private boolean isVisibleAnnotation(PDAnnotation annotation) + { + if (annotation.isInvisible() || annotation.isHidden()) + { + return false; + } + PDAppearanceStream normalAppearanceStream = annotation.getNormalAppearanceStream(); + if (normalAppearanceStream == null) + { + return false; + } + PDRectangle bbox = normalAppearanceStream.getBBox(); + return bbox != null && bbox.getWidth() > 0 && bbox.getHeight() > 0; } /** diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDSignatureField.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDSignatureField.java index 53f00fbc..45e8dac1 100644 --- 
a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDSignatureField.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PDSignatureField.java @@ -73,16 +73,13 @@ public PDSignatureField(PDAcroForm acroForm) throws IOException private String generatePartialName() { String fieldName = "Signature"; - Set sigNames = new HashSet(); + Set nameSet = new HashSet(); for (PDField field : getAcroForm().getFieldTree()) { - if(field instanceof PDSignatureField) - { - sigNames.add(field.getPartialName()); - } + nameSet.add(field.getPartialName()); } int i = 1; - while(sigNames.contains(fieldName+i)) + while (nameSet.contains(fieldName + i)) { ++i; } diff --git a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PlainText.java b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PlainText.java index 8659aa2b..372cfa12 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PlainText.java +++ b/library/src/main/java/com/tom_roush/pdfbox/pdmodel/interactive/form/PlainText.java @@ -173,6 +173,9 @@ List getLines(PDFont font, float fontSize, float width) throws IOException String word = textContent.substring(start,end); float wordWidth = font.getStringWidth(word) * scale; + boolean wordNeedsSplit = false; + int splitOffset = end - start; + lineWidth = lineWidth + wordWidth; // check if the last word would fit without the whitespace ending it @@ -182,7 +185,7 @@ List getLines(PDFont font, float fontSize, float width) throws IOException lineWidth = lineWidth - whitespaceWidth; } - if (lineWidth >= width) + if (lineWidth >= width && !textLine.getWords().isEmpty()) { textLine.setWidth(textLine.calculateWidth(font, fontSize)); textLines.add(textLine); @@ -190,13 +193,40 @@ List getLines(PDFont font, float fontSize, float width) throws IOException lineWidth = font.getStringWidth(word) * scale; } + if (wordWidth > width && textLine.getWords().isEmpty()) + { + // single word does not fit 
into width + wordNeedsSplit = true; + while (true) + { + splitOffset--; + String substring = word.trim().substring(0, splitOffset); + float substringWidth = font.getStringWidth(substring) * scale; + if (substringWidth < width) + { + word = substring; + wordWidth = font.getStringWidth(word) * scale; + lineWidth = wordWidth; + break; + } + } + } + AttributedString as = new AttributedString(word); as.addAttribute(TextAttribute.WIDTH, wordWidth); Word wordInstance = new Word(word); wordInstance.setAttributes(as); textLine.addWord(wordInstance); - start = end; - end = iterator.next(); + + if (wordNeedsSplit) + { + start = start + splitOffset; + } + else + { + start = end; + end = iterator.next(); + } } textLine.setWidth(textLine.calculateWidth(font, fontSize)); textLines.add(textLine); diff --git a/library/src/main/java/com/tom_roush/pdfbox/rendering/PageDrawer.java b/library/src/main/java/com/tom_roush/pdfbox/rendering/PageDrawer.java index 1a93a93a..ca04596e 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/rendering/PageDrawer.java +++ b/library/src/main/java/com/tom_roush/pdfbox/rendering/PageDrawer.java @@ -772,11 +772,12 @@ public void drawImage(PDImage pdImage) throws IOException if (!pdImage.getInterpolate()) { - boolean isScaledUp = pdImage.getWidth() < Math.round(at.getScaleX()) || - pdImage.getHeight() < Math.round(at.getScaleY()); - // if the image is scaled down, we use smooth interpolation, eg PDFBOX-2364 // only when scaled up do we use nearest neighbour, eg PDFBOX-2302 / mori-cvpr01.pdf + // PDFBOX-4930: we use the sizes of the ARGB image. These can be different + // than the original sizes of the base image, when the mask is bigger. 
+ boolean isScaledUp = pdImage.getImage().getWidth() < Math.round(at.getScaleX()) || + pdImage.getImage().getHeight() < Math.round(at.getScaleY()); if (isScaledUp) { // graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, @@ -850,18 +851,16 @@ private void drawBitmap(Bitmap image, AffineTransform at) throws IOException // graphics.setComposite(getGraphicsState().getNonStrokingJavaComposite()); setClip(); AffineTransform imageTransform = new AffineTransform(at); + int width = image.getWidth(); + int height = image.getHeight(); + imageTransform.scale(1.0 / width, -1.0 / height); + imageTransform.translate(0, -height); + PDSoftMask softMask = getGraphicsState().getSoftMask(); if( softMask != null ) { - imageTransform.scale(1, -1); - imageTransform.translate(0, -1); -// Paint awtPaint = new TexturePaint(image, -// new Rectangle2D.Double(imageTransform.getTranslateX(), imageTransform.getTranslateY(), -// imageTransform.getScaleX(), imageTransform.getScaleY())); -// awtPaint = applySoftMaskToPaint(awtPaint, softMask); -// graphics.setPaint(awtPaint); - RectF unitRect = new RectF(0, 0, 1, 1); -// graphics.fill(at.createTransformedShape(unitRect)); + RectF rectangle = new RectF(0, 0, width, height); +// Paint awtPaint; TODO: PdfBox-Android } else { @@ -871,11 +870,6 @@ private void drawBitmap(Bitmap image, AffineTransform at) throws IOException image = applyTransferFunction(image, transfer); } - int width = image.getWidth(); - int height = image.getHeight(); - imageTransform.scale(1.0 / width, -1.0 / height); - imageTransform.translate(0, -height); - canvas.drawBitmap(image, imageTransform.toMatrix(), paint); } } diff --git a/library/src/main/java/com/tom_roush/pdfbox/text/LegacyPDFStreamEngine.java b/library/src/main/java/com/tom_roush/pdfbox/text/LegacyPDFStreamEngine.java index 13f8eec8..4c351546 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/text/LegacyPDFStreamEngine.java +++ b/library/src/main/java/com/tom_roush/pdfbox/text/LegacyPDFStreamEngine.java 
@@ -20,6 +20,8 @@ import java.io.IOException; import java.io.InputStream; +import java.util.Map; +import java.util.WeakHashMap; import com.tom_roush.fontbox.ttf.TrueTypeFont; import com.tom_roush.fontbox.util.BoundingBox; @@ -47,6 +49,7 @@ import com.tom_roush.pdfbox.contentstream.operator.text.ShowTextAdjusted; import com.tom_roush.pdfbox.contentstream.operator.text.ShowTextLine; import com.tom_roush.pdfbox.contentstream.operator.text.ShowTextLineAndSpace; +import com.tom_roush.pdfbox.cos.COSDictionary; import com.tom_roush.pdfbox.pdmodel.PDPage; import com.tom_roush.pdfbox.pdmodel.common.PDRectangle; import com.tom_roush.pdfbox.pdmodel.font.PDCIDFont; @@ -78,6 +81,7 @@ class LegacyPDFStreamEngine extends PDFStreamEngine private PDRectangle pageSize; private Matrix translateMatrix; private final GlyphList glyphList; + private final Map fontHeightMap = new WeakHashMap(); /** * Constructor. @@ -150,7 +154,9 @@ public void processPage(PDPage page) throws IOException * written by Ben Litchfield for PDFStreamEngine. */ @Override - protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, Vector displacement) + protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, + String unicode, + Vector displacement) throws IOException { // @@ -166,48 +172,6 @@ protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, Vect float horizontalScaling = state.getTextState().getHorizontalScaling() / 100f; Matrix textMatrix = getTextMatrix(); - BoundingBox bbox = font.getBoundingBox(); - if (bbox.getLowerLeftY() < Short.MIN_VALUE) - { - // PDFBOX-2158 and PDFBOX-3130 - // files by Salmat eSolutions / ClibPDF Library - bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536)); - } - // 1/2 the bbox is used as the height todo: why? 
- float glyphHeight = bbox.getHeight() / 2; - - // sometimes the bbox has very high values, but CapHeight is OK - PDFontDescriptor fontDescriptor = font.getFontDescriptor(); - if (fontDescriptor != null) - { - float capHeight = fontDescriptor.getCapHeight(); - if (Float.compare(capHeight, 0) != 0 && - (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) - { - glyphHeight = capHeight; - } - // PDFBOX-3464, PDFBOX-4480, PDFBOX-4553: - // sometimes even CapHeight has very high value, but Ascent and Descent are ok - float ascent = fontDescriptor.getAscent(); - float descent = fontDescriptor.getDescent(); - if (capHeight > ascent && ascent > 0 && descent < 0 && - ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) - { - glyphHeight = (ascent - descent) / 2; - } - } - - // transformPoint from glyph space -> text space - float height; - if (font instanceof PDType3Font) - { - height = font.getFontMatrix().transformPoint(0, glyphHeight).y; - } - else - { - height = glyphHeight / 1000; - } - float displacementX = displacement.getX(); // the sorting algorithm is based on the width of the character. 
As the displacement // for vertical characters doesn't provide any suitable value for it, we have to @@ -257,7 +221,13 @@ else if (font instanceof PDType0Font) // (modified) width and height calculations float dxDisplay = nextX - textRenderingMatrix.getTranslateX(); - float dyDisplay = height * textRenderingMatrix.getScalingFactorY(); + Float fontHeight = fontHeightMap.get(font.getCOSObject()); + if (fontHeight == null) + { + fontHeight = computeFontHeight(font); + fontHeightMap.put(font.getCOSObject(), fontHeight); + } + float dyDisplay = fontHeight * textRenderingMatrix.getScalingFactorY(); // // start of the original method @@ -301,17 +271,17 @@ else if (font instanceof PDType0Font) float spaceWidthDisplay = spaceWidthText * textRenderingMatrix.getScalingFactorX(); // use our additional glyph list for Unicode mapping - String unicode = font.toUnicode(code, glyphList); + String unicodeMapping = font.toUnicode(code, glyphList); // when there is no Unicode mapping available, Acrobat simply coerces the character code // into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want // this, which is why we leave it until this point in PDFTextStreamEngine. - if (unicode == null) + if (unicodeMapping == null) { if (font instanceof PDSimpleFont) { char c = (char) code; - unicode = new String(new char[] { c }); + unicodeMapping = new String(new char[] { c }); } else { @@ -337,10 +307,66 @@ else if (font instanceof PDType0Font) processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), pageSize.getHeight(), translatedTextRenderingMatrix, nextX, nextY, Math.abs(dyDisplay), dxDisplay, - Math.abs(spaceWidthDisplay), unicode, new int[] { code } , font, fontSize, + Math.abs(spaceWidthDisplay), unicodeMapping, new int[] { code }, font, + fontSize, (int)(fontSize * textMatrix.getScalingFactorX()))); } + /** + * Compute the font height. Override this if you want to use own calculations. + * + * @param font the font. 
+ * @return the font height. + * + * @throws IOException if there is an error while getting the font bounding box. + */ + protected float computeFontHeight(PDFont font) throws IOException + { + BoundingBox bbox = font.getBoundingBox(); + if (bbox.getLowerLeftY() < Short.MIN_VALUE) + { + // PDFBOX-2158 and PDFBOX-3130 + // files by Salmat eSolutions / ClibPDF Library + bbox.setLowerLeftY(- (bbox.getLowerLeftY() + 65536)); + } + // 1/2 the bbox is used as the height todo: why? + float glyphHeight = bbox.getHeight() / 2; + + // sometimes the bbox has very high values, but CapHeight is OK + PDFontDescriptor fontDescriptor = font.getFontDescriptor(); + if (fontDescriptor != null) + { + float capHeight = fontDescriptor.getCapHeight(); + if (Float.compare(capHeight, 0) != 0 && + (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = capHeight; + } + // PDFBOX-3464, PDFBOX-4480, PDFBOX-4553: + // sometimes even CapHeight has very high value, but Ascent and Descent are ok + float ascent = fontDescriptor.getAscent(); + float descent = fontDescriptor.getDescent(); + if (capHeight > ascent && ascent > 0 && descent < 0 && + ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = (ascent - descent) / 2; + } + } + + // transformPoint from glyph space -> text space + float height; + if (font instanceof PDType3Font) + { + height = font.getFontMatrix().transformPoint(0, glyphHeight).y; + } + else + { + height = glyphHeight / 1000; + } + + return height; + } + /** * A method provided as an event interface to allow a subclass to perform some specific * functionality when text needs to be processed. 
@@ -351,5 +377,4 @@ protected void processTextPosition(TextPosition text) { // subclasses can override to provide specific functionality } - } diff --git a/library/src/main/java/com/tom_roush/pdfbox/text/PDFTextStripper.java b/library/src/main/java/com/tom_roush/pdfbox/text/PDFTextStripper.java index 0dfb44e2..4efb7269 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/text/PDFTextStripper.java +++ b/library/src/main/java/com/tom_roush/pdfbox/text/PDFTextStripper.java @@ -18,6 +18,7 @@ import android.util.Log; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -216,6 +217,11 @@ public PDFTextStripper() throws IOException * This will return the text of a document. See writeText.
* NOTE: The document must not be encrypted when coming into this method. * + *

IMPORTANT: By default, text extraction is done in the same sequence as the text in the PDF page content stream. + * PDF is a graphic format, not a text format, and unlike HTML, it has no requirements that text one on page + * be rendered in a certain order. The order is the one that was determined by the software that created the + * PDF. To get text sorted from left to right and top to botton, use {@link #setSortByPosition(boolean)}. + * * @param doc The document to get the text from. * @return The text of the PDF document. * @throws IOException if the doc state is invalid or it is encrypted. @@ -1846,21 +1852,14 @@ private String handleDirection(String word) { if (PDFBoxResourceLoader.isReady()) { - input = PDFBoxResourceLoader.getStream(path); + input = new BufferedInputStream(PDFBoxResourceLoader.getStream(path)); } else { - input = PDFTextStripper.class.getResourceAsStream("/" + path); + input = new BufferedInputStream(PDFTextStripper.class.getResourceAsStream("/" + path)); } - if (input != null) - { - parseBidiFile(input); - } - else - { - Log.w("PdfBox-Android", "Could not find '" + path + "', mirroring char map will be empty: "); - } + parseBidiFile(input); } catch (IOException e) { diff --git a/library/src/main/java/com/tom_roush/pdfbox/util/Matrix.java b/library/src/main/java/com/tom_roush/pdfbox/util/Matrix.java index c9ed3536..bdd69cfa 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/util/Matrix.java +++ b/library/src/main/java/com/tom_roush/pdfbox/util/Matrix.java @@ -33,32 +33,42 @@ */ public final class Matrix implements Cloneable { - static final float[] DEFAULT_SINGLE = - { - 1,0,0, // a b 0 sx hy 0 note: hx and hy are reversed vs. the PDF spec as we use - 0,1,0, // c d 0 = hx sy 0 AffineTransform's definition x and y shear - 0,0,1 // tx ty 1 tx ty 1 - }; - - private final float[] single; + public static final int SIZE = 9; + private float[] single; + private static final float MAX_FLOAT_VALUE = 3.4028235E38f; /** * Constructor. 
This produces an identity matrix. */ public Matrix() { - single = new float[DEFAULT_SINGLE.length]; - System.arraycopy(DEFAULT_SINGLE, 0, single, 0, DEFAULT_SINGLE.length); + // a b 0 + // c d 0 + // tx ty 1 + // note: hx and hy are reversed vs.the PDF spec as we use AffineTransform's definition x and y shear + // sx hy 0 + // hx sy 0 + // tx ty 1 + single = new float[] { 1, 0, 0, 0, 1, 0, 0, 0, 1 }; + } + + /** + * Constructor. This produces a matrix with the given array as data. + * The source array is not copied or cloned. + */ + private Matrix(float[] src) + { + single = src; } /** * Creates a matrix from a 6-element (a b c d e f) COS array. * - * @param array + * @param array source array, elements must be or extend COSNumber */ public Matrix(COSArray array) { - single = new float[DEFAULT_SINGLE.length]; + single = new float[SIZE]; single[0] = ((COSNumber)array.getObject(0)).floatValue(); single[1] = ((COSNumber)array.getObject(1)).floatValue(); single[3] = ((COSNumber)array.getObject(2)).floatValue(); @@ -74,6 +84,11 @@ public Matrix(COSArray array) * specification. For simple purposes (rotate, scale, translate) it is recommended to use the * static methods below. * + * Produces the following matrix: + * a b 0 + * c d 0 + * e f 1 + * * @see Matrix#getRotateInstance(double, float, float) * @see Matrix#getScaleInstance(float, float) * @see Matrix#getTranslateInstance(float, float) @@ -87,7 +102,7 @@ public Matrix(COSArray array) */ public Matrix(float a, float b, float c, float d, float e, float f) { - single = new float[DEFAULT_SINGLE.length]; + single = new float[SIZE]; single[0] = a; single[1] = b; single[3] = c; @@ -99,18 +114,23 @@ public Matrix(float a, float b, float c, float d, float e, float f) /** * Creates a matrix with the same elements as the given AffineTransform. 
- * @param at + * @param at matrix elements will be initialize with the values from this affine transformation, as follows: + * + * scaleX shearY 0 + * shearX scaleY 0 + * transX transY 1 + * */ public Matrix(AffineTransform at) { - single = new float[DEFAULT_SINGLE.length]; - System.arraycopy(DEFAULT_SINGLE, 0, single, 0, DEFAULT_SINGLE.length); + single = new float[SIZE]; single[0] = (float)at.getScaleX(); single[1] = (float)at.getShearY(); single[3] = (float)at.getShearX(); single[4] = (float)at.getScaleY(); single[6] = (float)at.getTranslateX(); single[7] = (float)at.getTranslateY(); + single[8] = 1; } /** @@ -152,7 +172,10 @@ public static Matrix createMatrix(COSBase base) @Deprecated public void reset() { - System.arraycopy(DEFAULT_SINGLE, 0, single, 0, DEFAULT_SINGLE.length); + Arrays.fill(single, 0); + single[0] = 1; + single[4] = 1; + single[8] = 1; } /** @@ -269,8 +292,7 @@ public void concatenate(Matrix matrix) */ public void translate(Vector vector) { - Matrix m = Matrix.getTranslateInstance(vector.getX(), vector.getY()); - concatenate(m); + concatenate(Matrix.getTranslateInstance(vector.getX(), vector.getY())); } /** @@ -281,8 +303,7 @@ public void translate(Vector vector) */ public void translate(float tx, float ty) { - Matrix m = Matrix.getTranslateInstance(tx, ty); - concatenate(m); + concatenate(Matrix.getTranslateInstance(tx, ty)); } /** @@ -293,8 +314,7 @@ public void translate(float tx, float ty) */ public void scale(float sx, float sy) { - Matrix m = Matrix.getScaleInstance(sx, sy); - concatenate(m); + concatenate(Matrix.getScaleInstance(sx, sy)); } /** @@ -304,113 +324,81 @@ public void scale(float sx, float sy) */ public void rotate(double theta) { - Matrix m = Matrix.getRotateInstance(theta, 0, 0); - concatenate(m); + concatenate(Matrix.getRotateInstance(theta, 0, 0)); } /** - * This will take the current matrix and multiply it with a matrix that is passed in. - * - * @param b The matrix to multiply by. 
+ * This method multiplies this Matrix with the specified other Matrix, storing the product in a new instance. It is + * allowed to have (other == this). * - * @return The result of the two multiplied matrices. + * @param other the second operand Matrix in the multiplication; required + * @return the product of the two matrices. */ - public Matrix multiply( Matrix b ) + public Matrix multiply(Matrix other) { - return this.multiply(b, new Matrix()); + return multiply(other, new Matrix()); } /** - * This method multiplies this Matrix with the specified other Matrix, storing the product in the specified - * result Matrix. By reusing Matrix instances like this, multiplication chains can be executed without having - * to create many temporary Matrix objects. - *

- * It is allowed to have (other == this) or (result == this) or indeed (other == result) but if this is done, - * the backing float[] matrix values may be copied in order to ensure a correct product. + * This method multiplies this Matrix with the specified other Matrix, storing the product in the specified result + * Matrix. It is allowed to have (other == this) or (result == this) or indeed (other == result). + * + * See {@link #multiply(Matrix)} if you need a version with a single operator. + * + * @param other the second operand Matrix in the multiplication; required + * @param result the Matrix instance into which the result should be stored. If result is null, a new Matrix instance is + * created. + * @return the result. * - * @param other the second operand Matrix in the multiplication - * @param result the Matrix instance into which the result should be stored. If result is null, a new Matrix - * instance is created. - * @return the product of the two matrices. */ + @Deprecated public Matrix multiply( Matrix other, Matrix result ) { + float[] c = result != null && result != other && result != this ? result.single + : new float[SIZE]; + + multiplyArrays(single, other.single, c); + + if (!Matrix.isFinite(c[0]) // + || !Matrix.isFinite(c[1]) // + || !Matrix.isFinite(c[2]) // + || !Matrix.isFinite(c[3]) // + || !Matrix.isFinite(c[4]) // + || !Matrix.isFinite(c[5]) // + || !Matrix.isFinite(c[6]) // + || !Matrix.isFinite(c[7]) // + || !Matrix.isFinite(c[8])) + throw new IllegalArgumentException("Multiplying two matrices produces illegal values"); + if (result == null) { - result = new Matrix(); + return new Matrix(c); } - - if (other != null && other.single != null) + else { - // the operands - float[] thisOperand = this.single; - float[] otherOperand = other.single; - - // We're multiplying 2 sets of floats together to produce a third, but we allow - // any of these float[] instances to be the same objects. 
- // There is the possibility then to overwrite one of the operands with result values - // and therefore corrupt the result. - - // If either of these operands are the same float[] instance as the result, then - // they need to be copied. - - if (this == result) - { - final float[] thisOrigVals = new float[this.single.length]; - System.arraycopy(this.single, 0, thisOrigVals, 0, this.single.length); - - thisOperand = thisOrigVals; - } - if (other == result) - { - final float[] otherOrigVals = new float[other.single.length]; - System.arraycopy(other.single, 0, otherOrigVals, 0, other.single.length); - - otherOperand = otherOrigVals; - } - - result.single[0] = thisOperand[0] * otherOperand[0] - + thisOperand[1] * otherOperand[3] - + thisOperand[2] * otherOperand[6]; - result.single[1] = thisOperand[0] * otherOperand[1] - + thisOperand[1] * otherOperand[4] - + thisOperand[2] * otherOperand[7]; - result.single[2] = thisOperand[0] * otherOperand[2] - + thisOperand[1] * otherOperand[5] - + thisOperand[2] * otherOperand[8]; - result.single[3] = thisOperand[3] * otherOperand[0] - + thisOperand[4] * otherOperand[3] - + thisOperand[5] * otherOperand[6]; - result.single[4] = thisOperand[3] * otherOperand[1] - + thisOperand[4] * otherOperand[4] - + thisOperand[5] * otherOperand[7]; - result.single[5] = thisOperand[3] * otherOperand[2] - + thisOperand[4] * otherOperand[5] - + thisOperand[5] * otherOperand[8]; - result.single[6] = thisOperand[6] * otherOperand[0] - + thisOperand[7] * otherOperand[3] - + thisOperand[8] * otherOperand[6]; - result.single[7] = thisOperand[6] * otherOperand[1] - + thisOperand[7] * otherOperand[4] - + thisOperand[8] * otherOperand[7]; - result.single[8] = thisOperand[6] * otherOperand[2] - + thisOperand[7] * otherOperand[5] - + thisOperand[8] * otherOperand[8]; + result.single = c; + return result; } - if (Float.isInfinite(result.single[0]) || Float.isNaN(result.single[0]) // - || Float.isInfinite(result.single[1]) || Float.isNaN(result.single[1]) // 
- || Float.isInfinite(result.single[2]) || Float.isNaN(result.single[2]) // - || Float.isInfinite(result.single[3]) || Float.isNaN(result.single[3]) // - || Float.isInfinite(result.single[4]) || Float.isNaN(result.single[4]) // - || Float.isInfinite(result.single[5]) || Float.isNaN(result.single[5]) // - || Float.isInfinite(result.single[6]) || Float.isNaN(result.single[6]) // - || Float.isInfinite(result.single[7]) || Float.isNaN(result.single[7]) // - || Float.isInfinite(result.single[8]) || Float.isNaN(result.single[8])) - throw new IllegalArgumentException( - "Multiplying two matrices produces illegal values"); - return result; } + private static boolean isFinite(float f) + { + // this is faster than the combination of "isNaN" and "isInfinite" and Float.isFinite isn't available in java 6 + return Math.abs(f) <= MAX_FLOAT_VALUE; + } + + private void multiplyArrays(float[] a, float[] b, float[] c) + { + c[0] = a[0] * b[0] + a[1] * b[3] + a[2] * b[6]; + c[1] = a[0] * b[1] + a[1] * b[4] + a[2] * b[7]; + c[2] = a[0] * b[2] + a[1] * b[5] + a[2] * b[8]; + c[3] = a[3] * b[0] + a[4] * b[3] + a[5] * b[6]; + c[4] = a[3] * b[1] + a[4] * b[4] + a[5] * b[7]; + c[5] = a[3] * b[2] + a[4] * b[5] + a[5] * b[8]; + c[6] = a[6] * b[0] + a[7] * b[3] + a[8] * b[6]; + c[7] = a[6] * b[1] + a[7] * b[4] + a[8] * b[7]; + c[8] = a[6] * b[2] + a[7] * b[5] + a[8] * b[8]; + } /** * Transforms the given point by this matrix. * @@ -482,16 +470,18 @@ public Matrix extractScaling() /** * Convenience method to create a scaled instance. * - * @param sx The xscale operator. - * @param sy The yscale operator. + * Produces the following matrix: + * x 0 0 + * 0 y 0 + * 0 0 1 + * + * @param x The xscale operator. + * @param y The yscale operator. 
* @return A new matrix with just the x/y scaling */ - public static Matrix getScaleInstance(float sx, float sy) + public static Matrix getScaleInstance(float x, float y) { - Matrix matrix = new Matrix(); - matrix.single[0] = sx; - matrix.single[4] = sy; - return matrix; + return new Matrix(x, 0, 0, y, 0, 0); } /** @@ -512,30 +502,34 @@ public Matrix extractTranslating() /** * Convenience method to create a translating instance. * - * @param tx The x translating operator. - * @param ty The y translating operator. + * Produces the following matrix: + * 1 0 0 + * 0 1 0 + * x y 1 + * + * @param x The x translating operator. + * @param y The y translating operator. * @return A new matrix with just the x/y translating. * @deprecated Use {@link #getTranslateInstance} instead. */ @Deprecated - public static Matrix getTranslatingInstance(float tx, float ty) + public static Matrix getTranslatingInstance(float x, float y) { - return getTranslateInstance(tx, ty); + return new Matrix(1, 0, 0, 1, x, y); } /** * Convenience method to create a translating instance. * - * @param tx The x translating operator. - * @param ty The y translating operator. + * Produces the following matrix: 1 0 0 0 1 0 x y 1 + * + * @param x The x translating operator. + * @param y The y translating operator. * @return A new matrix with just the x/y translating. 
*/ - public static Matrix getTranslateInstance(float tx, float ty) + public static Matrix getTranslateInstance(float x, float y) { - Matrix matrix = new Matrix(); - matrix.single[6] = tx; - matrix.single[7] = ty; - return matrix; + return new Matrix(1, 0, 0, 1, x, y); } /** @@ -551,14 +545,7 @@ public static Matrix getRotateInstance(double theta, float tx, float ty) float cosTheta = (float)Math.cos(theta); float sinTheta = (float)Math.sin(theta); - Matrix matrix = new Matrix(); - matrix.single[0] = cosTheta; - matrix.single[1] = sinTheta; - matrix.single[3] = -sinTheta; - matrix.single[4] = cosTheta; - matrix.single[6] = tx; - matrix.single[7] = ty; - return matrix; + return new Matrix(cosTheta, sinTheta, -sinTheta, cosTheta, tx, ty); } /** @@ -569,9 +556,7 @@ public static Matrix getRotateInstance(double theta, float tx, float ty) */ public static Matrix concatenate(Matrix a, Matrix b) { - Matrix copy = a.clone(); - copy.concatenate(b); - return copy; + return b.multiply(a); } /** @@ -581,9 +566,7 @@ public static Matrix concatenate(Matrix a, Matrix b) @Override public Matrix clone() { - Matrix clone = new Matrix(); - System.arraycopy( single, 0, clone.single, 0, 9 ); - return clone; + return new Matrix(single.clone()); } /** @@ -593,8 +576,6 @@ public Matrix clone() */ public float getScalingFactorX() { - float xScale = single[0]; - /** * BM: if the trm is rotated, the calculation is a little more complicated * @@ -612,12 +593,12 @@ public float getScalingFactorX() * sqrt(x2) = * abs(x) */ - if( !(single[1]==0.0f && single[3]==0.0f) ) + if (single[1] != 0.0f) { - xScale = (float)Math.sqrt(Math.pow(single[0], 2)+ + return (float) Math.sqrt(Math.pow(single[0], 2) + Math.pow(single[1], 2)); } - return xScale; + return single[0]; } /** @@ -627,13 +608,12 @@ public float getScalingFactorX() */ public float getScalingFactorY() { - float yScale = single[4]; - if( !(single[1]==0.0f && single[3]==0.0f) ) + if (single[3] != 0.0f) { - yScale = 
(float)Math.sqrt(Math.pow(single[3], 2)+ + return (float) Math.sqrt(Math.pow(single[3], 2) + Math.pow(single[4], 2)); } - return yScale; + return single[4]; } /** diff --git a/library/src/main/java/com/tom_roush/pdfbox/util/Version.java b/library/src/main/java/com/tom_roush/pdfbox/util/Version.java index b670795a..cd42a5c0 100644 --- a/library/src/main/java/com/tom_roush/pdfbox/util/Version.java +++ b/library/src/main/java/com/tom_roush/pdfbox/util/Version.java @@ -17,6 +17,7 @@ package com.tom_roush.pdfbox.util; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.util.Properties; @@ -44,11 +45,7 @@ public static String getVersion() InputStream is = null; try { - is = Version.class.getResourceAsStream(PDFBOX_VERSION_PROPERTIES); - if (is == null) - { - return null; - } + is = new BufferedInputStream(Version.class.getResourceAsStream(PDFBOX_VERSION_PROPERTIES)); Properties properties = new Properties(); properties.load(is); return properties.getProperty("pdfbox.version", null); diff --git a/library/src/test/java/com/tom_roush/fontbox/cmap/TestCodespaceRange.java b/library/src/test/java/com/tom_roush/fontbox/cmap/TestCodespaceRange.java index 08e4dad9..f9cb3012 100644 --- a/library/src/test/java/com/tom_roush/fontbox/cmap/TestCodespaceRange.java +++ b/library/src/test/java/com/tom_roush/fontbox/cmap/TestCodespaceRange.java @@ -46,11 +46,17 @@ public void testCodeLength() */ public void testConstructor() { + // PDFBOX-4923 "1 begincodespacerange <00> endcodespacerange" case is accepted byte[] startBytes1 = new byte[] { 0x00 }; - byte[] endBytes2 = new byte[] { 0x01, 0x20 }; + byte[] endBytes2 = new byte[] { -1, -1 }; + new CodespaceRange(startBytes1, endBytes2); + + // other cases of different lengths are not + byte[] startBytes3 = new byte[] { 0x01 }; + byte[] endBytes4 = new byte[] { 0x01, 0x20 }; try { - new CodespaceRange(startBytes1, endBytes2); + new CodespaceRange(startBytes3, endBytes4); fail("The 
constructor should have thrown an IllegalArgumentException exception."); } catch (IllegalArgumentException exception) diff --git a/library/src/test/java/com/tom_roush/fontbox/ttf/BufferedRandomAccessFileTest.java b/library/src/test/java/com/tom_roush/fontbox/ttf/BufferedRandomAccessFileTest.java index c7864696..e34a13eb 100644 --- a/library/src/test/java/com/tom_roush/fontbox/ttf/BufferedRandomAccessFileTest.java +++ b/library/src/test/java/com/tom_roush/fontbox/ttf/BufferedRandomAccessFileTest.java @@ -21,12 +21,12 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; - import org.junit.Assert; import org.junit.Test; /** * @author Cameron Rollhieser + * @author Tilman Hausherr */ public class BufferedRandomAccessFileTest { @@ -48,14 +48,108 @@ public void ensureReadFinishes() throws IOException outputStream.close(); final byte[] readBuffer = new byte[2]; - final BufferedRandomAccessFile buffer = new BufferedRandomAccessFile(file, "r", 4); + final BufferedRandomAccessFile braf = new BufferedRandomAccessFile(file, "r", 4); int amountRead; int totalAmountRead = 0; - while ((amountRead = buffer.read(readBuffer, 0, 2)) != -1) + while ((amountRead = braf.read(readBuffer, 0, 2)) != -1) { totalAmountRead += amountRead; } Assert.assertEquals(10, totalAmountRead); + braf.close(); + file.delete(); + } + + /** + * Test several reading patterns, both reading within a buffer and across buffer. 
+ * + * @throws IOException + */ + @Test + public void testReadBuffer() throws IOException + { + final File file = File.createTempFile("apache-pdfbox", ".dat"); + + OutputStream outputStream = new BufferedOutputStream(new FileOutputStream(file)); + final String content = "012345678A012345678B012345678C012345678D"; + outputStream.write(content.getBytes("UTF-8")); + outputStream.flush(); + outputStream.close(); + + final byte[] readBuffer = new byte[40]; + final BufferedRandomAccessFile braf = new BufferedRandomAccessFile(file, "r", 10); + + int count = 4; + int bytesRead = braf.read(readBuffer, 0, count); + Assert.assertEquals(4, braf.getFilePointer()); + Assert.assertEquals(count, bytesRead); + Assert.assertEquals("0123", new String(readBuffer, 0, count)); + + count = 6; + bytesRead = braf.read(readBuffer, 0, count); + Assert.assertEquals(10, braf.getFilePointer()); + Assert.assertEquals(count, bytesRead); + Assert.assertEquals("45678A", new String(readBuffer, 0, count)); + + count = 10; + bytesRead = braf.read(readBuffer, 0, count); + Assert.assertEquals(20, braf.getFilePointer()); + Assert.assertEquals(count, bytesRead); + Assert.assertEquals("012345678B", new String(readBuffer, 0, count)); + + count = 10; + bytesRead = braf.read(readBuffer, 0, count); + Assert.assertEquals(30, braf.getFilePointer()); + Assert.assertEquals(count, bytesRead); + Assert.assertEquals("012345678C", new String(readBuffer, 0, count)); + + count = 10; + bytesRead = braf.read(readBuffer, 0, count); + Assert.assertEquals(40, braf.getFilePointer()); + Assert.assertEquals(count, bytesRead); + Assert.assertEquals("012345678D", new String(readBuffer, 0, count)); + + Assert.assertEquals(-1, braf.read()); + + braf.seek(0); + braf.read(readBuffer, 0, 7); + Assert.assertEquals(7, braf.getFilePointer()); + + count = 16; + bytesRead = braf.read(readBuffer, 0, count); + Assert.assertEquals(23, braf.getFilePointer()); + Assert.assertEquals(count, bytesRead); + Assert.assertEquals("78A012345678B012", 
new String(readBuffer, 0, count)); + + bytesRead = braf.read(readBuffer, 0, 99); + Assert.assertEquals(40, braf.getFilePointer()); + Assert.assertEquals(17, bytesRead); + Assert.assertEquals("345678C012345678D", new String(readBuffer, 0, 17)); + + Assert.assertEquals(-1, braf.read()); + + braf.seek(0); + braf.read(readBuffer, 0, 7); + Assert.assertEquals(7, braf.getFilePointer()); + + count = 23; + bytesRead = braf.read(readBuffer, 0, count); + Assert.assertEquals(30, braf.getFilePointer()); + Assert.assertEquals(count, bytesRead); + Assert.assertEquals("78A012345678B012345678C", new String(readBuffer, 0, count)); + + braf.seek(0); + braf.read(readBuffer, 0, 10); + Assert.assertEquals(10, braf.getFilePointer()); + count = 23; + bytesRead = braf.read(readBuffer, 0, count); + Assert.assertEquals(33, braf.getFilePointer()); + Assert.assertEquals(count, bytesRead); + Assert.assertEquals("012345678B012345678C012", new String(readBuffer, 0, count)); + + braf.close(); + + file.delete(); } } diff --git a/library/src/test/java/com/tom_roush/pdfbox/cos/TestCOSFloat.java b/library/src/test/java/com/tom_roush/pdfbox/cos/TestCOSFloat.java index cb4555ea..c1e9c779 100644 --- a/library/src/test/java/com/tom_roush/pdfbox/cos/TestCOSFloat.java +++ b/library/src/test/java/com/tom_roush/pdfbox/cos/TestCOSFloat.java @@ -204,6 +204,66 @@ void runTest(float num) } + public void testVerySmallValues() throws IOException + { + double smallValue = Float.MIN_VALUE / 10d; + + assertEquals("Test must be performed with a value smaller than Float.MIN_VALUE.", -1, + Double.compare(smallValue, Float.MIN_VALUE)); + + // 1.4012984643248171E-46 + String asString = String.valueOf(smallValue); + COSFloat cosFloat = new COSFloat(asString); + assertEquals(0.0f, cosFloat.floatValue()); + + // 0.00000000000000000000000000000000000000000000014012984643248171 + asString = new BigDecimal(asString).toPlainString(); + cosFloat = new COSFloat(asString); + assertEquals(0.0f, cosFloat.floatValue()); + + smallValue 
*= -1; + + // -1.4012984643248171E-46 + asString = String.valueOf(smallValue); + cosFloat = new COSFloat(asString); + assertEquals(0.0f, cosFloat.floatValue()); + + // -0.00000000000000000000000000000000000000000000014012984643248171 + asString = new BigDecimal(asString).toPlainString(); + cosFloat = new COSFloat(asString); + assertEquals(0.0f, cosFloat.floatValue()); + } + + public void testVeryLargeValues() throws IOException + { + double largeValue = Float.MAX_VALUE * 10d; + + assertEquals("Test must be performed with a value larger than Float.MAX_VALUE.", 1, + Double.compare(largeValue, Float.MIN_VALUE)); + + // 1.4012984643248171E-46 + String asString = String.valueOf(largeValue); + COSFloat cosFloat = new COSFloat(asString); + assertEquals(Float.MAX_VALUE, cosFloat.floatValue()); + + // 0.00000000000000000000000000000000000000000000014012984643248171 + asString = new BigDecimal(asString).toPlainString(); + cosFloat = new COSFloat(asString); + assertEquals(Float.MAX_VALUE, cosFloat.floatValue()); + + largeValue *= -1; + + // -1.4012984643248171E-46 + asString = String.valueOf(largeValue); + cosFloat = new COSFloat(asString); + assertEquals(-Float.MAX_VALUE, cosFloat.floatValue()); + + // -0.00000000000000000000000000000000000000000000014012984643248171 + asString = new BigDecimal(asString).toPlainString(); + cosFloat = new COSFloat(asString); + assertEquals(-Float.MAX_VALUE, cosFloat.floatValue()); + } + @Override public void testIntValue() { diff --git a/library/src/test/java/com/tom_roush/pdfbox/cos/TestCOSNumber.java b/library/src/test/java/com/tom_roush/pdfbox/cos/TestCOSNumber.java index 9dcecbb4..19d6616b 100644 --- a/library/src/test/java/com/tom_roush/pdfbox/cos/TestCOSNumber.java +++ b/library/src/test/java/com/tom_roush/pdfbox/cos/TestCOSNumber.java @@ -78,11 +78,37 @@ public void testGet() { // PASS } - + // PDFBOX-2569: some numbers start with "+" + assertEquals(COSNumber.get("1"), COSNumber.get("+1")); + assertEquals(COSNumber.get("123"), 
COSNumber.get("+123")); } catch (IOException e) { fail("Failed to convert a number " + e.getMessage()); } } + + /** + * PDFBOX-4895: large number, too big for a long leads to a null value. + * + * @throws IOException + */ + public void testLargeNumber() throws IOException + { + assertNull(COSNumber.get("18446744073307448448")); + assertNull(COSNumber.get("-18446744073307448448")); + } + + public void testInvalidNumber() + { + try + { + COSNumber.get("18446744073307F448448"); + fail("Was expecting an IOException"); + } + catch (IOException e) + { + } + } + } diff --git a/library/src/test/java/com/tom_roush/pdfbox/filter/TestFilters.java b/library/src/test/java/com/tom_roush/pdfbox/filter/TestFilters.java index 73d5f2ad..7a1863cb 100644 --- a/library/src/test/java/com/tom_roush/pdfbox/filter/TestFilters.java +++ b/library/src/test/java/com/tom_roush/pdfbox/filter/TestFilters.java @@ -137,17 +137,17 @@ public void testPDFBOX4517() throws IOException } /** - * This will test the LZW filter with the sequence that failed in PDFBOX-1777. + * This will test the LZW filter with the sequence that failed in PDFBOX-1977. * To check that the test itself is legit, revert LZWFilter.java to rev 1571801, * which should fail this test. 
* * @throws IOException */ @Test - public void testPDFBOX1777() throws IOException + public void testPDFBOX1977() throws IOException { Filter lzwFilter = FilterFactory.INSTANCE.getFilter(COSName.LZW_DECODE); - byte[] byteArray = IOUtils.toByteArray(this.getClass().getResourceAsStream("/pdfbox/com/tom_roush/pdfbox/filter/PDFBOX-1777.bin")); + byte[] byteArray = IOUtils.toByteArray(this.getClass().getResourceAsStream("/pdfbox/com/tom_roush/pdfbox/filter/PDFBOX-1977.bin")); checkEncodeDecode(lzwFilter, byteArray); } diff --git a/library/src/test/java/com/tom_roush/pdfbox/pdmodel/common/COSArrayListTest.java b/library/src/test/java/com/tom_roush/pdfbox/pdmodel/common/COSArrayListTest.java index e387eb8e..404b9250 100644 --- a/library/src/test/java/com/tom_roush/pdfbox/pdmodel/common/COSArrayListTest.java +++ b/library/src/test/java/com/tom_roush/pdfbox/pdmodel/common/COSArrayListTest.java @@ -47,7 +47,7 @@ public class COSArrayListTest { // next two entries are to be used for comparison with // COSArrayList behaviour in order to ensure that the - // intented object is now at the correct position. + // intended object is now at the correct position. // Will also be used for Collection/Array based setting // and comparison static List tbcAnnotationsList; @@ -64,7 +64,7 @@ public class COSArrayListTest { private static final File OUT_DIR = new File("target/test-output/pdmodel/common"); /* - * Create thre new different annotations an add them to the Java List/Array as + * Create three new different annotations and add them to the Java List/Array as * well as PDFBox List/Array implementations. 
*/ @Before diff --git a/library/src/test/java/com/tom_roush/pdfbox/pdmodel/common/TestPDNumberTreeNode.java b/library/src/test/java/com/tom_roush/pdfbox/pdmodel/common/TestPDNumberTreeNode.java index 96a7471d..8973c1dd 100644 --- a/library/src/test/java/com/tom_roush/pdfbox/pdmodel/common/TestPDNumberTreeNode.java +++ b/library/src/test/java/com/tom_roush/pdfbox/pdmodel/common/TestPDNumberTreeNode.java @@ -22,7 +22,6 @@ import java.util.Map; import java.util.TreeMap; -import com.tom_roush.pdfbox.cos.COSBase; import com.tom_roush.pdfbox.cos.COSInteger; import org.junit.Assert; @@ -59,7 +58,7 @@ public PDTest(COSInteger cosInt) } @Override - public COSBase getCOSObject() + public COSInteger getCOSObject() { return COSInteger.get( value ); } diff --git a/library/src/test/java/com/tom_roush/pdfbox/pdmodel/interactive/form/MultilineFieldsTest.java b/library/src/test/java/com/tom_roush/pdfbox/pdmodel/interactive/form/MultilineFieldsTest.java new file mode 100644 index 00000000..7aa22ce2 --- /dev/null +++ b/library/src/test/java/com/tom_roush/pdfbox/pdmodel/interactive/form/MultilineFieldsTest.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.tom_roush.pdfbox.pdmodel.interactive.form; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import com.tom_roush.pdfbox.cos.COSName; +import com.tom_roush.pdfbox.cos.COSNumber; +import com.tom_roush.pdfbox.cos.COSString; +import com.tom_roush.pdfbox.pdfparser.PDFStreamParser; +import com.tom_roush.pdfbox.pdmodel.PDDocument; +import com.tom_roush.pdfbox.pdmodel.interactive.annotation.PDAnnotationWidget; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class MultilineFieldsTest +{ + private static final File IN_DIR = new File("src/test/resources/pdfbox/com/tom_roush/pdfbox/pdmodel/interactive/form"); + + // Test for PDFBOX-3812 + @Test + public void testMultilineAuto() throws IOException + { + PDDocument document = PDDocument.load(new File(IN_DIR, "PDFBOX3812-acrobat-multiline-auto.pdf")); + PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(); + + // Get and store the field sizes in the original PDF + PDTextField fieldMultiline = (PDTextField) acroForm.getField("Multiline"); + float fontSizeMultiline = getFontSizeFromAppearanceStream(fieldMultiline); + + PDTextField fieldSingleline = (PDTextField) acroForm.getField("Singleline"); + float fontSizeSingleline = getFontSizeFromAppearanceStream(fieldSingleline); + + PDTextField fieldMultilineAutoscale = (PDTextField) acroForm.getField("MultilineAutoscale"); + float fontSizeMultilineAutoscale = getFontSizeFromAppearanceStream(fieldMultilineAutoscale); + + PDTextField fieldSinglelineAutoscale = (PDTextField) acroForm.getField("SinglelineAutoscale"); + float fontSizeSinglelineAutoscale = getFontSizeFromAppearanceStream(fieldSinglelineAutoscale); + + fieldMultiline.setValue("Multiline - Fixed"); + fieldSingleline.setValue("Singleline - Fixed"); + fieldMultilineAutoscale.setValue("Multiline - auto"); + fieldSinglelineAutoscale.setValue("Singleline - auto"); + + assertEquals(fontSizeMultiline, 
getFontSizeFromAppearanceStream(fieldMultiline), 0.001f); + assertEquals(fontSizeSingleline, getFontSizeFromAppearanceStream(fieldSingleline), 0.001f); + assertEquals(fontSizeMultilineAutoscale, getFontSizeFromAppearanceStream(fieldMultilineAutoscale), 0.001f); + assertEquals(fontSizeSinglelineAutoscale, getFontSizeFromAppearanceStream(fieldSinglelineAutoscale), 0.025f); + } + + // Test for PDFBOX-3812 + @Test + public void testMultilineBreak() throws IOException + { + final String TEST_PDF = "PDFBOX-3835-input-acrobat-wrap.pdf"; + PDDocument document = PDDocument.load(new File(IN_DIR, TEST_PDF)); + PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(); + + // Get and store the field sizes in the original PDF + PDTextField fieldInput = (PDTextField) acroForm.getField("filled"); + String fieldValue = fieldInput.getValue(); + List acrobatLines = getTextLinesFromAppearanceStream(fieldInput); + fieldInput.setValue(fieldValue); + List pdfboxLines = getTextLinesFromAppearanceStream(fieldInput); + assertEquals("Number of lines generated by PDFBox shall match Acrobat", acrobatLines.size(),pdfboxLines.size()); + for (int i = 0; i < acrobatLines.size(); i++) + { + assertEquals("Number of characters per lines generated by PDFBox shall match Acrobat", acrobatLines.get(i).length(), pdfboxLines.get(i).length()); + } + document.close(); + } + + private float getFontSizeFromAppearanceStream(PDField field) throws IOException + { + PDAnnotationWidget widget = field.getWidgets().get(0); + PDFStreamParser parser = new PDFStreamParser(widget.getNormalAppearanceStream()); + + Object token = parser.parseNextToken(); + + while (token != null) + { + if (token instanceof COSName && ((COSName) token).getName().equals("Helv")) + { + token = parser.parseNextToken(); + if (token != null && token instanceof COSNumber) + { + return ((COSNumber) token).floatValue(); + } + } + token = parser.parseNextToken(); + } + return 0; + } + + private List getTextLinesFromAppearanceStream(PDField 
field) throws IOException + { + PDAnnotationWidget widget = field.getWidgets().get(0); + PDFStreamParser parser = new PDFStreamParser(widget.getNormalAppearanceStream()); + + Object token = parser.parseNextToken(); + + List lines = new ArrayList(); + + while (token != null) + { + if (token instanceof COSString) + { + lines.add(((COSString) token).getString()); + } + token = parser.parseNextToken(); + } + return lines; + } + +} diff --git a/library/src/test/java/com/tom_roush/pdfbox/text/TestTextStripper.java b/library/src/test/java/com/tom_roush/pdfbox/text/TestTextStripper.java index fedcf194..7d881367 100644 --- a/library/src/test/java/com/tom_roush/pdfbox/text/TestTextStripper.java +++ b/library/src/test/java/com/tom_roush/pdfbox/text/TestTextStripper.java @@ -34,8 +34,12 @@ import java.util.LinkedList; import java.util.List; +import com.tom_roush.fontbox.util.BoundingBox; import com.tom_roush.pdfbox.pdmodel.PDDocument; import com.tom_roush.pdfbox.pdmodel.TestPDPageTree; +import com.tom_roush.pdfbox.pdmodel.font.PDFont; +import com.tom_roush.pdfbox.pdmodel.font.PDFontDescriptor; +import com.tom_roush.pdfbox.pdmodel.font.PDType3Font; import com.tom_roush.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination; import com.tom_roush.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline; import com.tom_roush.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem; @@ -120,7 +124,7 @@ public void setUp() } catch (IOException e) { - System.out.println(e.getMessage()); + e.printStackTrace(); } // If you want to test a single file using DEBUG logging, from an IDE, // you can do something like this: @@ -242,7 +246,6 @@ public void doTestFile(File inFile, File outDir, boolean bLogResult, boolean bSo } } - //System.out.println(" " + inFile + (bSort ? 
" (sorted)" : "")); PDDocument document = PDDocument.load(inFile); try { @@ -305,116 +308,117 @@ public void doTestFile(File inFile, File outDir, boolean bLogResult, boolean bSo return; } - boolean localFail = false; + compareResult(expectedFile, outFile, inFile, bSort, diffFile); + } + finally + { + document.close(); + } + } - LineNumberReader expectedReader = - new LineNumberReader(new InputStreamReader(new FileInputStream(expectedFile), ENCODING)); - LineNumberReader actualReader = - new LineNumberReader(new InputStreamReader(new FileInputStream(outFile), ENCODING)); + private void compareResult(File expectedFile, File outFile, File inFile, boolean bSort, File diffFile) + throws IOException + { + boolean localFail = false; - while (true) - { - String expectedLine = expectedReader.readLine(); - while( expectedLine != null && expectedLine.trim().length() == 0 ) - { - expectedLine = expectedReader.readLine(); - } - String actualLine = actualReader.readLine(); - while( actualLine != null && actualLine.trim().length() == 0 ) - { - actualLine = actualReader.readLine(); - } - if (!stringsEqual(expectedLine, actualLine)) - { - this.bFail = true; - localFail = true; - System.out.println("FAILURE: Line mismatch for file " + inFile.getName() + - " (sort = "+bSort+")" + - " at expected line: " + expectedReader.getLineNumber() + - " at actual line: " + actualReader.getLineNumber() + - "\nexpected line was: \"" + expectedLine + "\"" + - "\nactual line was: \"" + actualLine + "\"" + "\n"); - - //lets report all lines, even though this might produce some verbose logging - //break; - } + LineNumberReader expectedReader = + new LineNumberReader(new InputStreamReader(new FileInputStream(expectedFile), ENCODING)); + LineNumberReader actualReader = + new LineNumberReader(new InputStreamReader(new FileInputStream(outFile), ENCODING)); - if( expectedLine == null || actualLine==null) - { - break; - } + while (true) + { + String expectedLine = expectedReader.readLine(); + while( 
expectedLine != null && expectedLine.trim().length() == 0 ) + { + expectedLine = expectedReader.readLine(); } - expectedReader.close(); - actualReader.close(); - if (!localFail) + String actualLine = actualReader.readLine(); + while( actualLine != null && actualLine.trim().length() == 0 ) { - outFile.delete(); + actualLine = actualReader.readLine(); } - else + if (!stringsEqual(expectedLine, actualLine)) + { + this.bFail = true; + localFail = true; + System.out.println("FAILURE: Line mismatch for file " + inFile.getName() + + " (sort = "+bSort+")" + + " at expected line: " + expectedReader.getLineNumber() + + " at actual line: " + actualReader.getLineNumber() + + "\nexpected line was: \"" + expectedLine + "\"" + + "\nactual line was: \"" + actualLine + "\"" + "\n"); + + //lets report all lines, even though this might produce some verbose logging + //break; + } + + if (expectedLine == null || actualLine == null) { - // https://code.google.com/p/java-diff-utils/wiki/SampleUsage - List original = fileToLines(expectedFile); - List revised = fileToLines(outFile); + break; + } + } + expectedReader.close(); + actualReader.close(); + if (!localFail) + { + outFile.delete(); + } + else + { + // https://code.google.com/p/java-diff-utils/wiki/SampleUsage + List original = fileToLines(expectedFile); + List revised = fileToLines(outFile); - // Compute diff. Get the Patch object. Patch is the container for computed deltas. - Patch patch = DiffUtils.diff(original, revised); + // Compute diff. Get the Patch object. Patch is the container for computed deltas. 
+ Patch patch = DiffUtils.diff(original, revised); - PrintStream diffPS = new PrintStream(diffFile, ENCODING); - for (Object delta : patch.getDeltas()) + PrintStream diffPS = new PrintStream(diffFile, ENCODING); + for (Object delta : patch.getDeltas()) + { + if (delta instanceof ChangeDelta) { - if (delta instanceof ChangeDelta) - { - ChangeDelta cdelta = (ChangeDelta) delta; - diffPS.println("Org: " + cdelta.getOriginal()); - diffPS.println("New: " + cdelta.getRevised()); - diffPS.println(); - } - else if (delta instanceof DeleteDelta) - { - DeleteDelta ddelta = (DeleteDelta) delta; - diffPS.println("Org: " + ddelta.getOriginal()); - diffPS.println("New: " + ddelta.getRevised()); - diffPS.println(); - } - else if (delta instanceof InsertDelta) - { - InsertDelta idelta = (InsertDelta) delta; - diffPS.println("Org: " + idelta.getOriginal()); - diffPS.println("New: " + idelta.getRevised()); - diffPS.println(); - } - else - { - diffPS.println(delta); - } + ChangeDelta cdelta = (ChangeDelta) delta; + diffPS.println("Org: " + cdelta.getOriginal()); + diffPS.println("New: " + cdelta.getRevised()); + diffPS.println(); + } + else if (delta instanceof DeleteDelta) + { + DeleteDelta ddelta = (DeleteDelta) delta; + diffPS.println("Org: " + ddelta.getOriginal()); + diffPS.println("New: " + ddelta.getRevised()); + diffPS.println(); + } + else if (delta instanceof InsertDelta) + { + InsertDelta idelta = (InsertDelta) delta; + diffPS.println("Org: " + idelta.getOriginal()); + diffPS.println("New: " + idelta.getRevised()); + diffPS.println(); + } + else + { + diffPS.println(delta); } - diffPS.close(); } - } - finally - { - document.close(); + diffPS.close(); } } // Helper method for get the file content - private static List fileToLines(File file) + private static List fileToLines(File file) throws IOException { List lines = new LinkedList(); - String line = ""; - try - { - BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), ENCODING)); - while 
((line = in.readLine()) != null) - { - lines.add(line); - } - in.close(); - } - catch (IOException e) + String line; + + BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(file), ENCODING)); + while ((line = in.readLine()) != null) { - e.printStackTrace(); + lines.add(line); } + in.close(); + return lines; } @@ -440,6 +444,7 @@ private int findOutlineItemDestPageNum(PDDocument doc, PDOutlineItem oi) throws * must be empty. * * @throws IOException + * @throws URISyntaxException */ @Test public void testStripByOutlineItems() throws IOException, URISyntaxException @@ -604,4 +609,93 @@ public void testExtract() throws Exception fail("One or more failures, see test log for details"); } } + + @Test + public void testTabula() throws IOException + { + File pdfFile = new File("src/test/resources/pdfbox/input", "eu-001.pdf"); + File outFile = new File("target/test-output", "eu-001.pdf-tabula.txt"); + File expectedOutFile = new File("src/test/resources/pdfbox/input", "eu-001.pdf-tabula.txt"); + File diffFile = new File("target/test-output", "eu-001.pdf-tabula-diff.txt"); + PDDocument tabulaDocument = PDDocument.load(pdfFile); + PDFTextStripper tabulaStripper = new PDFTabulaTextStripper(); + + OutputStream os = new FileOutputStream(outFile); + + os.write(0xEF); + os.write(0xBB); + os.write(0xBF); + + Writer writer = new BufferedWriter(new OutputStreamWriter(os, ENCODING)); + try + { + tabulaStripper.writeText(tabulaDocument, writer); + } + finally + { + writer.close(); + } + + os.close(); + + compareResult(expectedOutFile, outFile, pdfFile, false, diffFile); + + assertFalse(bFail); + } + + private class PDFTabulaTextStripper extends PDFTextStripper + { + PDFTabulaTextStripper() throws IOException + { + // empty + } + + @Override + protected float computeFontHeight(PDFont font) throws IOException + { + BoundingBox bbox = font.getBoundingBox(); + if (bbox.getLowerLeftY() < Short.MIN_VALUE) + { + // PDFBOX-2158 and PDFBOX-3130 + // files by Salmat 
eSolutions / ClibPDF Library + bbox.setLowerLeftY(-(bbox.getLowerLeftY() + 65536)); + } + // 1/2 the bbox is used as the height todo: why? + float glyphHeight = bbox.getHeight() / 2; + + // sometimes the bbox has very high values, but CapHeight is OK + PDFontDescriptor fontDescriptor = font.getFontDescriptor(); + if (fontDescriptor != null) + { + float capHeight = fontDescriptor.getCapHeight(); + if (Float.compare(capHeight, 0) != 0 + && (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = capHeight; + } + // PDFBOX-3464, PDFBOX-448: + // sometimes even CapHeight has very high value, but Ascent and Descent are ok + float ascent = fontDescriptor.getAscent(); + float descent = fontDescriptor.getDescent(); + if (ascent > 0 && descent < 0 + && ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) + { + glyphHeight = (ascent - descent) / 2; + } + } + + // transformPoint from glyph space -> text space + float height; + if (font instanceof PDType3Font) + { + height = font.getFontMatrix().transformPoint(0, glyphHeight).y; + } + else + { + height = glyphHeight / 1000; + } + + return height; + } + } } diff --git a/library/src/test/java/com/tom_roush/pdfbox/util/MatrixTest.java b/library/src/test/java/com/tom_roush/pdfbox/util/MatrixTest.java index aae3cdab..8d113a5a 100644 --- a/library/src/test/java/com/tom_roush/pdfbox/util/MatrixTest.java +++ b/library/src/test/java/com/tom_roush/pdfbox/util/MatrixTest.java @@ -17,6 +17,7 @@ import com.tom_roush.pdfbox.cos.COSArray; import com.tom_roush.pdfbox.cos.COSFloat; +import com.tom_roush.pdfbox.cos.COSName; import org.junit.Test; import static org.junit.Assert.*; @@ -40,7 +41,105 @@ public void testConstructionAndCopy() throws Exception } @Test - public void testMultiplication() throws Exception + public void testGetScalingFactor() + { + // check scaling factor of an initial matrix + Matrix m1 = new Matrix(); + assertEquals(1, m1.getScalingFactorX(), 0); + assertEquals(1, 
m1.getScalingFactorY(), 0); + + // check scaling factor of an initial matrix + Matrix m2 = new Matrix(2, 4, 4, 2, 0, 0); + assertEquals((float) Math.sqrt(20), m2.getScalingFactorX(), 0); + assertEquals((float) Math.sqrt(20), m2.getScalingFactorY(), 0); + } + + @Test + public void testCreateMatrixUsingInvalidInput() + { + // anything but a COSArray is invalid and leads to an initial matrix + Matrix createMatrix = Matrix.createMatrix(COSName.A); + assertMatrixIsPristine(createMatrix); + + // a COSArray with fewer than 6 entries leads to an initial matrix + COSArray cosArray = new COSArray(); + cosArray.add(COSName.A); + createMatrix = Matrix.createMatrix(cosArray); + assertMatrixIsPristine(createMatrix); + + // a COSArray containing other kind of objects than COSNumber leads to an initial matrix + cosArray = new COSArray(); + for (int i = 0; i < 6; i++) + { + cosArray.add(COSName.A); + } + createMatrix = Matrix.createMatrix(cosArray); + assertMatrixIsPristine(createMatrix); + } + + @Test + public void testMultiplication() + { + // These matrices will not change - we use it to drive the various multiplications. + final Matrix const1 = new Matrix(); + final Matrix const2 = new Matrix(); + + // Create matrix with values + // [ 0, 1, 2 + // 1, 2, 3 + // 2, 3, 4] + for (int x = 0; x < 3; x++) + { + for (int y = 0; y < 3; y++) + { + const1.setValue(x, y, x + y); + const2.setValue(x, y, 8 + x + y); + } + } + + float[] m1MultipliedByM1 = new float[] { 5, 8, 11, 8, 14, 20, 11, 20, 29 }; + float[] m1MultipliedByM2 = new float[] { 29, 32, 35, 56, 62, 68, 83, 92, 101 }; + float[] m2MultipliedByM1 = new float[] { 29, 56, 83, 32, 62, 92, 35, 68, 101 }; + + Matrix var1 = const1.clone(); + Matrix var2 = const2.clone(); + + // Multiply two matrices together producing a new result matrix. 
+ Matrix result = var1.multiply(var2); + assertEquals(const1, var1); + assertEquals(const2, var2); + assertMatrixValuesEqualTo(m1MultipliedByM2, result); + + // Multiply two matrices together with the result being written to a third matrix + // (Any existing values there will be overwritten). + result = var1.multiply(var2); + assertEquals(const1, var1); + assertEquals(const2, var2); + assertMatrixValuesEqualTo(m1MultipliedByM2, result); + + // Multiply two matrices together with the result being written into 'this' matrix + var1 = const1.clone(); + var2 = const2.clone(); + var1.concatenate(var2); + assertEquals(const2, var2); + assertMatrixValuesEqualTo(m2MultipliedByM1, var1); + + var1 = const1.clone(); + var2 = const2.clone(); + result = Matrix.concatenate(var1, var2); + assertEquals(const1, var1); + assertEquals(const2, var2); + assertMatrixValuesEqualTo(m2MultipliedByM1, result); + + // Multiply the same matrix with itself with the result being written into 'this' matrix + var1 = const1.clone(); + result = var1.multiply(var1); + assertEquals(const1, var1); + assertMatrixValuesEqualTo(m1MultipliedByM1, result); + } + + @Test + public void testOldMultiplication() throws Exception { // This matrix will not change - we use it to drive the various multiplications. 
final Matrix testMatrix = new Matrix(); @@ -158,6 +257,64 @@ public void testPdfbox2872() } + @Test + public void testGetValues() + { + Matrix m = new Matrix(2, 4, 4, 2, 15, 30); + float[][] values = m.getValues(); + assertEquals(2, values[0][0], 0); + assertEquals(4, values[0][1], 0); + assertEquals(0, values[0][2], 0); + assertEquals(4, values[1][0], 0); + assertEquals(2, values[1][1], 0); + assertEquals(0, values[1][2], 0); + assertEquals(15, values[2][0], 0); + assertEquals(30, values[2][1], 0); + assertEquals(1, values[2][2], 0); + } + + @Test + public void testScaling() + { + Matrix m = new Matrix(2, 4, 4, 2, 15, 30); + m.scale(2, 3); + // first row, multiplication with 2 + assertEquals(4, m.getValue(0, 0), 0); + assertEquals(8, m.getValue(0, 1), 0); + assertEquals(0, m.getValue(0, 2), 0); + + // second row, multiplication with 3 + assertEquals(12, m.getValue(1, 0), 0); + assertEquals(6, m.getValue(1, 1), 0); + assertEquals(0, m.getValue(1, 2), 0); + + // third row, no changes at all + assertEquals(15, m.getValue(2, 0), 0); + assertEquals(30, m.getValue(2, 1), 0); + assertEquals(1, m.getValue(2, 2), 0); + } + + @Test + public void testTranslation() + { + Matrix m = new Matrix(2, 4, 4, 2, 15, 30); + m.translate(2, 3); + // first row, no changes at all + assertEquals(2, m.getValue(0, 0), 0); + assertEquals(4, m.getValue(0, 1), 0); + assertEquals(0, m.getValue(0, 2), 0); + + // second row, no changes at all + assertEquals(4, m.getValue(1, 0), 0); + assertEquals(2, m.getValue(1, 1), 0); + assertEquals(0, m.getValue(1, 2), 0); + + // third row, translated values + assertEquals(31, m.getValue(2, 0), 0); + assertEquals(44, m.getValue(2, 1), 0); + assertEquals(1, m.getValue(2, 2), 0); + } + /** * This method asserts that the matrix values for the given {@link Matrix} object are equal to the pristine, or * original, values. 
@@ -190,4 +347,19 @@ private void assertMatrixValuesEqualTo(float[] values, Matrix m) } } + //Uncomment annotation to run the test + // @Test + public void testMultiplicationPerformance() { + long start = System.currentTimeMillis(); + Matrix c; + Matrix d; + for (int i=0; i<100000000; i++) { + c = new Matrix(15, 3, 235, 55, 422, 1); + d = new Matrix(45, 345, 23, 551, 66, 832); + c.multiply(d); + c.concatenate(d); + } + long stop = System.currentTimeMillis(); + System.out.println("Matrix multiplication took " + (stop - start) + "ms."); + } } diff --git a/library/src/test/java/com/tom_roush/pdfbox/util/TestDateUtil.java b/library/src/test/java/com/tom_roush/pdfbox/util/TestDateUtil.java index 2ba26f72..e51ef49a 100644 --- a/library/src/test/java/com/tom_roush/pdfbox/util/TestDateUtil.java +++ b/library/src/test/java/com/tom_roush/pdfbox/util/TestDateUtil.java @@ -167,6 +167,10 @@ public void testDateConverter() throws Exception // PDFBOX-1219 checkParse(2001, 1,31,10,33, 0, +1, 0, "2001-01-31T10:33+01:00 "); + + // Same with milliseconds + checkParse(2001, 1,31,10,33, 0, +1, 0, "2001-01-31T10:33.123+01:00"); + // PDFBOX-465 checkParse(2002, 5,12, 9,47, 0, 0, 0, "9:47 5/12/2002"); // PDFBOX-465 @@ -215,6 +219,7 @@ public void testDateConverter() throws Exception checkParse(2000, 2,29, 0, 0, 0, 0, 0, "2000 Feb 29"); // valid date checkParse(2000, 2,29, 0, 0, 0,+11, 0, " 2000 Feb 29 GMT + 11:00"); // valid date + checkParse(2000, 2,29, 0, 0, 0,+11, 0, " 2000 Feb 29 UTC + 11:00"); // valid date checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "2100 Feb 29 GMT+11"); // invalid date checkParse(2012, 2,29, 0, 0, 0,+11, 0, "2012 Feb 29 GMT+11"); // valid date checkParse(BAD, 0, 0, 0, 0, 0, 0, 0, "2012 Feb 30 GMT+11"); // invalid date diff --git a/library/src/test/java/com/tom_roush/pdfbox/util/TestHexUtil.java b/library/src/test/java/com/tom_roush/pdfbox/util/TestHexUtil.java index d4300b90..3e44b4f6 100644 --- a/library/src/test/java/com/tom_roush/pdfbox/util/TestHexUtil.java +++ 
b/library/src/test/java/com/tom_roush/pdfbox/util/TestHexUtil.java @@ -15,9 +15,12 @@ */ package com.tom_roush.pdfbox.util; +import java.io.IOException; +import java.util.Locale; import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; +import static org.junit.Assert.assertArrayEquals; /** * @@ -48,17 +51,34 @@ public void testGetCharsUTF16BE() assertArrayEquals(new char[]{'5','E','2','E','5','2','A','9'}, Hex.getCharsUTF16BE("帮助")); } - private void assertArrayEquals(char[] expected, char[] actual) + /** + * Test getBytes() and getString() and decodeHex() + */ + public void testMisc() throws IOException { - assertEquals("Length of char array not equal", expected.length, actual.length); - for (int idx = 0; idx < expected.length; idx++) + byte[] byteSrcArray = new byte[256]; + for (int i = 0; i < 256; ++i) { - if (expected[idx] != actual[idx]) - { - fail(String.format("Character at index %d not equal. Expected '%c' but got '%c'", - idx, expected[idx], actual[idx])); - } + byteSrcArray[i] = (byte) i; + + byte[] bytes = Hex.getBytes((byte) i); + assertEquals(2, bytes.length); + String s2 = String.format(Locale.US, "%02X", i); + assertArrayEquals(s2.getBytes(Charsets.US_ASCII), bytes); + s2 = Hex.getString((byte) i); + assertArrayEquals(s2.getBytes(Charsets.US_ASCII), bytes); + + assertArrayEquals(new byte[]{(byte) i}, Hex.decodeHex(s2)); } + byte[] byteDstArray = Hex.getBytes(byteSrcArray); + assertEquals(byteDstArray.length, byteSrcArray.length * 2); + + String dstString = Hex.getString(byteSrcArray); + assertEquals(dstString.length(), byteSrcArray.length * 2); + + assertArrayEquals(dstString.getBytes(Charsets.US_ASCII), byteDstArray); + + assertArrayEquals(byteSrcArray, Hex.decodeHex(dstString)); } /** diff --git a/library/src/test/resources/pdfbox/com/tom_roush/pdfbox/filter/PDFBOX-1777.bin b/library/src/test/resources/pdfbox/com/tom_roush/pdfbox/filter/PDFBOX-1977.bin similarity index 100% rename from 
library/src/test/resources/pdfbox/com/tom_roush/pdfbox/filter/PDFBOX-1777.bin rename to library/src/test/resources/pdfbox/com/tom_roush/pdfbox/filter/PDFBOX-1977.bin diff --git a/library/src/test/resources/pdfbox/com/tom_roush/pdfbox/pdmodel/interactive/form/PDFBOX-3835-input-acrobat-wrap.pdf b/library/src/test/resources/pdfbox/com/tom_roush/pdfbox/pdmodel/interactive/form/PDFBOX-3835-input-acrobat-wrap.pdf new file mode 100644 index 00000000..d9aa1a91 Binary files /dev/null and b/library/src/test/resources/pdfbox/com/tom_roush/pdfbox/pdmodel/interactive/form/PDFBOX-3835-input-acrobat-wrap.pdf differ diff --git a/library/src/test/resources/pdfbox/com/tom_roush/pdfbox/pdmodel/interactive/form/PDFBOX3812-acrobat-multiline-auto.pdf b/library/src/test/resources/pdfbox/com/tom_roush/pdfbox/pdmodel/interactive/form/PDFBOX3812-acrobat-multiline-auto.pdf new file mode 100644 index 00000000..785a4393 Binary files /dev/null and b/library/src/test/resources/pdfbox/com/tom_roush/pdfbox/pdmodel/interactive/form/PDFBOX3812-acrobat-multiline-auto.pdf differ diff --git a/library/src/test/resources/pdfbox/input/eu-001.pdf b/library/src/test/resources/pdfbox/input/eu-001.pdf new file mode 100644 index 00000000..20680bda Binary files /dev/null and b/library/src/test/resources/pdfbox/input/eu-001.pdf differ diff --git a/library/src/test/resources/pdfbox/input/eu-001.pdf-sorted.txt b/library/src/test/resources/pdfbox/input/eu-001.pdf-sorted.txt new file mode 100644 index 00000000..c64993be --- /dev/null +++ b/library/src/test/resources/pdfbox/input/eu-001.pdf-sorted.txt @@ -0,0 +1,159 @@ +E-PRTR pollutants and their thresholds + +A facility has to report data under E-PRTR if it fulfils the following criteria: +• the facility falls under at least one of the 65 E-PRTR economic activities. 
The +activities are also reported using a statistical classification of economic activities +(NACE rev 2) +• the facility has a capacity exceeding at least one of the E-PRTR capacity +thresholds +• the facility releases pollutants or transfers waste off-site which exceed specific +thresholds set out in Article 5 of the E-PRTR Regulation. These thresholds for +releases of pollutants are specified for each media - air, water and land - in Annex +II of the E-PRTR Regulation. + +In the following tables you will find the 91 E-PRTR pollutants and their thresholds broken +down by the 7 groups used in all the searches of the E-PRTR website. + + +Greenhouse gases + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +Carbon dioxide (CO2) 100 million - - +Hydro-fluorocarbons (HFCs) 100 - - +Methane (CH4) 100 000 - - +Nitrous oxide (N2O) 10 000 - - +Perfluorocarbons (PFCs) 100 - - +Sulphur hexafluoride (SF6) 50 - - + +Other gases + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +Ammonia (NH3) 10 000 - - +Carbon monoxide (CO) 500 000 - - +Chlorine and inorganic compounds +(as HCl) 10 000 - - +Chlorofluorocarbons (CFCs) 1 - - +Flourine and inorganic compounds +(as HF) 5 000 - - +Halons 1 - - +Hydrochlorofluorocarbons (HCFCs) 1 - - +Hydrogen Cyanide (HCN) 200 - - +Nitrogen oxides (NOx/NO2) 100 000 - - +Non-methane volatile organic +compounds (NMVOC) 100 000 - - +Sulphur oxides (SOx/SO2) 150 000 - - + +Heavy metals + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +Arsenic and compounds (as As) 20 5 5 +Cadmium and compounds (as Cd) 10 5 5 +Chromium and compounds (as Cr) 100 50 50 +Copper and compounds (as Cu) 100 50 50 +Lead and compounds (as Pb) 200 20 20 +Mercury and compounds (as Hg) 10 1 1 +Nickel and compounds (as Ni) 50 20 20 +Zinc and compounds (as Zn) 200 100 100 + +Pesticides + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +1,2,3,4,5,6- hexachlorocyclohexane 
+(HCH) 10 1 1 +Alachlor - 1 1 +Aldrin 1 1 1 +Atrazine - 1 1 +Chlordane 1 1 1 +Chlordecone 1 1 1 +Chlorfenvinphos - 1 1 +Chlorpyrifos - 1 1 +DDT 1 1 1 +Diuron - 1 1 +Endosulphan - 1 1 +Endrin 1 1 1 +Heptachlor 1 1 1 +Isodrin - 1 - +Isoproturon - 1 1 +Lindane 1 1 1 +Mirex 1 1 1 +Simazine - 1 1 +Toxaphene 1 1 1 +Tributylin and compounds - 1 1 +Trifluralin - 1 1 +Triphenyltin and compounds - 1 1 + +Chlorinated organic substances + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +1,1,1-trichloroethane 100 - - +1,1,2,2-tetrachloroethane 50 - - +1,2-dichloroethane (EDC) 1 000 10 10 +Brominated diphenylethers (PBDE) - 1 1 +Chloro-alkanes, C10-C13 - 1 1 +Dichloromethane (DCM) 1 000 10 10 +Dieldrin 1 1 1 +Halogenated Organic Compounds (AOX) - 1 000 1 000 +Hexabromobifenyl 0,1 0,1 0,1 +Hexachlorobenzene (HCB) 10 1 1 +Hexachlorobutadiene (HCBD) - 1 1 +PCDD+PCFD (Dioxins+furans) (as Teq) 0,0001 0,0001 0,0001 +Pentachlorobenzene 1 1 1 +Pentachlorophenol (PCP) 10 1 1 +Polychlorinated biphenyls (PCB) 0,1 0,1 0,1 +Tetrachloroethylene (PER) 2 000 10 - +Tetrachloromethane (TCM) 100 1 - +Trichlorobenzenes (TCBs) (all isomers) 10 1 - +Trichloroethylene 2 000 10 - +Trichloromethane 500 10 - +Vynil chloride 1 000 10 10 + + +Other organic substances + + THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +Anthracene 50 1 1 +Benzene 1 000 200 (as 200 (as BTEX) BTEX) +Benzo(g,h,i)perylene - 1 - +Di-(2-ethyl hexyl) phthalate (DEHP) 10 1 1 +Ethyl benzene - 200 (as 200 (as BTEX) BTEX) +Ethylene oxide 1 000 10 10 +Fluoranthene - 1 - +Naphthalene 100 10 10 +Nonylphenol and Nonylphenol ethoxylates +(NP/NPEs) - 1 1 +Octylphenols and octylphenol ethoxylates - 1 - +Organotin compounds (as total Sn) - 50 50 +Phenols (as total C) - 20 20 +Polycyclic Aromatic hydrocarbons (PAHs) 50 5 5 +Toluene - 200 (as 200 (as BTEX) BTEX) +Total Organic Carbon (TOC) (as total C or +COD/3) - 50 000 - +Xylenes - 200 (as 200 (as BTEX) BTEX) + + +Inorganic substances + + 
THRESHOLD FOR RELEASES + to air to water to land +kg/year kg/year kg/year +Asbestos 1 1 1 +Chlorides (as total Cl) - 2 million 2 million +Cyanides (as total CN) - 50 50 +Fluorides (as total F) - 2 000 2 000 +Particulate matter (PM10) 50 000 - - +Total Nitrogen - 50 000 50 000 +Total Phosphorus - 5 000 5 000 + + diff --git a/library/src/test/resources/pdfbox/input/eu-001.pdf-tabula.txt b/library/src/test/resources/pdfbox/input/eu-001.pdf-tabula.txt new file mode 100644 index 00000000..ffdbba11 --- /dev/null +++ b/library/src/test/resources/pdfbox/input/eu-001.pdf-tabula.txt @@ -0,0 +1,209 @@ +E-PRTR pollutants and their thresholds + +A facility has to report data under E-PRTR if it fulfils the following criteria: +• the facility falls under at least one of the 65 E-PRTR economic activities. The +activities are also reported using a statistical classification of economic activities +(NACE rev 2) +• the facility has a capacity exceeding at least one of the E-PRTR capacity +thresholds +• the facility releases pollutants or transfers waste off-site which exceed specific +thresholds set out in Article 5 of the E-PRTR Regulation. These thresholds for +releases of pollutants are specified for each media - air, water and land - in Annex +II of the E-PRTR Regulation. + +In the following tables you will find the 91 E-PRTR pollutants and their thresholds broken +down by the 7 groups used in all the searches of the E-PRTR website. 
+ + +Greenhouse gases + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Carbon dioxide (CO2) 100 million - - +Hydro-fluorocarbons (HFCs) 100 - - +Methane (CH4) 100 000 - - +Nitrous oxide (N2O) 10 000 - - +Perfluorocarbons (PFCs) 100 - - +Sulphur hexafluoride (SF6) 50 - - + +Other gases + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Ammonia (NH3) 10 000 - - +Carbon monoxide (CO) 500 000 - - +Chlorine and inorganic compounds +(as HCl) +10 000 - - +Chlorofluorocarbons (CFCs) 1 - - +Flourine and inorganic compounds +(as HF) +5 000 - - +Halons 1 - - +Hydrochlorofluorocarbons (HCFCs) 1 - - +Hydrogen Cyanide (HCN) 200 - - +Nitrogen oxides (NOx/NO2) 100 000 - - +Non-methane volatile organic +compounds (NMVOC) +100 000 - - +Sulphur oxides (SOx/SO2) 150 000 - - + +Heavy metals + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Arsenic and compounds (as As) 20 5 5 +Cadmium and compounds (as Cd) 10 5 5 +Chromium and compounds (as Cr) 100 50 50 +Copper and compounds (as Cu) 100 50 50 +Lead and compounds (as Pb) 200 20 20 +Mercury and compounds (as Hg) 10 1 1 +Nickel and compounds (as Ni) 50 20 20 +Zinc and compounds (as Zn) 200 100 100 + +Pesticides + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +1,2,3,4,5,6- hexachlorocyclohexane +(HCH) +10 1 1 +Alachlor - 1 1 +Aldrin 1 1 1 +Atrazine - 1 1 +Chlordane 1 1 1 +Chlordecone 1 1 1 +Chlorfenvinphos - 1 1 +Chlorpyrifos - 1 1 +DDT 1 1 1 +Diuron - 1 1 +Endosulphan - 1 1 +Endrin 1 1 1 +Heptachlor 1 1 1 +Isodrin - 1 - +Isoproturon - 1 1 +Lindane 1 1 1 +Mirex 1 1 1 +Simazine - 1 1 +Toxaphene 1 1 1 +Tributylin and compounds - 1 1 +Trifluralin - 1 1 +Triphenyltin and compounds - 1 1 + +Chlorinated organic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +1,1,1-trichloroethane 100 - - +1,1,2,2-tetrachloroethane 50 - - +1,2-dichloroethane (EDC) 1 000 10 10 
+Brominated diphenylethers (PBDE) - 1 1 +Chloro-alkanes, C10-C13 - 1 1 +Dichloromethane (DCM) 1 000 10 10 +Dieldrin 1 1 1 +Halogenated Organic Compounds (AOX) - 1 000 1 000 +Hexabromobifenyl 0,1 0,1 0,1 +Hexachlorobenzene (HCB) 10 1 1 +Hexachlorobutadiene (HCBD) - 1 1 +PCDD+PCFD (Dioxins+furans) (as Teq) 0,0001 0,0001 0,0001 +Pentachlorobenzene 1 1 1 +Pentachlorophenol (PCP) 10 1 1 +Polychlorinated biphenyls (PCB) 0,1 0,1 0,1 +Tetrachloroethylene (PER) 2 000 10 - +Tetrachloromethane (TCM) 100 1 - +Trichlorobenzenes (TCBs) (all isomers) 10 1 - +Trichloroethylene 2 000 10 - +Trichloromethane 500 10 - +Vynil chloride 1 000 10 10 + + +Other organic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Anthracene 50 1 1 +Benzene 1 000 +200 (as +BTEX) +200 (as +BTEX) +Benzo(g,h,i)perylene - 1 - +Di-(2-ethyl hexyl) phthalate (DEHP) 10 1 1 +Ethyl benzene - +200 (as +BTEX) +200 (as +BTEX) +Ethylene oxide 1 000 10 10 +Fluoranthene - 1 - +Naphthalene 100 10 10 +Nonylphenol and Nonylphenol ethoxylates +(NP/NPEs) +- 1 1 +Octylphenols and octylphenol ethoxylates - 1 - +Organotin compounds (as total Sn) - 50 50 +Phenols (as total C) - 20 20 +Polycyclic Aromatic hydrocarbons (PAHs) 50 5 5 +Toluene - +200 (as +BTEX) +200 (as +BTEX) +Total Organic Carbon (TOC) (as total C or +COD/3) +- 50 000 - +Xylenes - +200 (as +BTEX) +200 (as +BTEX) + + +Inorganic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Asbestos 1 1 1 +Chlorides (as total Cl) - 2 million 2 million +Cyanides (as total CN) - 50 50 +Fluorides (as total F) - 2 000 2 000 +Particulate matter (PM10) 50 000 - - +Total Nitrogen - 50 000 50 000 +Total Phosphorus - 5 000 5 000 + + diff --git a/library/src/test/resources/pdfbox/input/eu-001.pdf.txt b/library/src/test/resources/pdfbox/input/eu-001.pdf.txt new file mode 100644 index 00000000..7ce680d3 --- /dev/null +++ b/library/src/test/resources/pdfbox/input/eu-001.pdf.txt @@ -0,0 +1,195 @@ +E-PRTR 
pollutants and their thresholds + +A facility has to report data under E-PRTR if it fulfils the following criteria: +• the facility falls under at least one of the 65 E-PRTR economic activities. The +activities are also reported using a statistical classification of economic activities +(NACE rev 2) +• the facility has a capacity exceeding at least one of the E-PRTR capacity +thresholds +• the facility releases pollutants or transfers waste off-site which exceed specific +thresholds set out in Article 5 of the E-PRTR Regulation. These thresholds for +releases of pollutants are specified for each media - air, water and land - in Annex +II of the E-PRTR Regulation. + +In the following tables you will find the 91 E-PRTR pollutants and their thresholds broken +down by the 7 groups used in all the searches of the E-PRTR website. + + +Greenhouse gases + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Carbon dioxide (CO2) 100 million - - +Hydro-fluorocarbons (HFCs) 100 - - +Methane (CH4) 100 000 - - +Nitrous oxide (N2O) 10 000 - - +Perfluorocarbons (PFCs) 100 - - +Sulphur hexafluoride (SF6) 50 - - + +Other gases + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Ammonia (NH3) 10 000 - - +Carbon monoxide (CO) 500 000 - - +Chlorine and inorganic compounds +(as HCl) 10 000 - - +Chlorofluorocarbons (CFCs) 1 - - +Flourine and inorganic compounds +(as HF) 5 000 - - +Halons 1 - - +Hydrochlorofluorocarbons (HCFCs) 1 - - +Hydrogen Cyanide (HCN) 200 - - +Nitrogen oxides (NOx/NO2) 100 000 - - +Non-methane volatile organic +compounds (NMVOC) 100 000 - - +Sulphur oxides (SOx/SO2) 150 000 - - + +Heavy metals + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Arsenic and compounds (as As) 20 5 5 +Cadmium and compounds (as Cd) 10 5 5 +Chromium and compounds (as Cr) 100 50 50 +Copper and compounds (as Cu) 100 50 50 +Lead and compounds (as Pb) 200 20 20 +Mercury and compounds (as Hg) 10 1 1 
+Nickel and compounds (as Ni) 50 20 20 +Zinc and compounds (as Zn) 200 100 100 + +Pesticides + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +1,2,3,4,5,6- hexachlorocyclohexane +(HCH) 10 1 1 +Alachlor - 1 1 +Aldrin 1 1 1 +Atrazine - 1 1 +Chlordane 1 1 1 +Chlordecone 1 1 1 +Chlorfenvinphos - 1 1 +Chlorpyrifos - 1 1 +DDT 1 1 1 +Diuron - 1 1 +Endosulphan - 1 1 +Endrin 1 1 1 +Heptachlor 1 1 1 +Isodrin - 1 - +Isoproturon - 1 1 +Lindane 1 1 1 +Mirex 1 1 1 +Simazine - 1 1 +Toxaphene 1 1 1 +Tributylin and compounds - 1 1 +Trifluralin - 1 1 +Triphenyltin and compounds - 1 1 + +Chlorinated organic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +1,1,1-trichloroethane 100 - - +1,1,2,2-tetrachloroethane 50 - - +1,2-dichloroethane (EDC) 1 000 10 10 +Brominated diphenylethers (PBDE) - 1 1 +Chloro-alkanes, C10-C13 - 1 1 +Dichloromethane (DCM) 1 000 10 10 +Dieldrin 1 1 1 +Halogenated Organic Compounds (AOX) - 1 000 1 000 +Hexabromobifenyl 0,1 0,1 0,1 +Hexachlorobenzene (HCB) 10 1 1 +Hexachlorobutadiene (HCBD) - 1 1 +PCDD+PCFD (Dioxins+furans) (as Teq) 0,0001 0,0001 0,0001 +Pentachlorobenzene 1 1 1 +Pentachlorophenol (PCP) 10 1 1 +Polychlorinated biphenyls (PCB) 0,1 0,1 0,1 +Tetrachloroethylene (PER) 2 000 10 - +Tetrachloromethane (TCM) 100 1 - +Trichlorobenzenes (TCBs) (all isomers) 10 1 - +Trichloroethylene 2 000 10 - +Trichloromethane 500 10 - +Vynil chloride 1 000 10 10 + + +Other organic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Anthracene 50 1 1 +Benzene 1 000 200 (as BTEX) +200 (as +BTEX) +Benzo(g,h,i)perylene - 1 - +Di-(2-ethyl hexyl) phthalate (DEHP) 10 1 1 +Ethyl benzene - 200 (as BTEX) +200 (as +BTEX) +Ethylene oxide 1 000 10 10 +Fluoranthene - 1 - +Naphthalene 100 10 10 +Nonylphenol and Nonylphenol ethoxylates +(NP/NPEs) - 1 1 +Octylphenols and octylphenol ethoxylates - 1 - +Organotin compounds (as total Sn) - 50 50 +Phenols (as total C) - 
20 20 +Polycyclic Aromatic hydrocarbons (PAHs) 50 5 5 +Toluene - 200 (as BTEX) +200 (as +BTEX) +Total Organic Carbon (TOC) (as total C or +COD/3) - 50 000 - +Xylenes - 200 (as BTEX) +200 (as +BTEX) + + +Inorganic substances + + THRESHOLD FOR RELEASES + to air +kg/year +to water +kg/year +to land +kg/year +Asbestos 1 1 1 +Chlorides (as total Cl) - 2 million 2 million +Cyanides (as total CN) - 50 50 +Fluorides (as total F) - 2 000 2 000 +Particulate matter (PM10) 50 000 - - +Total Nitrogen - 50 000 50 000 +Total Phosphorus - 5 000 5 000 + +