From cef71c3dbb28a9846acb7da2671f85f4a9949b43 Mon Sep 17 00:00:00 2001 From: Sylvain Jermini Date: Thu, 17 Dec 2020 16:18:52 +0100 Subject: [PATCH] more robust data uri with base64 encoded data. Handle whitespaces. --- .../openhtmltopdf/swing/NaiveUserAgent.java | 7 ++- .../com/openhtmltopdf/util/ImageUtil.java | 11 +++- .../expected/more-robust-data-uri-base64.pdf | Bin 0 -> 2440 bytes .../html/more-robust-data-uri-base64.html | 56 ++++++++++++++++++ .../VisualRegressionTest.java | 8 +++ .../pdfboxout/PdfBoxUserAgent.java | 7 +-- 6 files changed, 79 insertions(+), 10 deletions(-) create mode 100644 openhtmltopdf-examples/src/main/resources/visualtest/expected/more-robust-data-uri-base64.pdf create mode 100644 openhtmltopdf-examples/src/main/resources/visualtest/html/more-robust-data-uri-base64.html diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/swing/NaiveUserAgent.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/swing/NaiveUserAgent.java index 36e92b554..3d14217b4 100644 --- a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/swing/NaiveUserAgent.java +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/swing/NaiveUserAgent.java @@ -26,7 +26,6 @@ import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.StandardCharsets; -import java.util.Base64; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; @@ -136,7 +135,7 @@ public FSStream getUrl(String url) { String data = url.substring(idxSeparator+1); byte[] res; if (url.indexOf("base64,") == idxSeparator - 6 /* 6 = "base64,".length */) { - res = Base64.getMimeDecoder().decode(data); + res = ImageUtil.fromBase64Encoded(data); } else { res = data.getBytes(StandardCharsets.UTF_8); } @@ -438,6 +437,10 @@ public static class DefaultUriResolver implements FSUriResolver { public String resolveURI(String baseUri, String uri) { if (uri == null || uri.isEmpty()) return null; + + if (uri.startsWith("data:")) { + return uri; //bypass URI "formatting" check for data uri, as we may have whitespace in the base64 encoded data + } try { URI possiblyRelative = new URI(uri); diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/ImageUtil.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/ImageUtil.java index c54d9b85e..41c27c77b 100755 --- a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/ImageUtil.java +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/ImageUtil.java @@ -25,6 +25,7 @@ import java.util.HashMap; import java.util.Map; import java.util.logging.Level; +import java.util.regex.Pattern; /** * Static utility methods for working with images. Meant to suggest "best practices" for the most straightforward @@ -202,7 +203,13 @@ public static BufferedImage createTransparentImage(int width, int height) { g2d.dispose(); return bi; } - + + private static final Pattern WHITE_SPACE = Pattern.compile("\\s+"); + + public static byte[] fromBase64Encoded(String b64encoded) { + return Base64.getMimeDecoder().decode(WHITE_SPACE.matcher(b64encoded).replaceAll("")); + } + /** * Get the binary content of an embedded base 64 image. * @@ -213,7 +220,7 @@ public static byte[] getEmbeddedBase64Image(String imageDataUri) { int b64Index = imageDataUri.indexOf("base64,"); if (b64Index != -1) { String b64encoded = imageDataUri.substring(b64Index + "base64,".length()); - return Base64.getMimeDecoder().decode(b64encoded); + return fromBase64Encoded(b64encoded); } else { XRLog.log(Level.SEVERE, LogMessageId.LogMessageId0Param.LOAD_EMBEDDED_DATA_URI_MUST_BE_ENCODED_IN_BASE64); } diff --git a/openhtmltopdf-examples/src/main/resources/visualtest/expected/more-robust-data-uri-base64.pdf b/openhtmltopdf-examples/src/main/resources/visualtest/expected/more-robust-data-uri-base64.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2ee1031c6c83d6ffca4e8ebb416a20a0d84dad67 GIT binary patch literal 2440 zcmai02~-nl5(X8-R+LlZ7-XFE z4l0N8W&y>xf&pkq~L8r1SEN|a8?2u3y?M;n23i8KoV?Y2auSoSVRPp z3H3n0*%?3tT!k6sGjk6NVc~=UgM}lY1;c?1ks+82+rcy#rckXQ*c^h)*AkvElq=>S z7-)eC5kVZzPry+jH^zp8^4EUmTCvsNBbx~UAg$+dMPRtCB9aJzI0%1csQB}x zAwcp+xIC6Snhb_RHV~+|sbm}K%!Ba5WMK&5=+6TZUIjSuDFUgAZ2(drB0|L&hd_Wf zBlrseg~IG^euy9zj{{*U1t59x5^&;-B(DSl4hF(Oxric05l*1QR|k>{ksPO`&Crs} z$t0izq>EQa$13m1*rN<*9CKH%h^TcI809i`+M8Z%$L>_Eo8NG)d#T^DWY~7LzW(O{AO(rp_)Nw5@QK>`I$|Vu9xh@c zmnpjeQQ#UV!*0peAGJds?! z2&B)%O7bC6iwRLhUO*fZ&q>Sk!)bo`->taZWaq|YsP(T>H83^pG~2wZr|jLrcxmbo z_V2qnh-7o66zM;*Y zhQpry%7x#1r5S$UsLF0M!*;rho_TltNb$E`ul#zR#k1z;LVjEQh$f|E&|K!Sx0o_? z(oLjNFIC&;dheUi#zl`Pc?XM?4wgXmQO^e?-x)FY6&X@KM917@K$!*$Qd2r|W9e;LvM8VM=PmkVAIjX^o0u~DShr*ON zvfsz4zPhWG*QFnrtQtW2l?ofrmfn_IE}3NA!>N5<^Zs=qH#b`!u2ZG9(O({k`*vJx zlGI}sqH<4l0_>gAIY3bt-C|gz%kRmsQ^{MnqH4ALsN-@qhm+!>ZfLA7^Zpjq)_Gz3 z52$2ymz8HsPd`?YF%S85-|Vh&HHalY?eCD|L$6e zd&WMsoM5??+hg@ON${A2bnLwT*2VcBeO-4t(n=qMM=3QVdUgiLtu7bp^vAnT6&2im zGu#q?3E$(>`c7GT`t`hu__tygn@Hw9J$ypt>QA0&l`?UPN7Jjw$ml5jp~b>7POY(1 zwt@d)L#R9B^>$0;6SaV4^V%A@?mwrO+U8i(240eDj|V$|x`S?>7U?nThk?9{((pf? zr<>>K*$?QCo~w@Ku_rVxo-2K(S7?9wnyuB!$%afc(U(zab?N#WE%_Qq%T+qDE9p`D zErGb~k%L}iZ$^4waLuss{&T)h>e|P`3PQb33`(V zAFtXrhb~wyX|2vKa2D-={S8w()4;431)KJ3te?F8am7f*IjqG-^C9Q;-Lr1?6?u+D za`9_782_g*1DrN%5_`b*M(kMzA5O`^^k?zlv`oDZ}V37 zR7&>dM2a$f8Qb%mVa1$X?t(|dlLyL3iXQQ4=z(z_KsZ?+kKt(P(}f)zP9 ziV>`(AkzdfUqFx?(OYL4stBZf#;nO05(7Y>Jpg^WKpK@wp@K2$pJXt>h>8moe3HR5 z`!8gW9TD0bJu;b~nmIBG^^k^`_>3^4zAqp|x%*t?##Y;f2nMo%QMTwbV zmJVW2P%+04H1ERz!yVo2= + + + + + +Smiley + + \ No newline at end of file diff --git a/openhtmltopdf-examples/src/test/java/com/openhtmltopdf/visualregressiontests/VisualRegressionTest.java b/openhtmltopdf-examples/src/test/java/com/openhtmltopdf/visualregressiontests/VisualRegressionTest.java index 4a4f333a4..b83beb283 100644 --- a/openhtmltopdf-examples/src/test/java/com/openhtmltopdf/visualregressiontests/VisualRegressionTest.java +++ b/openhtmltopdf-examples/src/test/java/com/openhtmltopdf/visualregressiontests/VisualRegressionTest.java @@ -1332,6 +1332,14 @@ public void testIssue615InfiniteLoopBreakWord() throws IOException { assertTrue(vt.runTest("issue-615-infinite-loop-break-word")); } + /** + * Test that we allow whitespaces inside the base64 encoded data uri. + */ + @Test + public void testMoreRobustDataUriBase64() throws IOException { + assertTrue(vt.runTest("more-robust-data-uri-base64")); + } + // TODO: // + Elements that appear just on generated overflow pages. // + content property (page counters, etc) diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxUserAgent.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxUserAgent.java index eb43d4245..55b5ab728 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxUserAgent.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxUserAgent.java @@ -23,14 +23,12 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; -import java.net.URI; import java.util.Locale; import java.util.logging.Level; import com.openhtmltopdf.layout.SharedContext; import com.openhtmltopdf.resource.ImageResource; import com.openhtmltopdf.swing.NaiveUserAgent; -import com.openhtmltopdf.util.ImageUtil; import com.openhtmltopdf.util.LogMessageId; import com.openhtmltopdf.util.XRLog; @@ -77,10 +75,7 @@ public ImageResource getImageResource(String uriStr) { if (is != null) { try { - URI uri = new URI(uriStr); - if (uri.getPath() != null - && uri.getPath().toLowerCase(Locale.US) - .endsWith(".pdf")) { + if (uriStr.toLowerCase(Locale.US).endsWith(".pdf")) { // TODO: Implement PDF AS IMAGE // PdfReader reader = _outputDevice.getReader(uri); // PDFAsImage image = new PDFAsImage(uri);