From c64269cf9c33f9b41f653df230167ac7f8155ba7 Mon Sep 17 00:00:00 2001
From: Daniel Kec
Date: Wed, 17 Jan 2024 15:35:33 +0100
Subject: [PATCH] Load optional UTF-32 charsets lazily #121

---
 .../parsson/UnicodeDetectingInputStream.java  | 51 ++++++++++++++++-----
 1 file changed, 42 insertions(+), 9 deletions(-)

diff --git a/impl/src/main/java/org/eclipse/parsson/UnicodeDetectingInputStream.java b/impl/src/main/java/org/eclipse/parsson/UnicodeDetectingInputStream.java
index 740cb206..c3571ea0 100644
--- a/impl/src/main/java/org/eclipse/parsson/UnicodeDetectingInputStream.java
+++ b/impl/src/main/java/org/eclipse/parsson/UnicodeDetectingInputStream.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2021 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2024 Oracle and/or its affiliates. All rights reserved.
  *
  * This program and the accompanying materials are made available under the
  * terms of the Eclipse Public License v. 2.0, which is available at
@@ -30,10 +30,6 @@
  * @author Jitendra Kotamraju
  */
 class UnicodeDetectingInputStream extends FilterInputStream {
-
-    private static final Charset UTF_32LE = Charset.forName("UTF-32LE");
-    private static final Charset UTF_32BE = Charset.forName("UTF-32BE");
-
     private static final byte FF = (byte)0xFF;
     private static final byte FE = (byte)0xFE;
     private static final byte EF = (byte)0xEF;
@@ -41,6 +37,9 @@ class UnicodeDetectingInputStream extends FilterInputStream {
     private static final byte BF = (byte)0xBF;
     private static final byte NUL = (byte)0x00;
 
+    private static Charset utf32Le;
+    private static Charset utf32Be;
+
     private final byte[] buf = new byte[4];
     private int bufLen;
     private int curIndex;
@@ -108,10 +107,10 @@ private Charset detectEncoding() {
         // Use BOM to detect encoding
         if (buf[0] == NUL && buf[1] == NUL && buf[2] == FE && buf[3] == FF) {
             curIndex = 4;
-            return UTF_32BE;
+            return getUtf32be();
         } else if (buf[0] == FF && buf[1] == FE && buf[2] == NUL && buf[3] == NUL) {
             curIndex = 4;
-            return UTF_32LE;
+            return getUtf32le();
         } else if (buf[0] == FE && buf[1] == FF) {
             curIndex = 2;
             return StandardCharsets.UTF_16BE;
@@ -124,11 +123,11 @@ private Charset detectEncoding() {
         }
         // No BOM, just use JSON RFC's encoding algo to auto-detect
         if (buf[0] == NUL && buf[1] == NUL && buf[2] == NUL) {
-            return UTF_32BE;
+            return getUtf32be();
         } else if (buf[0] == NUL && buf[2] == NUL) {
             return StandardCharsets.UTF_16BE;
         } else if (buf[1] == NUL && buf[2] == NUL && buf[3] == NUL) {
-            return UTF_32LE;
+            return getUtf32le();
         } else if (buf[1] == NUL && buf[3] == NUL) {
             return StandardCharsets.UTF_16LE;
         }
@@ -136,6 +135,40 @@ private Charset detectEncoding() {
         return StandardCharsets.UTF_8;
     }
 
+    /**
+     * Returns the UTF-32BE charset, looking it up on first use.
+     *
+     * @return the UTF-32BE charset
+     * @throws java.nio.charset.UnsupportedCharsetException if the JVM does not provide UTF-32BE
+     */
+    private static Charset getUtf32be() {
+        // Read the unsynchronized cache field exactly once so the value that was
+        // null-checked is the value returned.
+        Charset charset = utf32Be;
+        if (charset == null) {
+            charset = Charset.forName("UTF-32BE");
+            utf32Be = charset;
+        }
+        return charset;
+    }
+
+    /**
+     * Returns the UTF-32LE charset, looking it up on first use.
+     *
+     * @return the UTF-32LE charset
+     * @throws java.nio.charset.UnsupportedCharsetException if the JVM does not provide UTF-32LE
+     */
+    private static Charset getUtf32le() {
+        // Read the unsynchronized cache field exactly once so the value that was
+        // null-checked is the value returned.
+        Charset charset = utf32Le;
+        if (charset == null) {
+            charset = Charset.forName("UTF-32LE");
+            utf32Le = charset;
+        }
+        return charset;
+    }
+
     @Override
     public int read() throws IOException {
         if (curIndex < bufLen) {