elastic · droberts195 · Aug 29, 2018 · Aug 29, 2018
diff --git a/...rc/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java b/...rc/main/java/org/elasticsearch/xpack/ml/logstructurefinder/LogStructureFinderManager.java
@@ -163,9 +163,15 @@ CharsetMatch findCharset(List<String> explanation, InputStream inputStream) thro
                 // deduction algorithms on binary files is very slow as the binary files generally appear to
                 // have very long lines.
                 boolean spaceEncodingContainsZeroByte = false;
-                byte[] spaceBytes = " ".getBytes(name);
-                for (int i = 0; i < spaceBytes.length && spaceEncodingContainsZeroByte == false; ++i) {
-                    spaceEncodingContainsZeroByte = (spaceBytes[i] == 0);
+                Charset charset = Charset.forName(name);
+                // Some character sets do not support encoding (only decoding).  These are extremely rare,
+                // so it's likely that such a character set was chosen based on incorrectly provided binary
+                // data.  Therefore, err on the side of rejecting binary data.
+                if (charset.canEncode()) {
+                    byte[] spaceBytes = " ".getBytes(charset);
+                    for (int i = 0; i < spaceBytes.length && spaceEncodingContainsZeroByte == false; ++i) {
+                        spaceEncodingContainsZeroByte = (spaceBytes[i] == 0);
+                    }
                 }
                 if (containsZeroBytes && spaceEncodingContainsZeroByte == false) {
                     explanation.add("Character encoding [" + name + "] matched the input with [" + charsetMatch.getConfidence() +