From c0c2f72b439c4ebe978bb67a14ad6ab7302ecf04 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 20 Nov 2019 16:22:38 +0000 Subject: [PATCH] encoding fix to assist GBIF SE --- .travis.yml | 1 + pom.xml | 2 +- .../ala/layers/intersect/SimpleShapeFile.java | 24 ++++++++++++------- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/.travis.yml b/.travis.yml index ce4e71e0..ff303677 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,7 @@ jdk: branches: only: - master + - encoding-fix - spring3 before_install: - mkdir -p ~/.m2; wget -q -O ~/.m2/settings.xml https://raw.githubusercontent.com/AtlasOfLivingAustralia/travis-build-configuration/master/travis_maven_settings.xml diff --git a/pom.xml b/pom.xml index ca6352ac..ec361ebf 100644 --- a/pom.xml +++ b/pom.xml @@ -11,7 +11,7 @@ layers-store jar - 2.0.2 + 2.0.3-SNAPSHOT ALA Spatial Layers Store http://www.ala.org.au diff --git a/src/main/java/au/org/ala/layers/intersect/SimpleShapeFile.java b/src/main/java/au/org/ala/layers/intersect/SimpleShapeFile.java index a2e2547b..12ab3354 100644 --- a/src/main/java/au/org/ala/layers/intersect/SimpleShapeFile.java +++ b/src/main/java/au/org/ala/layers/intersect/SimpleShapeFile.java @@ -20,6 +20,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Comparator; import java.util.List; @@ -27,6 +28,8 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.LinkedBlockingQueue; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + /** * SimpleShapeFile is a representation of a Shape File for * intersections with points @@ -84,9 +87,6 @@ public class SimpleShapeFile extends Object implements Serializable { protected SimpleShapeFile() { } - public static String getDBFEncoding(){ - return System.getProperty("dbf.encoding", "UTF-8"); - } /** * Constructor for a SimpleShapeFile, requires .dbf and .shp files present 
@@ -1506,6 +1506,12 @@ class DBFField extends Object implements Serializable { byte[] data; //placeholder for reading byte blocks /* don't care autoinc */ + + static String convertToUTF8(String myString){ + byte[] ptext = myString.getBytes(StandardCharsets.UTF_8); + return new String(ptext, StandardCharsets.UTF_8); + } + /** * constructor for DBFField with first byte separated from * rest of the data structure @@ -1523,7 +1529,7 @@ public DBFField(byte firstbyte, ByteBuffer buffer) { ba[i] = buffer.get(); } try { - name = (new String(ba, SimpleShapeFile.getDBFEncoding())).trim().toUpperCase(); + name = convertToUTF8(new String(ba, "ISO-8859-1").trim().toUpperCase()); } catch (Exception e) { logger.error(e.getMessage(), e); } @@ -1531,7 +1537,7 @@ public DBFField(byte firstbyte, ByteBuffer buffer) { byte[] ba2 = new byte[1]; ba2[0] = buffer.get(); try { - type = (new String(ba2, SimpleShapeFile.getDBFEncoding())).charAt(0); + type = convertToUTF8(new String(ba2, "ISO-8859-1").trim()).charAt(0); } catch (Exception e) { logger.error(e.getMessage(), e); } @@ -1769,10 +1775,10 @@ record = new String[fields.size()]; try { switch (f.getType()) { case 'C': //string - record[i] = (new String(data, SimpleShapeFile.getDBFEncoding())).trim(); + record[i] = new String(new String(data, "ISO-8859-1").trim().getBytes("ISO-8859-1"), StandardCharsets.UTF_8); break; case 'N': //number as string - record[i] = (new String(data, SimpleShapeFile.getDBFEncoding())).trim(); + record[i] = new String(new String(data, "ISO-8859-1").trim().getBytes("ISO-8859-1"), StandardCharsets.UTF_8); break; } } catch (Exception e) { @@ -1795,10 +1801,10 @@ record = mergeColumns ? 
new String[1] : new String[columnIdx.length]; try { switch (f.getType()) { case 'C': //string - fieldValues[i] = (new String(data, SimpleShapeFile.getDBFEncoding())).trim(); + fieldValues[i] = DBFField.convertToUTF8(new String(data, "ISO-8859-1").trim()); break; case 'N': //number as string - fieldValues[i] = (new String(data, SimpleShapeFile.getDBFEncoding())).trim(); + fieldValues[i] = DBFField.convertToUTF8(new String(data, "ISO-8859-1").trim()); break; } } catch (Exception e) {