Merge pull request #603 from jamesmudd/write-strings
Add String writing support
jamesmudd authored Aug 8, 2024
2 parents c08e128 + af6f494 commit 9eda9ea
Showing 8 changed files with 289 additions and 15 deletions.
9 changes: 8 additions & 1 deletion jhdf/src/main/java/io/jhdf/WritableDatasetImpl.java
@@ -41,6 +41,7 @@

import static io.jhdf.Utils.flatten;
import static io.jhdf.Utils.stripLeadingIndex;
import static org.apache.commons.lang3.ClassUtils.primitiveToWrapper;

public class WritableDatasetImpl extends AbstractWritableNode implements WritiableDataset {

@@ -130,7 +131,13 @@ public Object getData(long[] sliceOffset, int[] sliceDimensions) {

@Override
public Class<?> getJavaType() {
return Utils.getArrayType(data);
final Class<?> type = dataType.getJavaType();
// For scalar datasets the returned type will be the wrapper class because
// getData returns Object
if (isScalar() && type.isPrimitive()) {
return primitiveToWrapper(type);
}
return type;
}

@Override
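A minimal, self-contained sketch of the commons-lang3 helper the getJavaType() change above relies on; nothing here is jhdf-specific, and the printed values follow ClassUtils' documented behaviour:

    import org.apache.commons.lang3.ClassUtils;

    public class WrapperTypeSketch {
        public static void main(String[] args) {
            // primitiveToWrapper maps a primitive class to its boxed equivalent, so a
            // scalar int dataset can report Integer.class to match the boxed value
            // returned by getData().
            System.out.println(ClassUtils.primitiveToWrapper(int.class));    // class java.lang.Integer
            System.out.println(ClassUtils.primitiveToWrapper(double.class)); // class java.lang.Double
            // Non-primitive classes pass through unchanged.
            System.out.println(ClassUtils.primitiveToWrapper(String.class)); // class java.lang.String
        }
    }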
3 changes: 1 addition & 2 deletions jhdf/src/main/java/io/jhdf/api/WritableAttributeImpl.java
@@ -9,7 +9,6 @@
*/
package io.jhdf.api;

import io.jhdf.Utils;
import io.jhdf.object.datatype.DataType;
import io.jhdf.object.message.DataSpace;

@@ -54,7 +53,7 @@ public long getSizeInBytes() {

@Override
public int[] getDimensions() {
return Utils.getDimensions(data);
return dataSpace.getDimensions();
}

@Override
4 changes: 3 additions & 1 deletion jhdf/src/main/java/io/jhdf/object/datatype/DataType.java
@@ -57,7 +57,7 @@ public static DataType readDataType(ByteBuffer bb) {
return new FloatingPoint(bb);
case 2: // Time
throw new UnsupportedHdfException("Time data type is not yet supported");
case 3: // String
case StringData.CLASS_ID: // String
return new StringData(bb);
case 4: // Bit field
return new BitField(bb);
@@ -109,6 +109,8 @@ public static DataType fromObject(Object data) {
return FloatingPoint.FLOAT;
} else if (type == double.class || type == Double.class) {
return FloatingPoint.DOUBLE;
} else if (type == String.class) {
return StringData.create(data);
} else {
throw new HdfException("Could not create DataType for: " + type);
}
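A hedged usage sketch of the new String branch in DataType.fromObject; the array contents are illustrative, and the size comment assumes StringData.create in the StringData.java diff below (longest string plus one byte for the null terminator). getSize() is assumed to be the public size accessor on DataType:

    import io.jhdf.object.datatype.DataType;

    public class FromObjectSketch {
        public static void main(String[] args) {
            String[] names = {"alpha", "beta", "gamma"};
            // String data now resolves to a fixed-length StringData type instead of
            // falling through to "Could not create DataType for: class java.lang.String".
            DataType dataType = DataType.fromObject(names);
            // Assumed element size: longest string (5 chars) + 1 null terminator = 6 bytes.
            System.out.println(dataType + " size=" + dataType.getSize());
        }
    }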
112 changes: 107 additions & 5 deletions jhdf/src/main/java/io/jhdf/object/datatype/StringData.java
@@ -9,18 +9,23 @@
*/
package io.jhdf.object.datatype;

import io.jhdf.BufferBuilder;
import io.jhdf.Utils;
import io.jhdf.exceptions.HdfException;
import io.jhdf.storage.HdfBackingStorage;
import io.jhdf.storage.HdfFileChannel;

import java.lang.reflect.Array;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Objects;

import static io.jhdf.Constants.NULL;
import static io.jhdf.Constants.SPACE;
import static io.jhdf.Utils.stripLeadingIndex;
import static java.nio.charset.StandardCharsets.US_ASCII;

/**
* Data type representing strings.
@@ -29,6 +34,7 @@
*/
public class StringData extends DataType {

public static final int CLASS_ID = 3;
private final PaddingType paddingType;

private final Charset charset;
@@ -56,14 +62,16 @@ private static void fillFixedLengthStringData(Object data, int[] dims, ByteBuffe
}

public enum PaddingType {
NULL_TERMINATED(new NullTerminated()),
NULL_PADDED(new NullPadded()),
SPACE_PADDED(new SpacePadded());
NULL_TERMINATED(new NullTerminated(), 0),
NULL_PADDED(new NullPadded(), 1),
SPACE_PADDED(new SpacePadded(), 2);

private final StringPaddingHandler stringPaddingHandler;
private final int id;

PaddingType(StringPaddingHandler stringPaddingHandler) {
PaddingType(StringPaddingHandler stringPaddingHandler, int id) {
this.stringPaddingHandler = stringPaddingHandler;
this.id = id;
}
}

@@ -88,7 +96,7 @@ public StringData(ByteBuffer bb) {
final int charsetIndex = Utils.bitsToInt(classBits, 4, 4);
switch (charsetIndex) {
case 0:
charset = StandardCharsets.US_ASCII;
charset = US_ASCII;
break;
case 1:
charset = StandardCharsets.UTF_8;
Expand Down Expand Up @@ -156,6 +164,100 @@ public void setBufferLimit(ByteBuffer byteBuffer) {
}
}

public static StringData create(Object data) {
int maxLength = Arrays.stream(Utils.flatten(data))
.map(String.class::cast)
.mapToInt(String::length)
.max().getAsInt();

return new StringData(PaddingType.NULL_TERMINATED, StandardCharsets.UTF_8, maxLength);
}

private StringData(PaddingType paddingType, Charset charset, int maxLength) {
super(CLASS_ID, maxLength + 1); // +1 for padding
this.paddingType = paddingType;
this.charset = charset;
}

@Override
public ByteBuffer toBuffer() {
Utils.writeIntToBits(paddingType.id, classBits, 0, 4);
Utils.writeIntToBits(1, classBits, 4, 4); // Always UTF8
return super.toBufferBuilder().build();
}

@Override
public void writeData(Object data, int[] dimensions, HdfFileChannel hdfFileChannel) {
if (data.getClass().isArray()) {
final int fastDimSize = dimensions[dimensions.length - 1];
final ByteBuffer buffer = ByteBuffer.allocate(fastDimSize * getSize());
writeArrayData(data, dimensions, buffer, hdfFileChannel);
} else {
writeScalarData(data, hdfFileChannel);
}
}

private void writeScalarData(Object data, HdfFileChannel hdfFileChannel) {
ByteBuffer buffer = encodeScalarData(data);
buffer.rewind();
hdfFileChannel.write(buffer);
}

private void writeArrayData(Object data, int[] dims, ByteBuffer buffer, HdfFileChannel hdfFileChannel) {
if (dims.length > 1) {
for (int i = 0; i < dims[0]; i++) {
Object newArray = Array.get(data, i);
writeArrayData(newArray, stripLeadingIndex(dims), buffer, hdfFileChannel);
}
} else {
String[] strings = (String[]) data;
for (int i = 0; i < strings.length; i++) {
String str = strings[i];
buffer.put(charset.encode(str))
.put(NULL)
.position((i + 1) * getSize());
}
buffer.rewind();
hdfFileChannel.write(buffer);
buffer.clear();
}
}

private ByteBuffer encodeScalarData(Object data) {
return new BufferBuilder()
.writeBuffer(charset.encode((String) data))
.writeByte(NULL)
.build();
}

@Override
public ByteBuffer encodeData(Object data) {
Objects.requireNonNull(data, "Cannot encode null");

if (data.getClass().isArray()) {
final int[] dimensions = Utils.getDimensions(data);
final int totalElements = Arrays.stream(dimensions).reduce(1, Math::multiplyExact);
final ByteBuffer buffer = ByteBuffer.allocate(totalElements * getSize());
encodeDataInternal(data, dimensions, buffer);
return buffer;
} else {
return encodeScalarData(data);
}
}

private void encodeDataInternal(Object data, int[] dims, ByteBuffer buffer) {
if (dims.length > 1) {
for (int i = 0; i < dims[0]; i++) {
Object newArray = Array.get(data, i);
encodeDataInternal(newArray, stripLeadingIndex(dims), buffer);
}
} else {
for (String str : (String[]) data) {
buffer.put(this.charset.encode(str)).put(NULL);
}
}
}

@Override
public String toString() {
return "StringData{" +
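A self-contained sketch (JDK only, no jhdf classes) of the fixed-length layout that writeArrayData/encodeDataInternal above produce: each element gets maxLength + 1 bytes, UTF-8 encoded and null terminated, with the buffer position advanced to the next fixed-size slot so shorter strings leave zero padding:

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;

    public class FixedLengthStringLayoutSketch {
        public static void main(String[] args) {
            String[] strings = {"cat", "giraffe", "ox"};
            // Mirrors StringData.create: element size = longest string + 1 for the null byte.
            int elementSize = 7 + 1;
            ByteBuffer buffer = ByteBuffer.allocate(strings.length * elementSize);
            for (int i = 0; i < strings.length; i++) {
                buffer.put(StandardCharsets.UTF_8.encode(strings[i]))
                    .put((byte) 0)                     // null terminator
                    .position((i + 1) * elementSize);  // jump to the next element boundary
            }
            buffer.rewind();
            System.out.println(buffer.remaining()); // 24 = 3 elements x 8 bytes
        }
    }

The position(...) call is what keeps every element at a fixed offset of i * elementSize, which makes the dataset fixed-length rather than variable-length.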
21 changes: 19 additions & 2 deletions jhdf/src/test/java/io/jhdf/TestUtils.java
@@ -48,6 +48,12 @@ public static double[] toDoubleArray(Object data) {
.toArray();
}

public static String[] toStringArray(Object data) {
return Arrays.stream(Utils.flatten(data))
.map(el -> el.toString())
.toArray(String[]::new);
}

public static void compareGroups(Group group1, Group group2) {
logger.info("Comparing groups [{}]", group1.getPath());

@@ -72,12 +78,23 @@ private static void compareAttributes(Attribute attribute1, Attribute attribute2
logger.info("Comparing attribute [{}] on node [{}]", attribute1.getName(), attribute1.getNode().getPath());
assertThat(attribute1.getName(), is(equalTo(attribute2.getName())));
assertThat(attribute1.getDimensions(), is(equalTo(attribute2.getDimensions())));
assertArrayEquals(toDoubleArray(attribute1.getData()), toDoubleArray(attribute2.getData()), 0.002);
assertThat(attribute1.getJavaType(), is(equalTo(attribute2.getJavaType())));
if(attribute1.getJavaType() == String.class) {
assertArrayEquals(toStringArray(attribute1.getData()), toStringArray(attribute2.getData()));
} else {
assertArrayEquals(toDoubleArray(attribute1.getData()), toDoubleArray(attribute2.getData()), 0.002);
}
}

private static void compareDatasets(Dataset dataset1, Dataset dataset2) {
logger.info("Comparing dataset2 [{}] on node [{}]", dataset1.getName(), dataset1.getPath());
assertThat(dataset1.getName(), is(equalTo(dataset2.getName())));
assertThat(dataset1.getDimensions(), is(equalTo(dataset2.getDimensions())));
assertArrayEquals(toDoubleArray(dataset1.getData()), toDoubleArray(dataset2.getData()), 0.002);
assertThat(dataset1.getJavaType(), is(equalTo(dataset2.getJavaType())));
if(dataset1.getJavaType() == String.class) {
assertArrayEquals(toStringArray(dataset1.getData()), toStringArray(dataset2.getData()));
} else {
assertArrayEquals(toDoubleArray(dataset1.getData()), toDoubleArray(dataset2.getData()), 0.002);
}
}
}
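A rough illustration of the new test helper, assuming io.jhdf.Utils.flatten returns the elements of a (possibly multi-dimensional) array in row-major order, as the stream pipeline above relies on:

    import java.util.Arrays;

    import io.jhdf.TestUtils;

    public class ToStringArraySketch {
        public static void main(String[] args) {
            // Flattening lets compareAttributes/compareDatasets use a single
            // assertArrayEquals call regardless of the dataset's rank.
            String[][] nested = {{"a", "b"}, {"c", "d"}};
            System.out.println(Arrays.toString(TestUtils.toStringArray(nested)));
            // Expected: [a, b, c, d]
        }
    }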
7 changes: 6 additions & 1 deletion jhdf/src/test/java/io/jhdf/h5dump/DataXml.java
@@ -12,11 +12,16 @@
import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty;
import org.apache.commons.lang3.StringUtils;

import java.util.regex.Pattern;

public class DataXml {
private static final Pattern PATTERN = Pattern.compile("[\\n\\s\"]{2,}");
@JacksonXmlProperty(localName = "DataFromFile")
String dataString;

public String[] getData() {
return StringUtils.split(dataString);
return PATTERN.splitAsStream(dataString)
.filter(StringUtils::isNotBlank)
.toArray(String[]::new);
}
}
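A hedged sketch of how the new pattern tokenises h5dump's XML string output, assuming DataFromFile holds one quoted value per line (the sample input below is invented):

    import java.util.Arrays;
    import java.util.regex.Pattern;

    import org.apache.commons.lang3.StringUtils;

    public class DataFromFileSplitSketch {
        private static final Pattern PATTERN = Pattern.compile("[\\n\\s\"]{2,}");

        public static void main(String[] args) {
            // Invented sample resembling h5dump XML output: quoted strings, one per line.
            String dataString = "\n            \"apple\"\n            \"banana\"\n        ";
            String[] data = PATTERN.splitAsStream(dataString)
                .filter(StringUtils::isNotBlank) // drops the empty token left before the first quote
                .toArray(String[]::new);
            System.out.println(Arrays.toString(data)); // [apple, banana]
        }
    }

Splitting on runs of two or more quote/whitespace characters strips the surrounding quotes and indentation, and it does not break a value on a single embedded space the way a plain whitespace split would.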
14 changes: 11 additions & 3 deletions jhdf/src/test/java/io/jhdf/h5dump/H5Dump.java
@@ -25,6 +25,7 @@
import java.util.concurrent.TimeUnit;

import static io.jhdf.TestUtils.toDoubleArray;
import static io.jhdf.TestUtils.toStringArray;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.blankOrNullString;
import static org.hamcrest.Matchers.equalTo;
@@ -84,14 +85,21 @@ private static void compareAttributes(AttributeXml attributeXml, Attribute attri
logger.info("Comparing attribute [{}] on node [{}]", attribute.getName(), attribute.getNode().getPath());
assertThat(attributeXml.name, is(equalTo(attribute.getName())));
assertThat(attributeXml.getDimensions(), is(equalTo(attribute.getDimensions())));
assertArrayEquals(toDoubleArray(attributeXml.getData()), toDoubleArray(attribute.getData()), 0.002);
}
if(attribute.getJavaType() == String.class) {
assertArrayEquals(toStringArray(attributeXml.getData()), toStringArray(attribute.getData()));
} else {
assertArrayEquals(toDoubleArray(attributeXml.getData()), toDoubleArray(attribute.getData()), 0.002);
} }

private static void compareDatasets(DatasetXml datasetXml, Dataset dataset) {
logger.info("Comparing dataset [{}] on node [{}]", dataset.getName(), dataset.getPath());
assertThat(datasetXml.getObjectId(), is(equalTo(dataset.getAddress())));
assertThat(datasetXml.getDimensions(), is(equalTo(dataset.getDimensions())));
assertArrayEquals(toDoubleArray(datasetXml.getData()), toDoubleArray(dataset.getData()), 0.002);
if(dataset.getJavaType() == String.class) {
assertArrayEquals(toStringArray(datasetXml.getData()), toStringArray(dataset.getData()));
} else {
assertArrayEquals(toDoubleArray(datasetXml.getData()), toDoubleArray(dataset.getData()), 0.002);
}
}


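Finally, an end-to-end sketch of what this PR enables. The writing entry points (HdfFile.write and putDataset) are assumed from jhdf's existing writing API rather than taken from this diff, and the file name and data are illustrative:

    import io.jhdf.HdfFile;

    import java.nio.file.Paths;

    public class WriteStringsSketch {
        public static void main(String[] args) {
            // Assumed API: HdfFile.write(...) returns a writable, auto-closeable file
            // exposing putDataset(...). With this PR the String[] below is stored as a
            // fixed-length, null-terminated, UTF-8 string dataset.
            try (var hdfFile = HdfFile.write(Paths.get("strings.hdf5"))) {
                hdfFile.putDataset("greetings", new String[] {"hello", "world"});
            }
        }
    }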