Skip to content

Commit

Permalink
Fixes the ArrayTypeException in the Bunsen code. (#202)
Browse files Browse the repository at this point in the history
Fixes #156
  • Loading branch information
bashir2 authored Aug 23, 2021
1 parent 2dc82c1 commit 1009683
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,12 @@
import com.cerner.bunsen.definitions.StringConverter;
import com.cerner.bunsen.definitions.StructureField;
import com.google.common.collect.ImmutableMap;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.avro.Conversion;
import org.apache.avro.Conversions;
import org.apache.avro.JsonProperties;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
Expand Down Expand Up @@ -75,30 +71,17 @@ public Schema getDataType() {
}
};

private static final LogicalTypes.Decimal DECIMAL_PRECISION
= LogicalTypes.decimal(12, 4);
private static final Schema DOUBLE_SCHEMA = Schema.create(Type.DOUBLE);

private static final Schema DECIMAL_SCHEMA =
DECIMAL_PRECISION.addToSchema(Schema.create(Type.BYTES));

private static final HapiConverter DECIMAL_CONVERTER = new PrimitiveConverter<Schema>("Decimal") {

private final Conversion<BigDecimal> conversion = new Conversions.DecimalConversion();

@Override
public void toHapi(Object input, IPrimitiveType primitive) {

primitive.setValue(input);
}

protected Object fromHapi(IPrimitiveType primitive) {

return primitive.getValue();
}
// We cannot use Avro logical type `decimal` to represent FHIR `decimal` type. The reason is that
// Avro `decimal` type has a fixed scale and a maximum precision but with a fixed scale we have
// no guarantees on the precision of the FHIR `decimal` type. See this for more details:
// https://github.com/GoogleCloudPlatform/openmrs-fhir-analytics/issues/156#issuecomment-880964207
private static final HapiConverter DOUBLE_CONVERTER = new PrimitiveConverter<Schema>("Double") {

@Override
public Schema getDataType() {
return DECIMAL_SCHEMA;
return DOUBLE_SCHEMA;
}
};

Expand All @@ -116,7 +99,7 @@ public Schema getDataType() {
.put("string", STRING_CONVERTER)
.put("oid", STRING_CONVERTER)
.put("xhtml", STRING_CONVERTER)
.put("decimal", DECIMAL_CONVERTER)
.put("decimal", DOUBLE_CONVERTER)
.put("integer", INTEGER_CONVERTER)
.put("unsignedInt", INTEGER_CONVERTER)
.put("positiveInt", INTEGER_CONVERTER)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import com.cerner.bunsen.avro.AvroConverter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.avro.AvroTypeException;
import org.apache.avro.Conversions.DecimalConversion;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
Expand Down Expand Up @@ -182,14 +181,7 @@ synchronized public void write(Resource resource) throws IOException {
createWriter(resourceType);
}
final ParquetWriter<GenericRecord> parquetWriter = writerMap.get(resourceType);
try {
parquetWriter.write(this.convertToAvro(resource));
}
catch (AvroTypeException e) {
// TODO fix the root cause of this exception! Check the unit-test that exposes this.
log.error(String.format("Dropping %s resource with id %s due to exception: %s ", resource.fhirType(),
resource.getId(), e));
}
parquetWriter.write(this.convertToAvro(resource));
}

synchronized private void flush(String resourceType) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,9 +225,8 @@ public void convertObservationWithBigDecimalValue() throws IOException {
}

/**
* This is the test to demonstrate the BigDecimal conversion bug. If we remove the try/catch for
* AvroTypeException in `ParquetUtil.write` this test will fail. Note the previous test shows the
* conversion of such record is fine.
* This is the test to demonstrate the BigDecimal conversion bug. See:
* https://github.com/GoogleCloudPlatform/openmrs-fhir-analytics/issues/156
*/
@Test
public void writeObservationWithBigDecimalValue() throws IOException {
Expand All @@ -239,4 +238,18 @@ public void writeObservationWithBigDecimalValue() throws IOException {
Observation observation = parser.parseResource(Observation.class, observationStr);
parquetUtil.write(observation);
}

/**
* This is similar to the above test but has more `decimal` examples with different scales.
*/
@Test
public void writeObservationBundleWithDecimalConversionIssue() throws IOException {
initilizeLocalFileSystem();
parquetUtil = new ParquetUtil(rootPath.toString(), 0, 0, "", fileSystem);
String observationBundleStr = Resources.toString(Resources.getResource("observation_decimal_bundle.json"),
StandardCharsets.UTF_8);
IParser parser = fhirContext.newJsonParser();
Bundle bundle = parser.parseResource(Bundle.class, observationBundleStr);
parquetUtil.writeRecords(bundle);
}
}
188 changes: 188 additions & 0 deletions pipelines/common/src/test/resources/observation_decimal_bundle.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
{
"resourceType": "Bundle",
"id": "bf3bcfcc-2b34-45e3-8a60-ef04ef7060ef",
"meta": {
"lastUpdated": "2020-09-30T14:04:02.282-04:00",
"tag": [
{
"system": "http://hl7.org/fhir/v3/ObservationValue",
"code": "SUBSETTED",
"display": "Resource encoded in summary mode"
}
]
},
"type": "searchset",
"total": 4,
"link": [
{
"relation": "self",
"url": "http://localhost:9020/openmrs/ws/fhir2/R4/Observation?_count=10&_getpagesoffset=0&_summary=data"
}
],
"entry": [
{
"fullUrl": "http://localhost:9020/openmrs/ws/fhir2/R4/Observation/2c98a790-1a48-4b75-b458-ceb2d51f5df1",
"resource": {
"resourceType": "Observation",
"id": "2c98a790-1a48-4b75-b458-ceb2d51f5df1",
"status": "final",
"category": [
{
"coding": [
{
"system": "http://terminology.hl7.org/CodeSystem/observation-category",
"code": "laboratory",
"display": "Laboratory"
}
]
}
],
"code": {
"coding": [
{
"code": "a898c87a-1350-11df-a1f1-0026b9348838",
"display": "BODY SURFACE AREA"
}
]
},
"subject": {
"reference": "Patient/7_SOME_PATIENT_REF",
"type": "Patient",
"display": "NO_NAME"
},
"encounter": {
"reference": "Encounter/8_SOME_ENC_REF",
"type": "Encounter"
},
"effectiveDateTime": "2021-04-16T11:12:33+03:00",
"issued": "2021-04-16T11:13:33.000+03:00",
"valueQuantity": {
"value": 1.8287822299126937
}
}
},
{
"fullUrl": "http://localhost:9020/openmrs/ws/fhir2/R4/Observation/2c98a790-1a48-4b75-b458-ceb2d51f5df2",
"resource": {
"resourceType": "Observation",
"id": "2c98a790-1a48-4b75-b458-ceb2d51f5df2",
"status": "final",
"category": [
{
"coding": [
{
"system": "http://terminology.hl7.org/CodeSystem/observation-category",
"code": "laboratory",
"display": "Laboratory"
}
]
}
],
"code": {
"coding": [
{
"code": "a898c87a-1350-11df-a1f1-0026b9348838",
"display": "BODY SURFACE AREA"
}
]
},
"subject": {
"reference": "Patient/7_SOME_PATIENT_REF",
"type": "Patient",
"display": "NO_NAME"
},
"encounter": {
"reference": "Encounter/8_SOME_ENC_REF",
"type": "Encounter"
},
"effectiveDateTime": "2021-04-16T11:12:33+03:00",
"issued": "2021-04-16T11:13:33.000+03:00",
"valueQuantity": {
"value": 182878222991.26937
}
}
},
{
"fullUrl": "http://localhost:9020/openmrs/ws/fhir2/R4/Observation/2c98a790-1a48-4b75-b458-ceb2d51f5df3",
"resource": {
"resourceType": "Observation",
"id": "2c98a790-1a48-4b75-b458-ceb2d51f5df3",
"status": "final",
"category": [
{
"coding": [
{
"system": "http://terminology.hl7.org/CodeSystem/observation-category",
"code": "laboratory",
"display": "Laboratory"
}
]
}
],
"code": {
"coding": [
{
"code": "a898c87a-1350-11df-a1f1-0026b9348838",
"display": "BODY SURFACE AREA"
}
]
},
"subject": {
"reference": "Patient/7_SOME_PATIENT_REF",
"type": "Patient",
"display": "NO_NAME"
},
"encounter": {
"reference": "Encounter/8_SOME_ENC_REF",
"type": "Encounter"
},
"effectiveDateTime": "2021-04-16T11:12:33+03:00",
"issued": "2021-04-16T11:13:33.000+03:00",
"valueQuantity": {
"value": 182878222991269.37
}
}
},
{
"fullUrl": "http://localhost:9020/openmrs/ws/fhir2/R4/Observation/2c98a790-1a48-4b75-b458-ceb2d51f5df4",
"resource": {
"resourceType": "Observation",
"id": "2c98a790-1a48-4b75-b458-ceb2d51f5df4",
"status": "final",
"category": [
{
"coding": [
{
"system": "http://terminology.hl7.org/CodeSystem/observation-category",
"code": "laboratory",
"display": "Laboratory"
}
]
}
],
"code": {
"coding": [
{
"code": "a898c87a-1350-11df-a1f1-0026b9348838",
"display": "BODY SURFACE AREA"
}
]
},
"subject": {
"reference": "Patient/7_SOME_PATIENT_REF",
"type": "Patient",
"display": "NO_NAME"
},
"encounter": {
"reference": "Encounter/8_SOME_ENC_REF",
"type": "Encounter"
},
"effectiveDateTime": "2021-04-16T11:12:33+03:00",
"issued": "2021-04-16T11:13:33.000+03:00",
"valueQuantity": {
"value": 18287822299126937
}
}
}
]
}

0 comments on commit 1009683

Please sign in to comment.