From d1815f9d8022239e9364d40605a8aeac92ebd368 Mon Sep 17 00:00:00 2001 From: Andrew Wen Date: Wed, 15 Feb 2023 09:08:40 -0600 Subject: [PATCH] dont add medtagger prefix as it messes with legacy configs --- pom.xml | 2 +- .../backbone/MedTaggerBackboneTransform.java | 20 ++++++++-------- ...MedTaggerOutputToOHDSIFormatTransform.java | 24 +++++++++---------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pom.xml b/pom.xml index 2136ed9..0d6d6f7 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.ohnlp.medtagger medtagger - 1.0.51 + 1.0.52 The MedTagger biomedical information extraction pipeline diff --git a/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java b/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java index fca3cbe..7c88537 100644 --- a/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java +++ b/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java @@ -72,16 +72,16 @@ public void initFromConfig(JsonNode config) throws ComponentInitializationExcept @Override public Schema calculateOutputSchema(Schema schema) { List fields = new ArrayList<>(schema.getFields()); - fields.add(Schema.Field.of("medtagger_matched_text", Schema.FieldType.STRING)); - fields.add(Schema.Field.of("medtagger_concept_code", Schema.FieldType.STRING)); - fields.add(Schema.Field.of("medtagger_matched_sentence", Schema.FieldType.STRING)); - fields.add(Schema.Field.of("medtagger_section_id", Schema.FieldType.INT32)); - fields.add(Schema.Field.of("medtagger_nlp_run_dtm", Schema.FieldType.DATETIME)); - fields.add(Schema.Field.of("medtagger_certainty", Schema.FieldType.STRING)); - fields.add(Schema.Field.of("medtagger_experiencer", Schema.FieldType.STRING)); - fields.add(Schema.Field.of("medtagger_status", Schema.FieldType.STRING)); - fields.add(Schema.Field.of("medtagger_offset", Schema.FieldType.INT32)); - fields.add(Schema.Field.of("medtagger_semgroups", Schema.FieldType.STRING).withNullable(true)); + fields.add(Schema.Field.of("matched_text", Schema.FieldType.STRING)); + fields.add(Schema.Field.of("concept_code", Schema.FieldType.STRING)); + fields.add(Schema.Field.of("matched_sentence", Schema.FieldType.STRING)); + fields.add(Schema.Field.of("section_id", Schema.FieldType.INT32)); + fields.add(Schema.Field.of("nlp_run_dtm", Schema.FieldType.DATETIME)); + fields.add(Schema.Field.of("certainty", Schema.FieldType.STRING)); + fields.add(Schema.Field.of("experiencer", Schema.FieldType.STRING)); + fields.add(Schema.Field.of("status", Schema.FieldType.STRING)); + fields.add(Schema.Field.of("offset", Schema.FieldType.INT32)); + fields.add(Schema.Field.of("semgroups", Schema.FieldType.STRING).withNullable(true)); this.outputSchema = Schema.of(fields.toArray(new Schema.Field[0])); return this.outputSchema; } diff --git a/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerOutputToOHDSIFormatTransform.java b/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerOutputToOHDSIFormatTransform.java index 976d7fa..aead1c9 100644 --- a/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerOutputToOHDSIFormatTransform.java +++ b/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerOutputToOHDSIFormatTransform.java @@ -99,21 +99,21 @@ public void processElement(@Element Row input, OutputReceiver output) throw // Generate an output row Row.Builder rowBuild = Row.withSchema(schema) .addValues(input.getValues()) - .addValue(input.getInt32("medtagger_section_id")) - .addValue(input.getString("medtagger_matched_text")) - .addValue(input.getString("medtagger_matched_sentence")); + .addValue(input.getInt32("section_id")) + .addValue(input.getString("matched_text")) + .addValue(input.getString("matched_sentence")); switch (resources.toUpperCase(Locale.ROOT)) { case "NONE": { try { - rowBuild = rowBuild.addValue(Integer.valueOf(Optional.ofNullable(input.getString("medtagger_concept_code")).orElse("0"))); + rowBuild = rowBuild.addValue(Integer.valueOf(Optional.ofNullable(input.getString("concept_code")).orElse("0"))); } catch (NumberFormatException e) { throw new IllegalArgumentException("OHDSI requires integer concept codes, value " - + input.getString("medtagger_concept_code") + " was instead provided with mapping ruleset 'NONE'"); + + input.getString("concept_code") + " was instead provided with mapping ruleset 'NONE'"); } break; } case "UMLS": { - String conceptCode = input.getString("medtagger_concept_code"); + String conceptCode = input.getString("concept_code"); // Only take first portion as CUI, remainder is top freq lexeme in current dict format. String cui = conceptCode.contains(":") ? conceptCode.split(":")[0].toUpperCase(Locale.ROOT) : conceptCode.toUpperCase(Locale.ROOT); @@ -121,20 +121,20 @@ public void processElement(@Element Row input, OutputReceiver output) throw rowBuild = rowBuild.addValue(ohdsicid); } default: { - rowBuild = rowBuild.addValue(ohdsiConceptMap.getOrDefault(input.getString("medtagger_concept_code"), 0)); + rowBuild = rowBuild.addValue(ohdsiConceptMap.getOrDefault(input.getString("concept_code"), 0)); } } Row out = rowBuild .addValue(0) - .addValue(input.getDateTime("medtagger_nlp_run_dtm")) + .addValue(input.getDateTime("nlp_run_dtm")) .addValue( String.format("certainty=%1$s,experiencer=%2$s,status=%3$s", - input.getString("medtagger_certainty"), - input.getString("medtagger_experiencer"), - input.getString("medtagger_status") + input.getString("certainty"), + input.getString("experiencer"), + input.getString("status") ) ) - .addValue(input.getInt32("medtagger_offset")) + .addValue(input.getInt32("offset")) .addValue(version.trim()) .build(); output.output(out);