From be085ecdbaf590fe4e67a2a55be48eac90630b9b Mon Sep 17 00:00:00 2001 From: Andrew Wen Date: Mon, 7 Oct 2024 14:31:08 -0500 Subject: [PATCH] Fix string split mechanism for section tagging --- pom.xml | 2 +- .../sectag/RulebasedSectionAnnotator.java | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/pom.xml b/pom.xml index f5b3d27..a9462c9 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.ohnlp.medtagger medtagger - 1.0.80 + 1.0.81 The MedTagger biomedical information extraction pipeline diff --git a/src/main/java/org/ohnlp/medtagger/sectag/RulebasedSectionAnnotator.java b/src/main/java/org/ohnlp/medtagger/sectag/RulebasedSectionAnnotator.java index d000f07..5e3534a 100644 --- a/src/main/java/org/ohnlp/medtagger/sectag/RulebasedSectionAnnotator.java +++ b/src/main/java/org/ohnlp/medtagger/sectag/RulebasedSectionAnnotator.java @@ -125,13 +125,18 @@ public void process(JCas jCas) throws AnalysisEngineProcessException { private Segment SecIndicator(Sentence sen, JCas jcas) { String str=sen.getCoveredText(); Segment cSeg=null; - int pos=str.indexOf(":"); - String secStr; - if(pos < 0 || pos >=100) - secStr=str; - else { - secStr=str.substring(0,pos); - } + int pos = -1; + int colonPos=str.indexOf(":"); + int senPos = pos= str.indexOf("\n"); + if (colonPos == -1) { + pos = senPos; + } else if (senPos == -1) { + pos = colonPos; + } else { + pos = Math.min(senPos, colonPos); + }; + if(pos < 0 || pos >=100) return null; + String secStr = str.substring(0, pos); if(sectionMap.containsKey(lvg.getNorm(secStr))){ sen.removeFromIndexes(jcas); String cSegment=sectionMap.get(lvg.getNorm(secStr));