diff --git a/pom.xml b/pom.xml index 0323d96..74e422a 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.ohnlp.medtagger medtagger - 1.0.35 + 1.0.36 The MedTagger biomedical information extraction pipeline diff --git a/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java b/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java index 01a7130..2b1c9e3 100644 --- a/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java +++ b/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java @@ -40,6 +40,8 @@ import java.text.SimpleDateFormat; import java.util.*; import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.ReentrantLock; import java.util.stream.Collectors; import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; @@ -73,6 +75,8 @@ public PCollection expand(PCollection input) { } private static class MedTaggerPipelineFunction extends DoFn { + private static final ReentrantLock INIT_MUTEX_LOCK = new ReentrantLock(); + private final String resourceFolder; private final String textField; private final RunMode mode; @@ -92,77 +96,83 @@ public MedTaggerPipelineFunction(String textField, String resourceFolder, RunMod @Setup public void init() throws IOException, InvalidXMLException, URISyntaxException, ResourceInitializationException { - AggregateBuilder ae = new AggregateBuilder(); - // Tokenization, Sentence Splitting, Section Detection, etc. 
- ae.add(createEngineDescription("desc.backbone.aes.PreConceptExtractionAE")); - // Add the appropriate NER/normalization component depending on run mode - URI uri = null; - switch (mode) { - case OHNLPTK_DEFINED: // Ruleset from a web service - throw new UnsupportedOperationException("Remote Served IE Rulesets not yet implemented"); - case STANDALONE: - case STANDALONE_IE_ONLY: { - uri = MedTaggerPipelineFunction.class.getResource("/resources/" + this.resourceFolder).toURI(); - Map env = new HashMap<>(); - env.put("create", "true"); - try { - // Ensure it is created, ignore if not - FileSystem fs = FileSystems.newFileSystem(uri, env); - } catch (FileSystemAlreadyExistsException ignored) { - } - ae.add(createEngineDescription("org.ohnlp.medtagger.ie.aes.MedTaggerIEAnnotatorAE", "Resource_dir", uri.toString())); - break; - } - case STANDALONE_DICT_ONLY: { - uri = MedTaggerPipelineFunction.class.getResource("/resources/" + this.resourceFolder).toURI(); - Map env = new HashMap<>(); - env.put("create", "true"); - try { - // Ensure it is created, ignore if not - FileSystem fs = FileSystems.newFileSystem(uri, env); - } catch (FileSystemAlreadyExistsException ignored) { + INIT_MUTEX_LOCK.lock(); // acquired before try so the finally block never unlocks a lock this thread failed to take + try { + AggregateBuilder ae = new AggregateBuilder(); + // Tokenization, Sentence Splitting, Section Detection, etc. 
+ ae.add(createEngineDescription("desc.backbone.aes.PreConceptExtractionAE")); + // Add the appropriate NER/normalization component depending on run mode + URI uri = null; + switch (mode) { + case OHNLPTK_DEFINED: // Ruleset from a web service + throw new UnsupportedOperationException("Remote Served IE Rulesets not yet implemented"); + case STANDALONE: + case STANDALONE_IE_ONLY: { + uri = MedTaggerPipelineFunction.class.getResource("/resources/" + this.resourceFolder).toURI(); + Map env = new HashMap<>(); + env.put("create", "true"); + try { + // Ensure it is created, ignore if not + FileSystem fs = FileSystems.newFileSystem(uri, env); + } catch (FileSystemAlreadyExistsException ignored) { + } + ae.add(createEngineDescription("org.ohnlp.medtagger.ie.aes.MedTaggerIEAnnotatorAE", "Resource_dir", uri.toString())); + break; } - ae.add(createEngineDescription(AhoCorasickLookupAnnotator.class, "dict_file", uri.toString())); - break; - } - case STANDALONE_DICT_AND_IE: { - String[] parsed = this.resourceFolder.split("\\|"); - uri = MedTaggerPipelineFunction.class.getResource("/resources/" + parsed[0]).toURI(); - URI dictURI = null; - if (parsed.length > 1) { - dictURI = MedTaggerPipelineFunction.class.getResource("/resources/" + parsed[1]).toURI(); + case STANDALONE_DICT_ONLY: { + uri = MedTaggerPipelineFunction.class.getResource("/resources/" + this.resourceFolder).toURI(); + Map env = new HashMap<>(); + env.put("create", "true"); + try { + // Ensure it is created, ignore if not + FileSystem fs = FileSystems.newFileSystem(uri, env); + } catch (FileSystemAlreadyExistsException ignored) { + } + ae.add(createEngineDescription(AhoCorasickLookupAnnotator.class, "dict_file", uri.toString())); + break; } - Map env = new HashMap<>(); - env.put("create", "true"); - try { - // Ensure it is created, ignore if not - FileSystem fs = FileSystems.newFileSystem(uri, env); - } catch (FileSystemAlreadyExistsException ignored) { + case STANDALONE_DICT_AND_IE: { + String[] parsed = 
this.resourceFolder.split("\\|"); + uri = MedTaggerPipelineFunction.class.getResource("/resources/" + parsed[0]).toURI(); + URI dictURI = null; + if (parsed.length > 1) { + dictURI = MedTaggerPipelineFunction.class.getResource("/resources/" + parsed[1]).toURI(); + } + Map env = new HashMap<>(); + env.put("create", "true"); + try { + // Ensure it is created, ignore if not + FileSystem fs = FileSystems.newFileSystem(uri, env); + } catch (FileSystemAlreadyExistsException ignored) { + } + ae.add(createEngineDescription("org.ohnlp.medtagger.ie.aes.MedTaggerIEAnnotatorAE", "Resource_dir", uri.toString())); + if (dictURI != null) { + ae.add(createEngineDescription("desc.backbone.aes.MedTaggerDictionaryLookupAE", "dict_file", dictURI.toString())); + } else { + ae.add(createEngineDescription("desc.backbone.aes.MedTaggerDictionaryLookupAE")); + } + break; } - ae.add(createEngineDescription("org.ohnlp.medtagger.ie.aes.MedTaggerIEAnnotatorAE", "Resource_dir", uri.toString())); - if (dictURI != null) { - ae.add(createEngineDescription("desc.backbone.aes.MedTaggerDictionaryLookupAE", "dict_file", dictURI.toString())); - } else { + case GENERAL_CLINICAL: ae.add(createEngineDescription("desc.backbone.aes.MedTaggerDictionaryLookupAE")); - } - break; + break; + } + + // Add Context handling + if (uri != null) { + ae.add(AnalysisEngineFactory.createEngineDescription(RuleContextAnnotator.class, "context_ruleset", uri.toString())); + } else { + ae.add(AnalysisEngineFactory.createEngineDescription(RuleContextAnnotator.class)); } - case GENERAL_CLINICAL: - ae.add(createEngineDescription("desc.backbone.aes.MedTaggerDictionaryLookupAE")); - break; - } - // Add Context handling - if (uri != null) { - ae.add(AnalysisEngineFactory.createEngineDescription(RuleContextAnnotator.class, "context_ruleset", uri.toString())); - } else { - ae.add(AnalysisEngineFactory.createEngineDescription(RuleContextAnnotator.class)); + this.resMgr = ResourceManagerFactory.newResourceManager(); + this.aae = 
UIMAFramework.produceAnalysisEngine(ae.createAggregateDescription(), resMgr, null); + this.cas = CasCreationUtils.createCas(Collections.singletonList(aae.getMetaData()), + null, resMgr); + } finally { + INIT_MUTEX_LOCK.unlock(); } - this.resMgr = ResourceManagerFactory.newResourceManager(); - this.aae = UIMAFramework.produceAnalysisEngine(ae.createAggregateDescription(), resMgr, null); - this.cas = CasCreationUtils.createCas(Collections.singletonList(aae.getMetaData()), - null, resMgr); } @ProcessElement