diff --git a/pom.xml b/pom.xml
index 0323d96..74e422a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
org.ohnlp.medtagger
medtagger
- 1.0.35
+ 1.0.36
The MedTagger biomedical information extraction pipeline
diff --git a/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java b/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java
index 01a7130..2b1c9e3 100644
--- a/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java
+++ b/src/main/java/org/ohnlp/medtagger/backbone/MedTaggerBackboneTransform.java
@@ -40,6 +40,8 @@
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.*;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
@@ -73,6 +75,8 @@ public PCollection expand(PCollection input) {
}
private static class MedTaggerPipelineFunction extends DoFn {
+ private transient static final ReentrantLock INIT_MUTEX_LOCK = new ReentrantLock();
+
private final String resourceFolder;
private final String textField;
private final RunMode mode;
@@ -92,77 +96,83 @@ public MedTaggerPipelineFunction(String textField, String resourceFolder, RunMod
@Setup
public void init() throws IOException, InvalidXMLException, URISyntaxException, ResourceInitializationException {
- AggregateBuilder ae = new AggregateBuilder();
- // Tokenization, Sentence Splitting, Section Detection, etc.
- ae.add(createEngineDescription("desc.backbone.aes.PreConceptExtractionAE"));
- // Add the appropriate NER/normalization component depending on run mode
- URI uri = null;
- switch (mode) {
- case OHNLPTK_DEFINED: // Ruleset from a web service
- throw new UnsupportedOperationException("Remote Served IE Rulesets not yet implemented");
- case STANDALONE:
- case STANDALONE_IE_ONLY: {
- uri = MedTaggerPipelineFunction.class.getResource("/resources/" + this.resourceFolder).toURI();
- Map env = new HashMap<>();
- env.put("create", "true");
- try {
- // Ensure it is created, ignore if not
- FileSystem fs = FileSystems.newFileSystem(uri, env);
- } catch (FileSystemAlreadyExistsException ignored) {
- }
- ae.add(createEngineDescription("org.ohnlp.medtagger.ie.aes.MedTaggerIEAnnotatorAE", "Resource_dir", uri.toString()));
- break;
- }
- case STANDALONE_DICT_ONLY: {
- uri = MedTaggerPipelineFunction.class.getResource("/resources/" + this.resourceFolder).toURI();
- Map env = new HashMap<>();
- env.put("create", "true");
- try {
- // Ensure it is created, ignore if not
- FileSystem fs = FileSystems.newFileSystem(uri, env);
- } catch (FileSystemAlreadyExistsException ignored) {
+ INIT_MUTEX_LOCK.lock(); // acquire before entering try so the finally below only ever unlocks a lock this thread actually holds
+ try {
+ AggregateBuilder ae = new AggregateBuilder();
+ // Tokenization, Sentence Splitting, Section Detection, etc.
+ ae.add(createEngineDescription("desc.backbone.aes.PreConceptExtractionAE"));
+ // Add the appropriate NER/normalization component depending on run mode
+ URI uri = null;
+ switch (mode) {
+ case OHNLPTK_DEFINED: // Ruleset from a web service
+ throw new UnsupportedOperationException("Remote Served IE Rulesets not yet implemented");
+ case STANDALONE:
+ case STANDALONE_IE_ONLY: {
+ uri = MedTaggerPipelineFunction.class.getResource("/resources/" + this.resourceFolder).toURI();
+ Map env = new HashMap<>();
+ env.put("create", "true");
+ try {
+ // Ensure it is created, ignore if not
+ FileSystem fs = FileSystems.newFileSystem(uri, env);
+ } catch (FileSystemAlreadyExistsException ignored) {
+ }
+ ae.add(createEngineDescription("org.ohnlp.medtagger.ie.aes.MedTaggerIEAnnotatorAE", "Resource_dir", uri.toString()));
+ break;
}
- ae.add(createEngineDescription(AhoCorasickLookupAnnotator.class, "dict_file", uri.toString()));
- break;
- }
- case STANDALONE_DICT_AND_IE: {
- String[] parsed = this.resourceFolder.split("\\|");
- uri = MedTaggerPipelineFunction.class.getResource("/resources/" + parsed[0]).toURI();
- URI dictURI = null;
- if (parsed.length > 1) {
- dictURI = MedTaggerPipelineFunction.class.getResource("/resources/" + parsed[1]).toURI();
+ case STANDALONE_DICT_ONLY: {
+ uri = MedTaggerPipelineFunction.class.getResource("/resources/" + this.resourceFolder).toURI();
+ Map env = new HashMap<>();
+ env.put("create", "true");
+ try {
+ // Ensure it is created, ignore if not
+ FileSystem fs = FileSystems.newFileSystem(uri, env);
+ } catch (FileSystemAlreadyExistsException ignored) {
+ }
+ ae.add(createEngineDescription(AhoCorasickLookupAnnotator.class, "dict_file", uri.toString()));
+ break;
}
- Map env = new HashMap<>();
- env.put("create", "true");
- try {
- // Ensure it is created, ignore if not
- FileSystem fs = FileSystems.newFileSystem(uri, env);
- } catch (FileSystemAlreadyExistsException ignored) {
+ case STANDALONE_DICT_AND_IE: {
+ String[] parsed = this.resourceFolder.split("\\|");
+ uri = MedTaggerPipelineFunction.class.getResource("/resources/" + parsed[0]).toURI();
+ URI dictURI = null;
+ if (parsed.length > 1) {
+ dictURI = MedTaggerPipelineFunction.class.getResource("/resources/" + parsed[1]).toURI();
+ }
+ Map env = new HashMap<>();
+ env.put("create", "true");
+ try {
+ // Ensure it is created, ignore if not
+ FileSystem fs = FileSystems.newFileSystem(uri, env);
+ } catch (FileSystemAlreadyExistsException ignored) {
+ }
+ ae.add(createEngineDescription("org.ohnlp.medtagger.ie.aes.MedTaggerIEAnnotatorAE", "Resource_dir", uri.toString()));
+ if (dictURI != null) {
+ ae.add(createEngineDescription("desc.backbone.aes.MedTaggerDictionaryLookupAE", "dict_file", dictURI.toString()));
+ } else {
+ ae.add(createEngineDescription("desc.backbone.aes.MedTaggerDictionaryLookupAE"));
+ }
+ break;
}
- ae.add(createEngineDescription("org.ohnlp.medtagger.ie.aes.MedTaggerIEAnnotatorAE", "Resource_dir", uri.toString()));
- if (dictURI != null) {
- ae.add(createEngineDescription("desc.backbone.aes.MedTaggerDictionaryLookupAE", "dict_file", dictURI.toString()));
- } else {
+ case GENERAL_CLINICAL:
ae.add(createEngineDescription("desc.backbone.aes.MedTaggerDictionaryLookupAE"));
- }
- break;
+ break;
+ }
+
+ // Add Context handling
+ if (uri != null) {
+ ae.add(AnalysisEngineFactory.createEngineDescription(RuleContextAnnotator.class, "context_ruleset", uri.toString()));
+ } else {
+ ae.add(AnalysisEngineFactory.createEngineDescription(RuleContextAnnotator.class));
}
- case GENERAL_CLINICAL:
- ae.add(createEngineDescription("desc.backbone.aes.MedTaggerDictionaryLookupAE"));
- break;
- }
- // Add Context handling
- if (uri != null) {
- ae.add(AnalysisEngineFactory.createEngineDescription(RuleContextAnnotator.class, "context_ruleset", uri.toString()));
- } else {
- ae.add(AnalysisEngineFactory.createEngineDescription(RuleContextAnnotator.class));
+ this.resMgr = ResourceManagerFactory.newResourceManager();
+ this.aae = UIMAFramework.produceAnalysisEngine(ae.createAggregateDescription(), resMgr, null);
+ this.cas = CasCreationUtils.createCas(Collections.singletonList(aae.getMetaData()),
+ null, resMgr);
+ } finally {
+ INIT_MUTEX_LOCK.unlock();
}
- this.resMgr = ResourceManagerFactory.newResourceManager();
- this.aae = UIMAFramework.produceAnalysisEngine(ae.createAggregateDescription(), resMgr, null);
- this.cas = CasCreationUtils.createCas(Collections.singletonList(aae.getMetaData()),
- null, resMgr);
}
@ProcessElement