diff --git a/pom.xml b/pom.xml index f62b56c..0372c03 100755 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.zoho hawking - 0.1.6 + 0.1.7 jar Hawking Hawking is a natural language date time parser that extracts date and time from text with context and parse to the required format. @@ -54,7 +54,7 @@ edu.stanford.nlp stanford-corenlp - 4.2.0 + 4.5.1 javax.servlet diff --git a/src/main/java/com/zoho/hawking/language/english/Parser.java b/src/main/java/com/zoho/hawking/language/english/Parser.java index 3b3b3bd..33d2ef6 100755 --- a/src/main/java/com/zoho/hawking/language/english/Parser.java +++ b/src/main/java/com/zoho/hawking/language/english/Parser.java @@ -1,19 +1,15 @@ //$Id$ package com.zoho.hawking.language.english; -import com.zoho.hawking.utils.CommonUtils; import com.zoho.hawking.utils.Constants; +import edu.stanford.nlp.ie.AbstractSequenceClassifier; import edu.stanford.nlp.ie.crf.CRFClassifier; import edu.stanford.nlp.io.IOUtils; import edu.stanford.nlp.ling.CoreLabel; -import edu.stanford.nlp.sequences.SeqClassifierFlags; import edu.stanford.nlp.util.Triple; - -import java.io.File; import java.io.IOException; import java.io.InputStream; import java.util.List; -import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; @@ -21,25 +17,17 @@ public class Parser { private static final Logger LOGGER = Logger.getLogger(Parser.class.getName()); - static CRFClassifier crf = getCRFInstance(); - - private static CRFClassifier getCRFInstance() { - Properties props = new Properties(); - try { - props.load(CommonUtils.readIsFromClasspath(Constants.PARSERPROPSPATH)); - SeqClassifierFlags flags = new SeqClassifierFlags(props); - CRFClassifier crf = new CRFClassifier(flags); - InputStream parserModel = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(Constants.PARSERMODELPATH); - LOGGER.info("Loading Parser Model"); //No I18N - crf.loadClassifier(parserModel); - LOGGER.info("Parser Model Loaded"); //No I18N - return crf; - } catch (ClassCastException | ClassNotFoundException | IOException e) { - LOGGER.log(Level.SEVERE, "Parser :: Exception in parser class", e.getMessage()); - return null; - } - } + static AbstractSequenceClassifier crf = getCRFInstance(); + private static AbstractSequenceClassifier getCRFInstance() { + try { + InputStream parserModel = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(Constants.PARSERMODELPATH); + return CRFClassifier.getClassifier(parserModel); + } catch (ClassCastException | ClassNotFoundException | IOException e) { + LOGGER.log(Level.SEVERE, "Parser :: Exception in parser class", e.getMessage()); + return null; + } + } public static List> parse(String input) { input = input.replaceAll("http","----"); //No I18N input = input.replaceAll("www","---"); //No I18N diff --git a/src/main/resources/parser/parser.config.props b/src/main/resources/parser/parser.config.props index 79e38a5..8ff5e08 100755 --- a/src/main/resources/parser/parser.config.props +++ b/src/main/resources/parser/parser.config.props @@ -132,7 +132,7 @@ conjoinShapeNGrams = true useNeighborNGrams = true # If true, record the NGram features that correspond to a String (under the current option settings) and reuse rather than recalculating if the String is seen again. -cacheNGrams = true +cacheNGrams = false # Do not include character n-gram features for n-grams that contain neither the beginning or end of the word noMidNGrams = true diff --git a/src/main/resources/parser/parser.crf.ser.gz b/src/main/resources/parser/parser.crf.ser.gz index e2308a8..26b25c3 100644 Binary files a/src/main/resources/parser/parser.crf.ser.gz and b/src/main/resources/parser/parser.crf.ser.gz differ