diff --git a/Readme.md b/Readme.md
index f1e2937..e4ad4a7 100644
--- a/Readme.md
+++ b/Readme.md
@@ -21,7 +21,7 @@ If you are looking for light-weight versions, VnCoreNLP's word segmentation and
## Installation
- `Java 1.8+` (Prerequisite)
-- File `VnCoreNLP-1.1.1.jar` (27MB) and folder `models` (115MB) are placed in the same working folder.
+- File `VnCoreNLP-1.2.jar` (27MB) and folder `models` (115MB) are placed in the same working folder.
- `Python 3.6+` if using [a Python wrapper of VnCoreNLP](https://github.com/thelinhbkhn2014/VnCoreNLP_Wrapper). To install this wrapper, users have to run the following command:
`$ pip3 install py_vncorenlp`
@@ -38,7 +38,7 @@ import py_vncorenlp
# and save them in some local working folder
py_vncorenlp.download_model(save_dir='/absolute/path/to/vncorenlp')
-# Load VnCoreNLP from the local working folder that contains both `VnCoreNLP-1.1.1.jar` and `models`
+# Load VnCoreNLP from the local working folder that contains both `VnCoreNLP-1.2.jar` and `models`
model = py_vncorenlp.VnCoreNLP(save_dir='/absolute/path/to/vncorenlp')
# Equivalent to: model = py_vncorenlp.VnCoreNLP(annotators=["wseg", "pos", "ner", "parse"], save_dir='/absolute/path/to/vncorenlp')
@@ -80,13 +80,13 @@ print(output)
You can run VnCoreNLP to annotate an input raw text corpus (e.g. a collection of news content) by using the following commands:
// To perform word segmentation, POS tagging, NER and then dependency parsing
- $ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt
+ $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt
// To perform word segmentation, POS tagging and then NER
- $ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg,pos,ner
+ $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg,pos,ner
// To perform word segmentation and then POS tagging
- $ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg,pos
+ $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg,pos
// To perform word segmentation
- $ java -Xmx2g -jar VnCoreNLP-1.1.1.jar -fin input.txt -fout output.txt -annotators wseg
+ $ java -Xmx2g -jar VnCoreNLP-1.2.jar -fin input.txt -fout output.txt -annotators wseg
### Using VnCoreNLP from the API
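
The CLI invocations above have a direct equivalent in the Java API referenced by this section. A minimal sketch of that usage (the `vn.pipeline` classes and annotator names come from this repository; the example class name and sample sentence are illustrative only, so treat exact signatures as assumptions if building against another version):

```java
import vn.pipeline.*;
import java.io.IOException;

public class VnCoreNLPExample {
    public static void main(String[] args) throws IOException {
        // Same annotator chain as the default CLI run above:
        // word segmentation, POS tagging, NER, dependency parsing.
        String[] annotators = {"wseg", "pos", "ner", "parse"};
        VnCoreNLP pipeline = new VnCoreNLP(annotators);

        Annotation annotation = new Annotation("Ông Nguyễn Khắc Chúc đang làm việc tại Đại học Quốc gia Hà Nội.");
        pipeline.annotate(annotation);
        System.out.println(annotation.toString());
    }
}
```
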
diff --git a/VnCoreNLP-1.2.jar b/VnCoreNLP-1.2.jar
new file mode 100644
index 0000000..c3255f2
Binary files /dev/null and b/VnCoreNLP-1.2.jar differ
diff --git a/pom.xml b/pom.xml
index aaaabbe..0bf5ed6 100755
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
<groupId>VnCoreNLP</groupId>
<artifactId>VnCoreNLP</artifactId>
- <version>1.1.1</version>
+ <version>1.2</version>
diff --git a/src/main/java/vn/corenlp/ner/NerRecognizer.java b/src/main/java/vn/corenlp/ner/NerRecognizer.java
index b32c892..ab74dfa 100644
--- a/src/main/java/vn/corenlp/ner/NerRecognizer.java
+++ b/src/main/java/vn/corenlp/ner/NerRecognizer.java
@@ -11,6 +11,7 @@
import vn.corenlp.wordsegmenter.Vocabulary;
import vn.pipeline.LexicalInitializer;
import vn.pipeline.Word;
+import vn.pipeline.Utils;
import java.io.File;
import java.io.IOException;
@@ -34,7 +35,7 @@ public NerRecognizer() throws IOException{
nlpDecoder = new NLPDecoder();
List<NLPComponent<NLPNode>> components = new ArrayList<>();
- String modelPath = System.getProperty("user.dir") + "/models/ner/vi-ner.xz";
+ String modelPath = Utils.jarDir + "/models/ner/vi-ner.xz";
if (!new File(modelPath).exists()) throw new IOException("NerRecognizer: " + modelPath + " is not found!");
GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica();
if(lexica != null) {
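
The same substitution, from the working directory to the jar directory, is applied in every annotator below. A hypothetical helper (illustrative only; `ModelPaths` and `resolve` are not part of this change) that captures the shared pattern:

```java
import java.io.File;
import java.io.IOException;

final class ModelPaths {
    // Resolve a model file against the folder that contains the VnCoreNLP jar
    // (vn.pipeline.Utils.jarDir), e.g. resolve("models/ner/vi-ner.xz"),
    // and fail fast when the file is missing.
    static String resolve(String relativePath) throws IOException {
        String path = vn.pipeline.Utils.jarDir + "/" + relativePath;
        if (!new File(path).exists())
            throw new IOException(path + " is not found!");
        return path;
    }
}
```
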
diff --git a/src/main/java/vn/corenlp/parser/DependencyParser.java b/src/main/java/vn/corenlp/parser/DependencyParser.java
index b92b38d..488a42a 100755
--- a/src/main/java/vn/corenlp/parser/DependencyParser.java
+++ b/src/main/java/vn/corenlp/parser/DependencyParser.java
@@ -10,6 +10,7 @@
import org.apache.log4j.Logger;
import vn.pipeline.LexicalInitializer;
import vn.pipeline.Word;
+import vn.pipeline.Utils;
import java.io.File;
import java.io.IOException;
@@ -32,7 +33,7 @@ public DependencyParser() throws IOException {
nlpDecoder = new NLPDecoder();
List<NLPComponent<NLPNode>> components = new ArrayList<>();
- String modelPath = System.getProperty("user.dir") + "/models/dep/vi-dep.xz";
+ String modelPath = Utils.jarDir + "/models/dep/vi-dep.xz";
if (!new File(modelPath).exists()) throw new IOException("DependencyParser: " + modelPath + " is not found!");
GlobalLexica lexica = LexicalInitializer.initialize(true).initializeLexica();
if(lexica != null) {
diff --git a/src/main/java/vn/corenlp/postagger/PosTagger.java b/src/main/java/vn/corenlp/postagger/PosTagger.java
index f0cafa6..231d3f2 100644
--- a/src/main/java/vn/corenlp/postagger/PosTagger.java
+++ b/src/main/java/vn/corenlp/postagger/PosTagger.java
@@ -13,13 +13,15 @@
import java.util.LinkedList;
import java.util.List;
+import vn.pipeline.Utils;
+
public class PosTagger {
private static PosTagger posTagger = null;
private MorphTagger tagger;
public final static Logger LOGGER = Logger.getLogger(PosTagger.class);
public PosTagger() throws IOException {
LOGGER.info("Loading POS Tagging model");
- String modelPath = System.getProperty("user.dir") + "/models/postagger/vi-tagger";
+ String modelPath = Utils.jarDir + "/models/postagger/vi-tagger";
if (!new File(modelPath).exists()) throw new IOException("PosTagger: " + modelPath + " is not found!");
tagger = FileUtils.loadFromFile(modelPath);
diff --git a/src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java b/src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java
index bfdead0..aca365c 100755
--- a/src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java
+++ b/src/main/java/vn/corenlp/wordsegmenter/Vocabulary.java
@@ -7,13 +7,15 @@
import java.util.HashSet;
import java.util.Set;
+import vn.pipeline.Utils;
+
@SuppressWarnings("unchecked")
public class Vocabulary {
public static Set<String> VN_DICT;
static {
VN_DICT = new HashSet<>();
try {
- String vocabPath = System.getProperty("user.dir") + "/models/wordsegmenter/vi-vocab";
+ String vocabPath = Utils.jarDir + "/models/wordsegmenter/vi-vocab";
if (!new File(vocabPath).exists())
throw new IOException("Vocabulary: " + vocabPath + " is not found!");
//Vocabulary.class.getClassLoader().getResource("wordsegmenter/vi-vocab").getPath()
diff --git a/src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java b/src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java
index 6e9bd10..379c86b 100644
--- a/src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java
+++ b/src/main/java/vn/corenlp/wordsegmenter/WordSegmenter.java
@@ -22,7 +22,7 @@ public class WordSegmenter {
public WordSegmenter()
throws IOException {
LOGGER.info("Loading Word Segmentation model");
- String modelPath = System.getProperty("user.dir") + "/models/wordsegmenter/wordsegmenter.rdr";
+ String modelPath = vn.pipeline.Utils.jarDir + "/models/wordsegmenter/wordsegmenter.rdr";
if (!new File(modelPath).exists())
throw new IOException("WordSegmenter: " + modelPath + " is not found!");
diff --git a/src/main/java/vn/pipeline/LexicalInitializer.java b/src/main/java/vn/pipeline/LexicalInitializer.java
index 2574196..0f8f753 100644
--- a/src/main/java/vn/pipeline/LexicalInitializer.java
+++ b/src/main/java/vn/pipeline/LexicalInitializer.java
@@ -27,12 +27,12 @@ public LexicalInitializer(boolean initLexica) throws IOException {
this.initLexica = initLexica;
this.lexicalMap = new HashMap<>();
- String lexicalPath = System.getProperty("user.dir") + "/models/ner/vi-500brownclusters.xz";
+ String lexicalPath = Utils.jarDir + "/models/ner/vi-500brownclusters.xz";
if (!new File(lexicalPath).exists())
throw new IOException("LexicalInitializer: " + lexicalPath + " is not found!");
lexicalMap.put("word_clusters", lexicalPath);
- lexicalPath = System.getProperty("user.dir") + "/models/ner/vi-pretrainedembeddings.xz";
+ lexicalPath = Utils.jarDir + "/models/ner/vi-pretrainedembeddings.xz";
if (!new File(lexicalPath).exists())
throw new IOException("LexicalInitializer: " + lexicalPath + " is not found!");
lexicalMap.put("word_embeddings", lexicalPath);
diff --git a/src/main/java/vn/pipeline/Utils.java b/src/main/java/vn/pipeline/Utils.java
index 641bb11..c4c8f04 100644
--- a/src/main/java/vn/pipeline/Utils.java
+++ b/src/main/java/vn/pipeline/Utils.java
@@ -6,10 +6,14 @@
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
+import java.io.File;
import java.io.IOException;
import java.util.List;
public class Utils {
+ private static File jarFile = new File(VnCoreNLP.class.getProtectionDomain().getCodeSource().getLocation().getPath());
+ public static String jarDir = jarFile.getParentFile().getPath();
+
private static LanguageDetector languageDetector = null;
public static String detectLanguage(String text) throws IOException{
if(languageDetector == null) {
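
`Utils.jarDir` is taken from the jar's code-source location, so the model lookups above no longer depend on where the JVM was started. One caveat: `getLocation().getPath()` returns a URL path in which characters such as spaces stay percent-encoded (`%20`). A self-contained sketch (illustrative only, not this project's code) of a variant that goes through `toURI()` instead:

```java
import java.io.File;
import java.net.URISyntaxException;

public class JarDirSketch {
    public static String jarDir() {
        try {
            // Building a File from the URI yields a decoded filesystem path,
            // so a jar installed under a folder with spaces still resolves.
            File jarFile = new File(JarDirSketch.class.getProtectionDomain()
                    .getCodeSource().getLocation().toURI());
            return jarFile.getParentFile().getPath();
        } catch (URISyntaxException e) {
            // Fall back to the working directory if the location cannot be parsed.
            return System.getProperty("user.dir");
        }
    }

    public static void main(String[] args) {
        System.out.println("Expecting models under: " + jarDir() + "/models");
    }
}
```
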
diff --git a/src/main/java/vn/pipeline/VnCoreNLP.java b/src/main/java/vn/pipeline/VnCoreNLP.java
index fc6fc0f..c965d58 100644
--- a/src/main/java/vn/pipeline/VnCoreNLP.java
+++ b/src/main/java/vn/pipeline/VnCoreNLP.java
@@ -14,11 +14,10 @@
public class VnCoreNLP {
- private PosTagger posTagger;
-
public final static Logger LOGGER = Logger.getLogger(Annotation.class);
private WordSegmenter wordSegmenter;
+ private PosTagger posTagger;
private NerRecognizer nerRecognizer;
private DependencyParser dependencyParser;