diff --git a/build_use.sh b/build_use.sh index baaad71..e9e0d3b 100644 --- a/build_use.sh +++ b/build_use.sh @@ -8,9 +8,9 @@ cp meta/meta.json use_model/meta.json # create the package mkdir -p use_package python -m spacy package use_model use_package --force -pushd use_package/en_use-0.1.1 +pushd use_package/en_use-0.1.2 # zip it python setup.py sdist # install the tar.gz from dist/en_use-0.1.1.tar.gz -pip install dist/en_use-0.1.1.tar.gz +pip install dist/en_use-0.1.2.tar.gz popd \ No newline at end of file diff --git a/meta/meta.json b/meta/meta.json index 5852e65..2d10ba0 100644 --- a/meta/meta.json +++ b/meta/meta.json @@ -1,7 +1,7 @@ { "lang": "en", "name": "use", - "version": "0.1.1", + "version": "0.1.2", "spacy_version": ">=2.2.3", "description": "Using TFHub USE", "author": "Martino Mensio", @@ -18,7 +18,9 @@ "name": null }, "pipeline": [ - "sentencizer", + "tagger", + "parser", + "ner", "overwrite_vectors" ], "pipeline_args": { @@ -27,8 +29,130 @@ } }, "factories": { - "sentencizer": "sentencizer", + "tagger": "tagger", + "parser": "parser", + "ner": "ner", "overwrite_vectors": "overwrite_vectors" }, - "labels": {} + "labels": { + "tagger": [ + "$", + "''", + ",", + "-LRB-", + "-RRB-", + ".", + ":", + "ADD", + "AFX", + "CC", + "CD", + "DT", + "EX", + "FW", + "HYPH", + "IN", + "JJ", + "JJR", + "JJS", + "LS", + "MD", + "NFP", + "NN", + "NNP", + "NNPS", + "NNS", + "PDT", + "POS", + "PRP", + "PRP$", + "RB", + "RBR", + "RBS", + "RP", + "SYM", + "TO", + "UH", + "VB", + "VBD", + "VBG", + "VBN", + "VBP", + "VBZ", + "WDT", + "WP", + "WP$", + "WRB", + "XX", + "_SP", + "``" + ], + "parser": [ + "ROOT", + "acl", + "acomp", + "advcl", + "advmod", + "agent", + "amod", + "appos", + "attr", + "aux", + "auxpass", + "case", + "cc", + "ccomp", + "compound", + "conj", + "csubj", + "csubjpass", + "dative", + "dep", + "det", + "dobj", + "expl", + "intj", + "mark", + "meta", + "neg", + "nmod", + "npadvmod", + "nsubj", + "nsubjpass", + "nummod", + "oprd", + "parataxis", + "pcomp", + "pobj", + "poss", + "preconj", + "predet", + "prep", + "prt", + "punct", + "quantmod", + "relcl", + "xcomp" + ], + "ner": [ + "CARDINAL", + "DATE", + "EVENT", + "FAC", + "GPE", + "LANGUAGE", + "LAW", + "LOC", + "MONEY", + "NORP", + "ORDINAL", + "ORG", + "PERCENT", + "PERSON", + "PRODUCT", + "QUANTITY", + "TIME", + "WORK_OF_ART" + ] + } } \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index bd5d004..6ac5da7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -e . +https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz pylint autopep8 \ No newline at end of file diff --git a/universal_sentence_encoder/language.py b/universal_sentence_encoder/language.py index e9fb5ff..89fc829 100644 --- a/universal_sentence_encoder/language.py +++ b/universal_sentence_encoder/language.py @@ -37,8 +37,9 @@ def overwrite_vectors(doc): @staticmethod def create_nlp(language_base='en'): - nlp = spacy.blank(language_base) - nlp.add_pipe(nlp.create_pipe('sentencizer')) + # nlp = spacy.blank(language_base) + # nlp.add_pipe(nlp.create_pipe('sentencizer')) + nlp = spacy.load(f'{language_base}_core_web_sm') nlp.add_pipe(UniversalSentenceEncoder.overwrite_vectors) return nlp