Skip to content

Commit

Permalink
added tagger, parser, ner from en_core_web_sm
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinoMensio committed Feb 10, 2020
1 parent a918105 commit f979403
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 8 deletions.
4 changes: 2 additions & 2 deletions build_use.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ cp meta/meta.json use_model/meta.json
# create the package
mkdir -p use_package
python -m spacy package use_model use_package --force
pushd use_package/en_use-0.1.1
pushd use_package/en_use-0.1.2
# zip it
python setup.py sdist
# install the tar.gz from dist/en_use-0.1.2.tar.gz
pip install dist/en_use-0.1.1.tar.gz
pip install dist/en_use-0.1.2.tar.gz
popd
132 changes: 128 additions & 4 deletions meta/meta.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"lang": "en",
"name": "use",
"version": "0.1.1",
"version": "0.1.2",
"spacy_version": ">=2.2.3",
"description": "Using TFHub USE",
"author": "Martino Mensio",
Expand All @@ -18,7 +18,9 @@
"name": null
},
"pipeline": [
"sentencizer",
"tagger",
"parser",
"ner",
"overwrite_vectors"
],
"pipeline_args": {
Expand All @@ -27,8 +29,130 @@
}
},
"factories": {
"sentencizer": "sentencizer",
"tagger": "tagger",
"parser": "parser",
"ner": "ner",
"overwrite_vectors": "overwrite_vectors"
},
"labels": {}
"labels": {
"tagger": [
"$",
"''",
",",
"-LRB-",
"-RRB-",
".",
":",
"ADD",
"AFX",
"CC",
"CD",
"DT",
"EX",
"FW",
"HYPH",
"IN",
"JJ",
"JJR",
"JJS",
"LS",
"MD",
"NFP",
"NN",
"NNP",
"NNPS",
"NNS",
"PDT",
"POS",
"PRP",
"PRP$",
"RB",
"RBR",
"RBS",
"RP",
"SYM",
"TO",
"UH",
"VB",
"VBD",
"VBG",
"VBN",
"VBP",
"VBZ",
"WDT",
"WP",
"WP$",
"WRB",
"XX",
"_SP",
"``"
],
"parser": [
"ROOT",
"acl",
"acomp",
"advcl",
"advmod",
"agent",
"amod",
"appos",
"attr",
"aux",
"auxpass",
"case",
"cc",
"ccomp",
"compound",
"conj",
"csubj",
"csubjpass",
"dative",
"dep",
"det",
"dobj",
"expl",
"intj",
"mark",
"meta",
"neg",
"nmod",
"npadvmod",
"nsubj",
"nsubjpass",
"nummod",
"oprd",
"parataxis",
"pcomp",
"pobj",
"poss",
"preconj",
"predet",
"prep",
"prt",
"punct",
"quantmod",
"relcl",
"xcomp"
],
"ner": [
"CARDINAL",
"DATE",
"EVENT",
"FAC",
"GPE",
"LANGUAGE",
"LAW",
"LOC",
"MONEY",
"NORP",
"ORDINAL",
"ORG",
"PERCENT",
"PERSON",
"PRODUCT",
"QUANTITY",
"TIME",
"WORK_OF_ART"
]
}
}
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
-e .
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz

pylint
autopep8
5 changes: 3 additions & 2 deletions universal_sentence_encoder/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ def overwrite_vectors(doc):

@staticmethod
def create_nlp(language_base='en'):
nlp = spacy.blank(language_base)
nlp.add_pipe(nlp.create_pipe('sentencizer'))
# nlp = spacy.blank(language_base)
# nlp.add_pipe(nlp.create_pipe('sentencizer'))
nlp = spacy.load(f'{language_base}_core_web_sm')
nlp.add_pipe(UniversalSentenceEncoder.overwrite_vectors)
return nlp

Expand Down

0 comments on commit f979403

Please sign in to comment.