-
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f072abf
commit 846f2aa
Showing
13 changed files
with
465 additions
and
61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
use_model/** | ||
models/** | ||
use_package/** | ||
packages/** | ||
universal_sentence_encoder/models/** | ||
.DS_store | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,24 @@ | ||
set -e | ||
|
||
# select here which one to build | ||
|
||
# MODEL_NAME='en_use_md' | ||
# MODEL_NAME='en_use_lg' | ||
# MODEL_NAME='xx_use_md' | ||
MODEL_NAME='xx_use_lg' | ||
|
||
mkdir -p models/$MODEL_NAME | ||
# create the nlp and save to disk | ||
python create.py | ||
python create.py $MODEL_NAME | ||
# overwrite meta.json | ||
cp meta/meta.json use_model/meta.json | ||
cp meta/$MODEL_NAME.json models/$MODEL_NAME/meta.json | ||
|
||
# create the package | ||
mkdir -p use_package | ||
python -m spacy package use_model use_package --force | ||
pushd use_package/en_use-0.1.3 | ||
mkdir -p packages | ||
python -m spacy package models/$MODEL_NAME packages --force | ||
pushd packages/$MODEL_NAME-0.2.0 | ||
# zip it | ||
python setup.py sdist | ||
# install the tar.gz from dist/$MODEL_NAME-0.2.0.tar.gz | ||
pip install dist/en_use-0.1.3.tar.gz | ||
pip install dist/$MODEL_NAME-0.2.0.tar.gz | ||
popd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,16 @@ | ||
from universal_sentence_encoder import language | ||
import typer | ||
from universal_sentence_encoder import language, util | ||
|
||
nlp = language.UniversalSentenceEncoder.create_nlp() | ||
print(nlp.pipe_names) | ||
doc = nlp('Hello my friend') | ||
print(doc.vector) | ||
nlp.to_disk('use_model') | ||
def main(model_name): | ||
if model_name not in util.configs: | ||
raise ValueError(f'Model "{model_name}" not available') | ||
selected_config = util.configs[model_name] | ||
nlp = language.UniversalSentenceEncoder.create_nlp(selected_config['spacy_base_model'], selected_config['tfhub_model_url']) | ||
# nlp.vocab.reset_vectors(width=0) | ||
print(nlp.pipe_names) | ||
doc = nlp('Hello my friend') | ||
print(doc.vector.shape) | ||
nlp.to_disk(f'models/{model_name}') | ||
|
||
if __name__ == "__main__": | ||
typer.run(main) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
{ | ||
"lang": "en", | ||
"name": "use_lg", | ||
"version": "0.2.0", | ||
"spacy_version": ">=2.2.3", | ||
"description": "TensorFlow Hub wrapper for Universal Sentence Encoder", | ||
"author": "Martino Mensio", | ||
"email": "martino.mensio@open.ac.uk", | ||
"url": "https://github.com/MartinoMensio/spacy-universal-sentence-encoder-tfhub", | ||
"license": "MIT", | ||
"requirements": [ | ||
"universal_sentence_encoder @ git+ssh://git@github.com/MartinoMensio/spacy-universal-sentence-embedding-tfhub" | ||
], | ||
"sources": [{ | ||
"name": "Universal Sentence Encoder - Large", | ||
"url": "https://tfhub.dev/google/universal-sentence-encoder-large", | ||
"license": "Apache-2.0" | ||
}, { | ||
"name": "SpaCy English model", | ||
"url": "https://spacy.io/models/en#en_core_web_sm", | ||
"license": "MIT" | ||
}], | ||
"vectors": { | ||
"width": 512, | ||
"vectors": 0, | ||
"keys": 0, | ||
"name": null | ||
}, | ||
"pipeline": [ | ||
"tagger", | ||
"parser", | ||
"ner", | ||
"save_tfhub_model_url", | ||
"overwrite_vectors" | ||
], | ||
"factories": { | ||
"tagger": "tagger", | ||
"parser": "parser", | ||
"ner": "ner", | ||
"save_tfhub_model_url": "save_tfhub_model_url", | ||
"overwrite_vectors": "overwrite_vectors" | ||
}, | ||
"labels": { | ||
"tagger": [ | ||
"$", | ||
"''", | ||
",", | ||
"-LRB-", | ||
"-RRB-", | ||
".", | ||
":", | ||
"ADD", | ||
"AFX", | ||
"CC", | ||
"CD", | ||
"DT", | ||
"EX", | ||
"FW", | ||
"HYPH", | ||
"IN", | ||
"JJ", | ||
"JJR", | ||
"JJS", | ||
"LS", | ||
"MD", | ||
"NFP", | ||
"NN", | ||
"NNP", | ||
"NNPS", | ||
"NNS", | ||
"PDT", | ||
"POS", | ||
"PRP", | ||
"PRP$", | ||
"RB", | ||
"RBR", | ||
"RBS", | ||
"RP", | ||
"SYM", | ||
"TO", | ||
"UH", | ||
"VB", | ||
"VBD", | ||
"VBG", | ||
"VBN", | ||
"VBP", | ||
"VBZ", | ||
"WDT", | ||
"WP", | ||
"WP$", | ||
"WRB", | ||
"XX", | ||
"_SP", | ||
"``" | ||
], | ||
"parser": [ | ||
"ROOT", | ||
"acl", | ||
"acomp", | ||
"advcl", | ||
"advmod", | ||
"agent", | ||
"amod", | ||
"appos", | ||
"attr", | ||
"aux", | ||
"auxpass", | ||
"case", | ||
"cc", | ||
"ccomp", | ||
"compound", | ||
"conj", | ||
"csubj", | ||
"csubjpass", | ||
"dative", | ||
"dep", | ||
"det", | ||
"dobj", | ||
"expl", | ||
"intj", | ||
"mark", | ||
"meta", | ||
"neg", | ||
"nmod", | ||
"npadvmod", | ||
"nsubj", | ||
"nsubjpass", | ||
"nummod", | ||
"oprd", | ||
"parataxis", | ||
"pcomp", | ||
"pobj", | ||
"poss", | ||
"preconj", | ||
"predet", | ||
"prep", | ||
"prt", | ||
"punct", | ||
"quantmod", | ||
"relcl", | ||
"xcomp" | ||
], | ||
"ner": [ | ||
"CARDINAL", | ||
"DATE", | ||
"EVENT", | ||
"FAC", | ||
"GPE", | ||
"LANGUAGE", | ||
"LAW", | ||
"LOC", | ||
"MONEY", | ||
"NORP", | ||
"ORDINAL", | ||
"ORG", | ||
"PERCENT", | ||
"PERSON", | ||
"PRODUCT", | ||
"QUANTITY", | ||
"TIME", | ||
"WORK_OF_ART" | ||
] | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.