From 77bc77a36e174f88a779f9269b8d98496d1c1a1f Mon Sep 17 00:00:00 2001 From: koernerfelicia Date: Thu, 21 Jan 2021 17:38:17 +0100 Subject: [PATCH 1/2] Add prepare-transformers target to Makefile, and add list of s3 urls and cache filenames for HF transformers models --- Makefile | 9 +++++++- data/test/hf_transformers_models.txt | 34 ++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 data/test/hf_transformers_models.txt diff --git a/Makefile b/Makefile index ee3cda52f057..3f949257ff7c 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,8 @@ help: @echo " Download all additional resources needed to use spacy as part of Rasa." @echo " prepare-mitie" @echo " Download all additional resources needed to use mitie as part of Rasa." + @echo " prepare-transformers" + @echo " Download all models needed for testing LanguageModelFeaturizer." @echo " test" @echo " Run pytest on tests/." @echo " Use the JOBS environment variable to configure number of workers (default: 1)." 
@@ -134,7 +136,12 @@ else endif rm data/MITIE*.bz2 -prepare-tests-files: prepare-spacy prepare-mitie +prepare-transformers: + CACHE_DIR=$(HOME)/.cache/torch/transformers;\ + mkdir -p "$$CACHE_DIR";\ + while read -r URL; do read -r CACHE_FILE; wget "$$URL" -O "$$CACHE_DIR/$$CACHE_FILE" || { rm -f "$$CACHE_DIR/$$CACHE_FILE"; exit 1; }; done < "data/hf_transformers_models.txt" + +prepare-tests-files: prepare-spacy prepare-mitie prepare-transformers prepare-wget-macos: brew install wget || true diff --git a/data/test/hf_transformers_models.txt b/data/test/hf_transformers_models.txt new file mode 100644 index 000000000000..b3eb395c1a6b --- /dev/null +++ b/data/test/hf_transformers_models.txt @@ -0,0 +1,34 @@ +https://s3.amazonaws.com/models.huggingface.co/bert/rasa/LaBSE/vocab.txt +21aa38329c730774d9f45df9ec5443a9bd4abd2191e1d510c27647c151c5437f.f2539f82b1008971c6ea6574f078d95c6eead57223fc74fdc420013fa9de391a +https://s3.amazonaws.com/models.huggingface.co/bert/rasa/LaBSE/special_tokens_map.json +99497d78492c90ab7d824d695b9a8d043369fbc2bf1112dcc7cdef9a6c4fa691.275045728fbf41c11d3dae08b8742c054377e18d92cc7b72b6351152a99b64e4 +https://s3.amazonaws.com/models.huggingface.co/bert/rasa/LaBSE/tokenizer_config.json +527f618330e845c9d31826e7d9ce983aa816fafcf4f29f8c52f8ae1fdd097219.1c61d5d3dc67d88e0c74c64cda9b17bc30bdbd1c373cceeb740b9953729709aa +https://cdn.huggingface.co/rasa/LaBSE/tf_model.h5 +fd2ff7409cd4abbce31d54b8acebc305939787751dd697b6f38a3bf1f197a614.2589e15ea34b96d9bdcc478748ae77b629487da363566089fe6a8cdb1e6ea284.h5 +https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-vocab.json +4ab93d0cd78ae80e746c27c9cd34e90b470abdabe0590c9ec742df61625ba310.b9628f6fe5519626534b82ce7ec72b22ce0ae79550325f45c604a25c0ad87fd6 +https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-merges.txt +0f8de0dbd6a2bb6bde7d758f4c120dd6dd20b46f2bf0a47bc899c89f46532fde.20808570f9a3169212a577f819c845330da870aeb14c40f7319819fce10c3b76 +https://cdn.huggingface.co/openai-gpt-tf_model.h5 
+642cba239b8eca22b702e71e92d507b8af47ddd2df74dc7751e2a4f65d8d434c.f26918df904593cca2dbd78a3bb760376f4f8ce1ce8d3b13bb6ab28228ee65fc.h5 +https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-vocab.json +f2808208f9bec2320371a9f5f891c184ae0b674ef866b79c58177067d15732dd.1512018be4ba4e8726e41b9145129dc30651ea4fec86aa61f4b9f40bf94eac71 +https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-merges.txt +d629f792e430b3c76a1291bb2766b0a047e36fae0588f9dbc1ae51decdff691b.70bec105b4158ed9a1747fea67a43f5dee97855c64d62b6ec3742f4cfdb5feda +https://cdn.huggingface.co/gpt2-tf_model.h5 +132dec44f9ced4b20f1b1c88a426b1d3dab5ba9e5f24a82541833dae44d5b8db.afd2261c07481427cd087f622388c2c086be9c62875f5945922c7adb2239b63a.h5 +https://s3.amazonaws.com/models.huggingface.co/bert/xlnet-base-cased-spiece.model +dad589d582573df0293448af5109cb6981ca77239ed314e15ca63b7b8a318ddd.8b10bd978b5d01c21303cc761fc9ecd464419b3bf921864a355ba807cfbfafa8 +https://cdn.huggingface.co/xlnet-base-cased-tf_model.h5 +44ada4a49497a676c2d1fa2dbb7059df50f9cedb14f332862d2fca2c35d04a7d.42bc04b3944abffc38e9b60aadffb89f81aafa9c86473d157ed4b28953471ceb.h5 +https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt +26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084 +https://cdn.huggingface.co/distilbert-base-uncased-tf_model.h5 +cce28882467f298a29fc905b9dd1683695d96198a83432fe707089dccd71c019.e02bd57e9d8507853eccc7c04ac2e938a6cdaff4b9bf941c10e781b61ddb9bbd.h5 +https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-vocab.json +d0c5776499adc1ded22493fae699da0971c1ee4c2587111707a4d177d20257a2.ef00af9e673c7160b4d41cfda1f48c5f4cba57d5142754525572a846a1ab1b9b +https://s3.amazonaws.com/models.huggingface.co/bert/roberta-base-merges.txt +b35e7cd126cd4229a746b5d5c29a749e8e84438b14bcdb575950584fe33207e8.70bec105b4158ed9a1747fea67a43f5dee97855c64d62b6ec3742f4cfdb5feda +https://cdn.huggingface.co/roberta-base-tf_model.h5 
+2e18f106492efe1a8e6766e4d4e0bf4a82cee267c0cc52af431cf97005d4e3db.34733ed140f011f207fb07b32b443050356e99a9638db284a22d77bd3d5f54b3.h5 From 36808db74d0f12bf05b7778c1b5a02bf1b1efcef Mon Sep 17 00:00:00 2001 From: koernerfelicia Date: Thu, 21 Jan 2021 17:52:38 +0100 Subject: [PATCH 2/2] Fix typo in prepare-tests --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3f949257ff7c..7259151d56cd 100644 --- a/Makefile +++ b/Makefile @@ -139,7 +139,7 @@ endif prepare-transformers: CACHE_DIR=$(HOME)/.cache/torch/transformers;\ mkdir -p "$$CACHE_DIR";\ - while read -r URL; do read -r CACHE_FILE; wget "$$URL" -O "$$CACHE_DIR/$$CACHE_FILE" || { rm -f "$$CACHE_DIR/$$CACHE_FILE"; exit 1; }; done < "data/hf_transformers_models.txt" + while read -r URL; do read -r CACHE_FILE; wget "$$URL" -O "$$CACHE_DIR/$$CACHE_FILE" || { rm -f "$$CACHE_DIR/$$CACHE_FILE"; exit 1; }; done < "data/test/hf_transformers_models.txt" prepare-tests-files: prepare-spacy prepare-mitie prepare-transformers