Add pretrained models and sniff tests for allennlp_semparse. (#4)

- For allenai/allennlp#3351.
- Conveniently, allenai/allennlp#3361 broke `allennlp_semparse` a while back, so the [AllenNLP Hub Master Build](http://build.allennlp.org/viewType.html?buildTypeId=AllenNLPHub_Master) should break when this PR is merged.
- We should then fix `allennlp-semparse` and verify that the build goes green.
allenai · Nov 15, 2019 · 2b28b80 · 2b28b80
1 parent dbf122b
commit 2b28b80
Show file tree

Hide file tree

Showing 9 changed files with 332 additions and 180 deletions.
diff --git a/allennlp_hub/__init__.py b/allennlp_hub/__init__.py
diff --git a/allennlp_hub/pretrained.py b/allennlp_hub/pretrained.py
diff --git a/allennlp_hub/pretrained/__init__.py b/allennlp_hub/pretrained/__init__.py
@@ -0,0 +1,2 @@
+from allennlp_hub.pretrained.allennlp_pretrained import *
+from allennlp_hub.pretrained.allennlp_semparse_pretrained import *
diff --git a/allennlp_hub/pretrained/allennlp_pretrained.py b/allennlp_hub/pretrained/allennlp_pretrained.py
@@ -0,0 +1,122 @@
+from allennlp import predictors
+from allennlp_hub.pretrained.helpers import _load_predictor
+import allennlp.models
+
+
+# Models in the main repo
+
+
+def srl_with_elmo_luheng_2018() -> predictors.SemanticRoleLabelerPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/srl-model-2018.05.25.tar.gz",
+        "semantic-role-labeling",
+    )
+    return predictor
+
+
+def bert_srl_shi_2019() -> predictors.SemanticRoleLabelerPredictor:
+    predictor = _load_predictor(
+        "https://s3-us-west-2.amazonaws.com/allennlp/models/bert-base-srl-2019.06.17.tar.gz",
+        "semantic-role-labeling",
+    )
+    return predictor
+
+
+def bidirectional_attention_flow_seo_2017() -> predictors.BidafPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/bidaf-model-2017.09.15-charpad.tar.gz",
+        "machine-comprehension",
+    )
+    return predictor
+
+
+def naqanet_dua_2019() -> predictors.BidafPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/naqanet-2019.04.29-fixed-weight-names.tar.gz",
+        "machine-comprehension",
+    )
+    return predictor
+
+
+def open_information_extraction_stanovsky_2018() -> predictors.OpenIePredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/openie-model.2018-08-20.tar.gz",
+        "open-information-extraction",
+    )
+    return predictor
+
+
+def decomposable_attention_with_elmo_parikh_2017() -> predictors.DecomposableAttentionPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/decomposable-attention-elmo-2018.02.19.tar.gz",
+        "textual-entailment",
+    )
+    return predictor
+
+
+def neural_coreference_resolution_lee_2017() -> predictors.CorefPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/coref-model-2018.02.05.tar.gz",
+        "coreference-resolution",
+    )
+
+    predictor._dataset_reader._token_indexers[
+        "token_characters"
+    ]._min_padding_length = 5
+    return predictor
+
+
+def named_entity_recognition_with_elmo_peters_2018() -> predictors.SentenceTaggerPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/ner-model-2018.12.18.tar.gz",
+        "sentence-tagger",
+    )
+
+    predictor._dataset_reader._token_indexers[
+        "token_characters"
+    ]._min_padding_length = 3
+    return predictor
+
+
+def fine_grained_named_entity_recognition_with_elmo_peters_2018() -> predictors.SentenceTaggerPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/fine-grained-ner-model-elmo-2018.12.21.tar.gz",
+        "sentence-tagger",
+    )
+
+    predictor._dataset_reader._token_indexers[
+        "token_characters"
+    ]._min_padding_length = 3
+    return predictor
+
+
+def span_based_constituency_parsing_with_elmo_joshi_2018() -> predictors.ConstituencyParserPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/elmo-constituency-parser-2018.03.14.tar.gz",
+        "constituency-parser",
+    )
+    return predictor
+
+
+def biaffine_parser_stanford_dependencies_todzat_2017() -> predictors.BiaffineDependencyParserPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/biaffine-dependency-parser-ptb-2018.08.23.tar.gz",
+        "biaffine-dependency-parser",
+    )
+    return predictor
+
+
+def biaffine_parser_universal_dependencies_todzat_2017() -> predictors.BiaffineDependencyParserPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/biaffine-dependency-parser-ud-2018.08.23.tar.gz",
+        "biaffine-dependency-parser",
+    )
+    return predictor
+
+
+def esim_nli_with_elmo_chen_2017() -> predictors.DecomposableAttentionPredictor:
+    predictor = _load_predictor(
+        "https://allennlp.s3.amazonaws.com/models/esim-elmo-2018.05.17.tar.gz",
+        "textual-entailment",
+    )
+    return predictor
diff --git a/allennlp_hub/pretrained/allennlp_semparse_pretrained.py b/allennlp_hub/pretrained/allennlp_semparse_pretrained.py
@@ -0,0 +1,38 @@
+from allennlp_hub.pretrained.helpers import _load_predictor
+from allennlp_semparse import predictors as semparse_predictors
+import allennlp_semparse.models
+
+
+# AllenNLP Semparse models
+
+
+def wikitables_parser_dasigi_2019() -> semparse_predictors.WikiTablesParserPredictor:
+    predictor = _load_predictor(
+        "https://storage.googleapis.com/allennlp-public-models/wikitables-model-2019.07.29.tar.gz",
+        "wikitables-parser",
+    )
+    return predictor
+
+
+def nlvr_parser_dasigi_2019() -> semparse_predictors.NlvrParserPredictor:
+    predictor = _load_predictor(
+        "https://storage.googleapis.com/allennlp-public-models/nlvr-erm-model-2018-12-18-rule-vocabulary-updated.tar.gz",
+        "nlvr-parser",
+    )
+    return predictor
+
+
+def atis_parser_lin_2019() -> semparse_predictors.AtisParserPredictor:
+    predictor = _load_predictor(
+        "https://storage.googleapis.com/allennlp-public-models/atis-parser-2018.11.10.tar.gz",
+        "atis-parser",
+    )
+    return predictor
+
+
+def quarel_parser_tafjord_2019() -> semparse_predictors.QuarelParserPredictor:
+    predictor = _load_predictor(
+        "https://storage.googleapis.com/allennlp-public-models/quarel-parser-zero-2018.12.20.tar.gz",
+        "quarel-parser",
+    )
+    return predictor
diff --git a/allennlp_hub/pretrained/helpers.py b/allennlp_hub/pretrained/helpers.py
@@ -0,0 +1,9 @@
+from allennlp.predictors import Predictor
+from allennlp.models.archival import load_archive
+
+def _load_predictor(archive_file: str, predictor_name: str) -> Predictor:
+    """
+    Helper to load the desired predictor from the given archive.
+    """
+    archive = load_archive(archive_file)
+    return Predictor.from_archive(archive, predictor_name)
diff --git a/setup.py b/setup.py
@@ -34,10 +34,16 @@
     #
     # As a mitigation, run `pip uninstall allennlp` before installing this
     # package.
-    # TODO(brendanr): Make these point to released versions.
+    #
+    # TODO(brendanr): Make these point to released versions. Currently
+    # allennlp-semparse is unreleased and it depends on a specific allennlp
+    # SHA. Due to the aforementioned setuptools bug, we explicitly set the
+    # allennlp version here to be that required by allennlp-semparse.
+    allennlp_sha = "93024e53c1445cb4630ee5c07926abff8943715f"
+    semparse_sha = "937d5945488a33c61d0047bd74d8106e60340bbd"
     install_requirements = [
-        "allennlp @ git+ssh://git@github.com/allenai/allennlp@master#egg=allennlp",
-        "allennlp @ git+ssh://git@github.com/allenai/allennlp-semparse@master#egg=allennlp-semparse"
+        f"allennlp @ git+ssh://git@github.com/allenai/allennlp@{allennlp_sha}#egg=allennlp",
+        f"allennlp_semparse @ git+ssh://git@github.com/allenai/allennlp-semparse@{semparse_sha}#egg=allennlp-semparse",
     ]
 
 # make pytest-runner a conditional requirement,

diff --git a/allennlp_hub/tests/sniff_test.py → tests/pretrained/allennlp_pretrained_test.py b/allennlp_hub/tests/sniff_test.py → tests/pretrained/allennlp_pretrained_test.py
@@ -5,9 +5,7 @@
 from allennlp_hub import pretrained
 
 
-class SniffTest(AllenNlpTestCase):
-    # TODO: Add semparse sniff tests. Were there ever any?
-
+class AllenNlpPretrainedTest(AllenNlpTestCase):
     def test_machine_comprehension(self):
         predictor = pretrained.bidirectional_attention_flow_seo_2017()
 
@@ -16,9 +14,7 @@ def test_machine_comprehension(self):
 
         result = predictor.predict_json({"passage": passage, "question": question})
 
-        correct = (
-            "Keanu Reeves, Laurence Fishburne, Carrie-Anne Moss, Hugo Weaving, and Joe Pantoliano"
-        )
+        correct = "Keanu Reeves, Laurence Fishburne, Carrie-Anne Moss, Hugo Weaving, and Joe Pantoliano"
 
         assert correct == result["best_span_str"]
 
@@ -362,7 +358,9 @@ def test_ner(self):
         ]
         assert result["tags"] == ["B-PER", "L-PER", "O", "O", "O", "O", "U-LOC", "O"]
 
-    @pytest.mark.skipif(spacy.__version__ < "2.1", reason="this model changed from 2.0 to 2.1")
+    @pytest.mark.skipif(
+        spacy.__version__ < "2.1", reason="this model changed from 2.0 to 2.1"
+    )
     def test_constituency_parsing(self):
         predictor = pretrained.span_based_constituency_parsing_with_elmo_joshi_2018()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from allennlp_hub.pretrained.allennlp_pretrained import *
		from allennlp_hub.pretrained.allennlp_semparse_pretrained import *