diff --git a/.github/actions/setup-poetry/action.yml b/.github/actions/setup-poetry/action.yml index 575cfa5e..ee399916 100644 --- a/.github/actions/setup-poetry/action.yml +++ b/.github/actions/setup-poetry/action.yml @@ -15,5 +15,5 @@ runs: python-version: ${{ inputs.python-version }} cache: 'poetry' - name: Install dependencies - run: poetry install + run: poetry install --all-extras shell: bash diff --git a/deepsearch/model/README.md b/deepsearch/model/README.md index 5529d302..61a849af 100644 --- a/deepsearch/model/README.md +++ b/deepsearch/model/README.md @@ -4,21 +4,64 @@ The Model API allows users to serve and integrate their own models. +## Installation +To use the Model API, install including the `api` +extra, i.e.: +- with poetry: +`poetry add "deepsearch-toolkit[api]"` +- with pip: `pip install "deepsearch-toolkit[api]"` + ## Usage +To run a model, register it with a +[`DeepSearchAnnotatorApp`](server/deepsearch_annotator_app.py) and run the app: ```python from deepsearch.model.server.deepsearch_annotator_app import DeepSearchAnnotatorApp annotator = ... # e.g. SimpleTextGeographyAnnotator() app = DeepSearchAnnotatorApp() app.register_annotator(annotator) -app.run() +app.run(host="127.0.0.1", port=8000) ``` -For a complete example, check [examples/main.py](examples/main.py). +### OpenAPI + +The OpenAPI UI is served under `/docs`, e.g. http://127.0.0.1:8000/docs. 
+ +#### Inference + +An example input payload for the `/predict` endpoint would look as follows +(note that `deepsearch.res.ibm.com/x-deadline` should be a future timestamp): +```json +{ + "apiVersion": "v1", + "kind": "NLPModel", + "metadata": { + "annotations": { + "deepsearch.res.ibm.com/x-deadline": "2024-04-20T12:26:01.479484+00:00", + "deepsearch.res.ibm.com/x-transaction-id": "abc", + "deepsearch.res.ibm.com/x-attempt-number": 5, + "deepsearch.res.ibm.com/x-max-attempts": 5 + } + }, + "spec": { + "findEntities": { + "entityNames": ["cities", "countries"], + "objectType": "text", + "texts": [ + "Bern, the capital city of Switzerland, is built around a crook in the Aare River.", + "Athens is a major coastal urban area in the Mediterranean and is both the capital and largest city of Greece." + ] + } + } +} +``` -## Models -For an example, check -[examples/simple_text_geography_annotator/](examples/simple_text_geography_annotator/). +## Developing a new model +To develop a new model class, inherit from a [base model class](base/) and implement the +methods and attributes that are declared as abstract. -## Base models -Check [base/](base/). 
+### Examples +- Minimal dummy annotator: +[examples/minimal_annotator/](examples/minimal_annotator) +- Simple geography annotator: +[examples/simple_text_geography_annotator/](examples/simple_text_geography_annotator/) diff --git a/deepsearch/model/base/base_annotator.py b/deepsearch/model/base/base_annotator.py index 25d55158..96e8b9b7 100644 --- a/deepsearch/model/base/base_annotator.py +++ b/deepsearch/model/base/base_annotator.py @@ -1,172 +1,34 @@ -# Interface -> defines mandatory functions for annotators -from abc import ABCMeta, abstractproperty -from typing import List, Optional, Union +from abc import ABC, abstractmethod +from typing import List -from fastapi import HTTPException -from pydantic import BaseModel, ValidationError +class BaseAnnotator(ABC): -class BaseAnnotator_properties(ABCMeta): + version: str = "undefined" + url: str = "undefined" + author: str = "undefined" + description: str = "undefined" + expected_compute_time: float = 1.0 + labels: dict = {} - supports = ( - entities - ) = ( - relationships - ) = ( - properties - ) = ( - name - ) = version = url = author = description = expected_compute_time = abstractproperty + @property + @abstractmethod + def kind(self) -> str: + return self.kind - def __call__(self, *args, **kwargs): - obj = super(BaseAnnotator_properties, self).__call__(*args, **kwargs) - try: - getattr(obj, "supports") - except AttributeError: - self.supports = [] - try: - getattr(obj, "entities") - except AttributeError: - self.entities = [] - try: - getattr(obj, "relationships") - except AttributeError: - self.relationships = [] - try: - getattr(obj, "properties") - except AttributeError: - self.properties = [] - try: - getattr(obj, "name") - except AttributeError: - self.name = "undefined" - try: - getattr(obj, "version") - except AttributeError: - self.version = "undefined" - try: - getattr(obj, "url") - except AttributeError: - self.url = "undefined" - try: - getattr(obj, "author") - except AttributeError: - self.author = 
"undefined" - try: - getattr(obj, "description") - except AttributeError: - self.description = "undefined" - try: - getattr(obj, "expected_compute_time") - except AttributeError: - self.expected_compute_time = 1.0 - try: - getattr(obj, "kind") - except AttributeError: - self.kind = "undefined" + @property + @abstractmethod + def name(self) -> str: + return self.name - return obj - - -class BaseAnnotator(metaclass=BaseAnnotator_properties): - - kind: str - supports: Union[tuple, list] - name: str - version: str - url: str - author: str - description: str - expected_compute_time: float - labels: dict - - def annotate_batched_entities( - self, object_type: str, items: List, entity_names: Optional[List[str]] - ) -> List[dict]: - results = [] - for item in items: - try: - results.append( - self.annotate_entities(object_type, item, entity_names)[0] - ) - except HTTPException as e: - raise e - return results - - def annotate_batched_properties( - self, - object_type: str, - items: List, - entities: List[dict], - property_names: Optional[List[str]], - ) -> List[dict]: - results = [] - for item, entity in zip(items, entities): - try: - results.append( - self.annotate_properties(object_type, item, entity, property_names)[ - 0 - ] - ) - except HTTPException as e: - raise e - return results - - def annotate_batched_relationships( - self, - object_type: str, - items: List, - entities: List[dict], - relationship_names: Optional[List[str]], - ) -> List[dict]: - results = [] - for item, entity in zip(items, entities): - try: - results.append( - self.annotate_relationships( - object_type, item, entity, relationship_names - )[0] - ) - except HTTPException as e: - raise e - return results - - def annotate_entities( - self, object_type: str, item: List, entity_names: Optional[List[str]] - ) -> List[dict]: - raise HTTPException( - status_code=501, detail="Unsuported Operation for annotator: findEntities" - ) - - def annotate_properties( - self, - object_type: str, - item: str, - 
entity: dict, - property_names: Optional[List[str]], - ) -> List[dict]: - # Incomplete - raise HTTPException( - status_code=501, detail="Unsuported Operation for annotator: findProperties" - ) - - def annotate_relationships( - self, - object_type: str, - item: str, - entity: dict, - relationship_names: Optional[List[str]], - ) -> List[dict]: - # Incomplete - raise HTTPException( - status_code=501, - detail="Unsuported Operation for annotator: findRelationships", - ) + @property + @abstractmethod + def supports(self) -> List[str]: + return self.supports def get_annotator_info(self) -> dict: annotator_info = { "definitions": { - # The extensive url in the issue proposition "apiVersion": "v1", "kind": self.kind, "spec": { @@ -185,12 +47,3 @@ def get_annotator_info(self) -> dict: } return annotator_info - - # def get_entity_names(self): - # return self.annotator_info["spec"]["definition"]["entities"] - # - # def get_relationship_names(self): - # return self.annotator_info["spec"]["definition"]["entities"] - # - # def get_property_names(self): - # return self.annotator_info["spec"]["definition"]["entities"] diff --git a/deepsearch/model/base/base_nlp_annotator.py b/deepsearch/model/base/base_nlp_annotator.py new file mode 100644 index 00000000..39433fee --- /dev/null +++ b/deepsearch/model/base/base_nlp_annotator.py @@ -0,0 +1,57 @@ +from abc import abstractmethod +from typing import List, Optional + +from deepsearch.model.base.base_annotator import BaseAnnotator + + +class BaseNLPAnnotator(BaseAnnotator): + + kind: str = "NLPModel" + + @abstractmethod + def annotate_batched_entities( + self, + object_type: str, + items: List[str], + entity_names: Optional[List[str]], + ) -> List[dict]: + """Annotate entities in given items batch. + + Args: + object_type (str): type of objects to annotate, e.g. 
"text"; must be included + in `supports` property + items (List[str]): batch of input items to annotate + entity_names (Optional[List[str]]): entities to annotate + + Returns: + List[dict]: a list, which, for each item in `items`, contains a dict with keys + being the various entity names each mapped to a list of its annotations + (can be empty) in the item, each annotation being a dict like: + { + "type": <entity_name>, + "match": <match>, + "original": <original>, + "range": [<start>, <end>] + } + """ + return [] + + @abstractmethod + def annotate_batched_relationships( + self, + object_type: str, + items: List[str], + entities: List[dict], + relationship_names: Optional[List[str]], + ) -> List[dict]: + return [] + + @abstractmethod + def annotate_batched_properties( + self, + object_type: str, + items: List[str], + entities: List[dict], + property_names: Optional[List[str]], + ) -> List[dict]: + return [] diff --git a/deepsearch/model/examples/minimal_annotator/__init__.py b/deepsearch/model/examples/minimal_annotator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/deepsearch/model/examples/minimal_annotator/app.py b/deepsearch/model/examples/minimal_annotator/app.py new file mode 100644 index 00000000..b28feb74 --- /dev/null +++ b/deepsearch/model/examples/minimal_annotator/app.py @@ -0,0 +1,14 @@ +from deepsearch.model.examples.minimal_annotator.minimal_annotator import ( + MinimalAnnotator, +) +from deepsearch.model.server.deepsearch_annotator_app import DeepSearchAnnotatorApp + + +def run(): + app = DeepSearchAnnotatorApp() + app.register_annotator(MinimalAnnotator()) + app.run() + + +if __name__ == "__main__": + run() diff --git a/deepsearch/model/examples/minimal_annotator/minimal_annotator.py b/deepsearch/model/examples/minimal_annotator/minimal_annotator.py new file mode 100644 index 00000000..cb9181e1 --- /dev/null +++ b/deepsearch/model/examples/minimal_annotator/minimal_annotator.py @@ -0,0 +1,63 @@ +from typing import List, Optional + +from fastapi import HTTPException 
+ +from deepsearch.model.base.base_nlp_annotator import BaseNLPAnnotator + + +class MinimalAnnotator(BaseNLPAnnotator): + + name = "MinimalAnnotator" + supports = ["text"] + + def annotate_batched_entities( + self, + object_type: str, + items: List[str], + entity_names: Optional[List[str]], + ) -> List[dict]: + _entity_names = entity_names or ["entity_foo", "entity_bar"] + results = [] + for item in items: + results.append( + { + k: [ + { + "type": k, + "match": f"a '{k}' match in '{item}'", + "original": f"a '{k}' original in '{item}'", + "range": [1, 5], + }, + { + "type": k, + "match": f"another '{k}' match in '{item}'", + "original": f"another '{k}' original in '{item}'", + "range": [12, 42], + }, + ] + for k in _entity_names + } + ) + return results + + def annotate_batched_relationships( + self, + object_type: str, + items: List[str], + entities: List[dict], + relationship_names: Optional[List[str]], + ) -> List[dict]: + # raise HTTP 501 to indicate method not supported + raise HTTPException( + status_code=501, detail="Relationship annotation not supported" + ) + + def annotate_batched_properties( + self, + object_type: str, + items: List[str], + entities: List[dict], + property_names: Optional[List[str]], + ) -> List[dict]: + # raise HTTP 501 to indicate method not supported + raise HTTPException(status_code=501, detail="Property annotation not supported") diff --git a/deepsearch/model/examples/simple_text_geography_annotator/__init__.py b/deepsearch/model/examples/simple_text_geography_annotator/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/deepsearch/model/examples/main.py b/deepsearch/model/examples/simple_text_geography_annotator/app.py similarity index 84% rename from deepsearch/model/examples/main.py rename to deepsearch/model/examples/simple_text_geography_annotator/app.py index 785d8dfc..34048f83 100644 --- a/deepsearch/model/examples/main.py +++ b/deepsearch/model/examples/simple_text_geography_annotator/app.py @@ -7,7 +7,6 
@@ def run(): app = DeepSearchAnnotatorApp() app.register_annotator(SimpleTextGeographyAnnotator()) - app.register_annotator(SimpleTextGeographyAnnotator(), name="foo") app.run() diff --git a/deepsearch/model/examples/simple_text_geography_annotator/simple_text_geography_annotator.py b/deepsearch/model/examples/simple_text_geography_annotator/simple_text_geography_annotator.py index 244ce915..c66f7ca4 100644 --- a/deepsearch/model/examples/simple_text_geography_annotator/simple_text_geography_annotator.py +++ b/deepsearch/model/examples/simple_text_geography_annotator/simple_text_geography_annotator.py @@ -8,7 +8,9 @@ import logging -from deepsearch.model.base.base_annotator import BaseAnnotator +from fastapi import HTTPException + +from deepsearch.model.base.base_nlp_annotator import BaseNLPAnnotator logger = logging.getLogger("cps-nlp") from typing import List, Optional @@ -29,10 +31,10 @@ # import pprint ## For debugging only. -class SimpleTextGeographyAnnotator(BaseAnnotator): - ## This is the class name that you need to use in the controller. 
+class SimpleTextGeographyAnnotator(BaseNLPAnnotator): - supports = ("text",) + name = "SimpleTextGeographyAnnotator" + supports = ["text"] _ent_annotator_classes = [ CitiesAnnotator, @@ -47,32 +49,15 @@ class SimpleTextGeographyAnnotator(BaseAnnotator): ] def __init__(self): - self.name = "SimpleTextGeographyAnnotator" - self.kind = "NLPModelDefinition" - self._ent_annots = {} self._rel_annots = {} self._initialize_annotators() self.entity_names = list(self._ent_annots.keys()) self.relationship_names = list(self._rel_annots.keys()) - self.property_names = ( - [] - ) # This example annotator does not have any property annotator + self.property_names = [] # this annotator does not annotate properties self.labels = self._generate_annotator_labels() - def get_entity_names(self): - return self.entity_names - - def get_relationship_names(self): - return self.relationship_names - - def get_property_names(self): - return self.property_names - - def get_labels(self): - return self.labels - def _generate_annotator_labels(self): # Derive entity labels from classes entities_with_desc = [ @@ -80,10 +65,9 @@ def _generate_annotator_labels(self): for annot in self._ent_annots.values() ] # Dummy implementation of property labels - property_names = self.get_property_names() properties_with_desc = [ {"key": property, "description": f"Property of type {property!r}"} - for property in property_names + for property in self.property_names ] # Derive relationships labels from classes @@ -114,7 +98,7 @@ def _initialize_annotators(self): self._rel_annots[annot.key()] = annot def annotate_batched_entities( - self, object_type, items: List, entity_names: Optional[List[str]] + self, object_type, items: List[str], entity_names: Optional[List[str]] ) -> List[dict]: ## An item is a string if object_type == "text", and List[List[dict]] if object_type == "table" if entity_names is None: @@ -134,7 +118,7 @@ def annotate_batched_entities( for item in items: entity_map = {} try: - cps_entities = 
self.annotate_entities( + cps_entities = self.annotate_entities_in_item( object_type, item, desired_entities ) except Exception as exc: @@ -156,8 +140,8 @@ def annotate_batched_entities( return results - def annotate_entities( - self, object_type: str, item: List, entity_names: Optional[List[str]] + def annotate_entities_in_item( + self, object_type: str, item: str, entity_names: Optional[List[str]] ) -> List[dict]: # In this case entity_names is never None, however since BaseAnnotator defines the signature of this method as # Optionally having entity names we must ensure that they are defined. @@ -182,7 +166,7 @@ def annotate_entities( def annotate_batched_relationships( self, object_type: str, - items: List[dict], + items: List[str], entities: List[dict], relationship_names: Optional[List[str]], ) -> List[dict]: @@ -207,3 +191,13 @@ def annotate_batched_relationships( results.append(result) return results + + def annotate_batched_properties( + self, + object_type: str, + items: List, + entities: List[dict], + property_names: Optional[List[str]], + ) -> List[dict]: + # raise HTTP 501 to indicate method not supported + raise HTTPException(status_code=501, detail="Property annotation not supported") diff --git a/poetry.lock b/poetry.lock index 6f9adc6c..f24d4bad 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3,7 +3,7 @@ name = "anyio" version = "3.6.2" description = "High level compatibility layer for multiple asynchronous event loop implementations" category = "main" -optional = false +optional = true python-versions = ">=3.6.2" [package.dependencies] @@ -280,7 +280,7 @@ name = "fastapi" version = "0.95.1" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" category = "main" -optional = false +optional = true python-versions = ">=3.7" [package.dependencies] @@ -360,7 +360,7 @@ name = "h11" version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" category = "main" -optional = 
false +optional = true python-versions = ">=3.7" [[package]] @@ -1156,7 +1156,7 @@ name = "sniffio" version = "1.3.0" description = "Sniff out which async library your code is running under" category = "main" -optional = false +optional = true python-versions = ">=3.7" [[package]] @@ -1180,7 +1180,7 @@ name = "starlette" version = "0.26.1" description = "The little ASGI library that shines." category = "main" -optional = false +optional = true python-versions = ">=3.7" [package.dependencies] @@ -1362,7 +1362,7 @@ name = "uvicorn" version = "0.21.1" description = "The lightning-fast ASGI server." category = "main" -optional = false +optional = true python-versions = ">=3.7" [package.dependencies] @@ -1447,10 +1447,13 @@ python-versions = ">=3.7" docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)"] testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] +[extras] +api = ["fastapi", "uvicorn", "anyio", "starlette"] + [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "6bda8ec4ba3b890418ac144f5e881b8b3ad7729bcaf9390bfe7e0256453aa8a4" +content-hash = "b9db015dfe4d89939af76bca95245077cf6514412dded3fba7ba12f3e48a0284" [metadata.files] anyio = [ diff --git a/pyproject.toml b/pyproject.toml index c139c135..fc8482b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,10 +45,19 @@ appdirs = "^1.4.4" tabulate = "^0.8.9" pluggy = "^1.0.0" tqdm = "^4.64.0" -fastapi = "^0.95.1" -uvicorn = "^0.21.1" -anyio = "^3.6.2" -starlette = "^0.26.1" + +fastapi = { version = "^0.95.1", optional = true } +uvicorn = { version = "^0.21.1", optional = true } +anyio = { version = "^3.6.2", optional = true } +starlette = { version = "^0.26.1", optional = true } + +[tool.poetry.extras] +api = [ + "fastapi", + "uvicorn", + 
"anyio", + "starlette", +] [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "^22.1.0"}