-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: set up FastAPI-based model API (#76)
Signed-off-by: HolyMichael <34553282+HolyMichael@users.noreply.github.com>
- Loading branch information
1 parent
4c1691a
commit e84fee4
Showing
32 changed files
with
14,263 additions
and
1,204 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
# Interface -> defines mandatory functions for annotators | ||
from abc import ABCMeta, abstractproperty | ||
from typing import List, Optional, Union | ||
|
||
from fastapi import HTTPException | ||
from pydantic import BaseModel, ValidationError | ||
|
||
|
||
class BaseAnnotator_properties(ABCMeta):
    """Metaclass that back-fills default metadata on annotator classes.

    Every time a class using this metaclass is instantiated, the new instance
    is probed for each metadata attribute. Any attribute the instance does not
    expose is assigned a default value **on the class** (not on the instance),
    so the default is shared by all instances of that class.
    """

    # Placeholders living on the metaclass. They are visible through the class
    # object but not through its instances, which is why the instance probe in
    # ``__call__`` still detects attributes a concrete annotator never defined.
    supports = abstractproperty
    entities = abstractproperty
    relationships = abstractproperty
    properties = abstractproperty
    name = abstractproperty
    version = abstractproperty
    url = abstractproperty
    author = abstractproperty
    description = abstractproperty
    expected_compute_time = abstractproperty

    def __call__(self, *args, **kwargs):
        """Instantiate the class, then fill in missing metadata defaults.

        Args:
            *args: Positional arguments forwarded to the class constructor.
            **kwargs: Keyword arguments forwarded to the class constructor.

        Returns:
            The newly created instance.
        """
        obj = super().__call__(*args, **kwargs)

        # Built on every call so that each back-filled class receives its own
        # fresh mutable containers instead of sharing them across classes.
        defaults = {
            "supports": [],
            "entities": [],
            "relationships": [],
            "properties": [],
            "name": "undefined",
            "version": "undefined",
            "url": "undefined",
            "author": "undefined",
            "description": "undefined",
            "expected_compute_time": 1.0,
            "kind": "undefined",
            # Read by BaseAnnotator.get_annotator_info(); without a default an
            # annotator that never declares labels fails with AttributeError.
            "labels": {},
        }
        for attribute, default in defaults.items():
            if not hasattr(obj, attribute):
                # ``self`` is the class being instantiated.
                setattr(self, attribute, default)

        return obj
|
||
|
||
class BaseAnnotator(metaclass=BaseAnnotator_properties):
    """Interface defining the operations an annotator exposes.

    The single-item ``annotate_*`` methods raise an HTTP 501 error unless a
    subclass overrides them. The ``annotate_batched_*`` methods call the
    single-item variant once per item and collect the first element of each
    returned list.
    """

    kind: str
    supports: Union[tuple, list]
    name: str
    version: str
    url: str
    author: str
    description: str
    expected_compute_time: float
    labels: dict

    def annotate_batched_entities(
        self, object_type: str, items: List, entity_names: Optional[List[str]]
    ) -> List[dict]:
        """Annotate entities for every item in ``items``.

        Args:
            object_type: Passed through to :meth:`annotate_entities`.
            items: Items to annotate, one call per item.
            entity_names: Passed through to :meth:`annotate_entities`.

        Returns:
            One result per item: the first element of each
            :meth:`annotate_entities` return value.

        Raises:
            HTTPException: Propagated unchanged from :meth:`annotate_entities`.
        """
        return [
            self.annotate_entities(object_type, item, entity_names)[0]
            for item in items
        ]

    def annotate_batched_properties(
        self,
        object_type: str,
        items: List,
        entities: List[dict],
        property_names: Optional[List[str]],
    ) -> List[dict]:
        """Annotate properties for every ``(item, entity)`` pair.

        ``items`` and ``entities`` are paired positionally with ``zip``, so
        surplus elements of the longer list are ignored.

        Returns:
            One result per pair: the first element of each
            :meth:`annotate_properties` return value.

        Raises:
            HTTPException: Propagated unchanged from :meth:`annotate_properties`.
        """
        return [
            self.annotate_properties(object_type, item, entity, property_names)[0]
            for item, entity in zip(items, entities)
        ]

    def annotate_batched_relationships(
        self,
        object_type: str,
        items: List,
        entities: List[dict],
        relationship_names: Optional[List[str]],
    ) -> List[dict]:
        """Annotate relationships for every ``(item, entity)`` pair.

        ``items`` and ``entities`` are paired positionally with ``zip``, so
        surplus elements of the longer list are ignored.

        Returns:
            One result per pair: the first element of each
            :meth:`annotate_relationships` return value.

        Raises:
            HTTPException: Propagated unchanged from
                :meth:`annotate_relationships`.
        """
        return [
            self.annotate_relationships(
                object_type, item, entity, relationship_names
            )[0]
            for item, entity in zip(items, entities)
        ]

    def annotate_entities(
        self, object_type: str, item: List, entity_names: Optional[List[str]]
    ) -> List[dict]:
        """Find entities in a single item; unsupported unless overridden.

        Raises:
            HTTPException: Always, with status code 501.
        """
        raise HTTPException(
            status_code=501, detail="Unsuported Operation for annotator: findEntities"
        )

    def annotate_properties(
        self,
        object_type: str,
        item: str,
        entity: dict,
        property_names: Optional[List[str]],
    ) -> List[dict]:
        """Find properties in a single item; unsupported unless overridden.

        Raises:
            HTTPException: Always, with status code 501.
        """
        # Incomplete
        raise HTTPException(
            status_code=501, detail="Unsuported Operation for annotator: findProperties"
        )

    def annotate_relationships(
        self,
        object_type: str,
        item: str,
        entity: dict,
        relationship_names: Optional[List[str]],
    ) -> List[dict]:
        """Find relationships in a single item; unsupported unless overridden.

        Raises:
            HTTPException: Always, with status code 501.
        """
        # Incomplete
        raise HTTPException(
            status_code=501,
            detail="Unsuported Operation for annotator: findRelationships",
        )

    def get_annotator_info(self) -> dict:
        """Return the annotator's metadata and label definition as a dict.

        The result has a single ``"definitions"`` key holding the API version,
        the annotator ``kind`` and a ``"spec"`` with the metadata attributes
        and ``self.labels``.
        """
        metadata = {
            "name": self.name,
            "version": self.version,
            "url": self.url,
            "author": self.author,
            "description": self.description,
            "expected_compute_time": self.expected_compute_time,
            "supported_object_types": self.supports,
        }
        return {
            "definitions": {
                # The extensive url in the issue proposition
                "apiVersion": "v1",
                "kind": self.kind,
                "spec": {
                    "metadata": metadata,
                    "definition": self.labels,
                },
            }
        }
|
||
# def get_entity_names(self): | ||
# return self.annotator_info["spec"]["definition"]["entities"] | ||
# | ||
# def get_relationship_names(self): | ||
# return self.annotator_info["spec"]["definition"]["entities"] | ||
# | ||
# def get_property_names(self): | ||
# return self.annotator_info["spec"]["definition"]["entities"] |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import importlib.util | ||
import os | ||
|
||
import typer | ||
import uvicorn | ||
|
||
from deepsearch.model.examples.simple_text_geography_annotator.simple_text_geography_annotator import ( # type: ignore | ||
SimpleTextGeographyAnnotator, | ||
) | ||
from deepsearch.model.server.deepsearch_annotator_app import DeepSearchAnnotatorApp | ||
|
||
app = typer.Typer(no_args_is_help=True)


@app.command(name="serve", help="Serve an annotator instance")
def serve(
    port: int = typer.Option(8000, "-p", "--port", help="The port to listen on"),
    annotator: str = typer.Option(None, "-a", "--annotator", help="Not implemented"),
):
    """Serve the example geography annotator over HTTP with uvicorn.

    Args:
        port: TCP port the server listens on.
        annotator: Accepted on the command line but not used by this command.
    """
    # Build the application and plug the example annotator into it.
    annotator_app = DeepSearchAnnotatorApp()
    annotator_app.register_annotator(SimpleTextGeographyAnnotator())

    # Blocks while the server runs; binds to all network interfaces.
    uvicorn.run(annotator_app.app, host="0.0.0.0", port=port)


if __name__ == "__main__":
    app()
Empty file.
Empty file.
21 changes: 21 additions & 0 deletions
21
deepsearch/model/examples/simple_text_geography_annotator/entities/cities_annotator.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import os | ||
from typing import Any, Optional | ||
|
||
from .common.dictionary_text_entity_annotator import ( | ||
Config, | ||
DictionaryTextEntityAnnotator, | ||
) | ||
from .common.utils import resources_dir | ||
|
||
|
||
class CitiesAnnotator(DictionaryTextEntityAnnotator):
    """Dictionary-based entity annotator configured with ``cities.json``."""

    def __init__(self):
        # The dictionary file is looked up inside the package resources dir.
        dictionary_path = os.path.join(resources_dir, "cities.json")
        super().__init__(Config(dictionary_filename=dictionary_path))

    def key(self) -> str:
        """Return the identifier of this entity type."""
        return "cities"

    def description(self) -> str:
        """Return the human-readable description of this entity type."""
        return "Names of cities"
Empty file.
21 changes: 21 additions & 0 deletions
21
...el/examples/simple_text_geography_annotator/entities/common/base_text_entity_annotator.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from abc import ABC, abstractmethod | ||
from typing import Any, Optional | ||
|
||
# from deepsearch.model.base.base_annotator import BaseAnnotator | ||
|
||
|
||
class BaseTextEntityAnnotator(ABC):
    """Abstract interface for annotators that find one entity type in text.

    Deriving from ``ABC`` is what makes the ``@abstractmethod`` markers
    effective: a subclass can only be instantiated once it implements
    ``key``, ``description`` and ``annotate_entities_text``.
    """

    @abstractmethod
    def key(self) -> str:
        """Return the identifier of the entity type this annotator produces."""

    @abstractmethod
    def description(self) -> str:
        """Return a human-readable description of the entity type."""

    def initialize(self):
        """Prepare the annotator for use; the default does nothing."""
        return

    @abstractmethod
    def annotate_entities_text(self, text: str) -> list:
        """Return the list of entities found in ``text``."""
105 changes: 105 additions & 0 deletions
105
...mples/simple_text_geography_annotator/entities/common/dictionary_text_entity_annotator.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
import logging | ||
|
||
logger = logging.getLogger("cps-nlp") | ||
import json | ||
import re | ||
from dataclasses import dataclass | ||
from typing import List, Optional | ||
|
||
from .base_text_entity_annotator import BaseTextEntityAnnotator | ||
|
||
|
||
@dataclass
class Config:
    """Settings for a dictionary-based text entity annotator."""

    # Path of the JSON file the annotator loads its dictionary from.
    dictionary_filename: str
|
||
|
||
class DictionaryTextEntityAnnotator(BaseTextEntityAnnotator):
    """Entity annotator that looks up the entries of a JSON dictionary in text.

    The file named by ``config.dictionary_filename`` is read and compiled into
    regular expressions lazily, the first time :meth:`initialize` runs
    (:meth:`annotate_entities_text` calls it on every invocation).
    """

    # Characters allowed directly before / after an entity so that partial
    # (inside-a-word) matches are rejected. The start and the end of the text
    # are accepted as boundaries as well.
    _LEADING_DELIMITERS = r"""\s'"({\[,.!?:;"""
    _TRAILING_DELIMITERS = r"""\s'")}\],.!?:;"""

    # A new expression is started once the joined alternatives of the current
    # one exceed this many characters, to avoid length limits of regex.
    _MAX_EXPR_LEN = 4096

    # Escaped entries of this length or more are skipped with a warning.
    _MAX_ENTRY_LEN = 48

    def __init__(self, config: Config):
        self.config = config
        self._initialized = False
        self._exprs = []

    def initialize(self):
        """Load and compile the dictionary; later calls are no-ops.

        Raises:
            RuntimeError: If the dictionary cannot be compiled.
        """
        if self._initialized:
            return

        ## In this example annotator, we load dictionaries.
        ## Here you might load AI models instead.
        logger.info("reading from %s", self.config.dictionary_filename)
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(self.config.dictionary_filename, encoding="utf-8") as f:
            dictionary = json.load(f)
        logger.info("loaded %s", len(dictionary))
        self._exprs = self._compile_dictionary(dictionary)
        logger.info("compiled expr")

        self._initialized = True

    def _build_expr(self, alternatives):
        """Compile escaped alternatives into one boundary-checked pattern."""
        # The boundaries are zero-width lookarounds: a delimiter shared by two
        # neighbouring entities ("Paris Rome") is not consumed by the first
        # match, so the second entity is still found.
        return re.compile(
            "(?<![^" + self._LEADING_DELIMITERS + "])"
            + "(" + "|".join(alternatives) + ")"
            + "(?![^" + self._TRAILING_DELIMITERS + "])"
        )

    ## Dictionary compilation.
    ## If you use AI models instead of dictionaries, no similar function is needed.
    def _compile_dictionary(self, dictionary):
        """Compile the dictionary entries into a list of regular expressions.

        Args:
            dictionary: Iterable of entity names (strings).

        Returns:
            List of compiled patterns; capture group 1 is the matched entity.

        Raises:
            RuntimeError: If escaping or compiling an entry fails.
        """
        logger.info("compiling dictionary")

        try:
            exprs = []
            pending = []
            pending_len = 0  # always equals len("|".join(pending))
            for item in dictionary:
                escaped = re.escape(item)

                if pending_len > self._MAX_EXPR_LEN:
                    exprs.append(self._build_expr(pending))
                    pending = []
                    pending_len = 0

                if not escaped:
                    # An empty alternative would match zero-width everywhere.
                    continue

                if len(escaped) < self._MAX_ENTRY_LEN:
                    pending_len += len(escaped) + (1 if pending else 0)
                    pending.append(escaped)
                else:
                    logger.warning(
                        "name of entity '%s' is longer than %s chars",
                        item,
                        self._MAX_ENTRY_LEN,
                    )

            if pending:
                exprs.append(self._build_expr(pending))
        except Exception as e:
            logger.exception("Could not compile the dictionary")
            raise RuntimeError("Could not compile the dictionary") from e
        return exprs

    def annotate_entities_text(self, text: str) -> list:
        """Annotate one text string with the entities of this annotator.

        Args:
            text: The text to search.

        Returns:
            List of entities in CPS format, i.e., dicts with keys ``type``,
            ``match``, ``original`` and ``range`` (``[start, end]`` offsets of
            the entity in ``text``).
        """
        self.initialize()

        logger.debug("------ Starting 'annotate_entities_text' -------")
        matches = []

        # Run all expressions created for this entity_name
        for expr in self._exprs:
            for match in expr.finditer(text):
                found = match.group(1)
                matches.append(
                    {
                        "type": self.key(),
                        "match": found,
                        "original": found,
                        "range": [match.start(1), match.end(1)],
                    }
                )

        return matches
Oops, something went wrong.