Skip to content

Commit

Permalink
feat: set up FastAPI-based model API (#76)
Browse files Browse the repository at this point in the history
Signed-off-by: HolyMichael <34553282+HolyMichael@users.noreply.github.com>
  • Loading branch information
HolyMichael authored Apr 24, 2023
1 parent 4c1691a commit e84fee4
Show file tree
Hide file tree
Showing 32 changed files with 14,263 additions and 1,204 deletions.
4 changes: 4 additions & 0 deletions deepsearch/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from deepsearch.core.cli.plugins import get_cli_groups
from deepsearch.cps.cli.main import app as cps_app
from deepsearch.documents.cli.main import app as documents_app
from deepsearch.model.cli.main import app as model_app
from deepsearch.query.cli.main import app as query_app

# Mount the CPS Typer app as the `cps` sub-command of the main CLI.
app.add_typer(cps_app, name="cps", help="Interact with DeepSearch CPS component")
Expand All @@ -13,6 +14,9 @@
name="documents",
help="Interact with DeepSearch Document Conversion component",
)
# Mount the model Typer app (deepsearch.model.cli.main) as the `model` sub-command.
app.add_typer(
    model_app, name="model", help="Interact with the DeepSearch model component"
)

# Attach every additional CLI group returned by get_cli_groups()
# (imported from deepsearch.core.cli.plugins).
for group in get_cli_groups():
    app.add_typer(group)
Expand Down
Empty file added deepsearch/model/__init__.py
Empty file.
Empty file.
196 changes: 196 additions & 0 deletions deepsearch/model/base/base_annotator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# Interface -> defines mandatory functions for annotators
from abc import ABCMeta, abstractproperty
from typing import List, Optional, Union

from fastapi import HTTPException
from pydantic import BaseModel, ValidationError


class BaseAnnotator_properties(ABCMeta):
    """Metaclass that back-fills missing annotator metadata with defaults.

    Every time a class using this metaclass is instantiated, the new instance
    is probed for a fixed set of metadata attributes. Each attribute that the
    instance does not expose is assigned a default value **on the class**
    (not on the instance), so it becomes visible to that instance and to all
    later instances of the same class.
    """

    # Placeholders on the metaclass. Note that these bind the
    # ``abstractproperty`` class itself (it is not called), so they do not
    # enforce anything; they are only visible through class-level attribute
    # access until a real value is set on the class. ``abstractproperty`` is
    # deprecated in the standard library; it is kept here so class-level
    # lookups behave exactly as before.
    supports = abstractproperty
    entities = abstractproperty
    relationships = abstractproperty
    properties = abstractproperty
    name = abstractproperty
    version = abstractproperty
    url = abstractproperty
    author = abstractproperty
    description = abstractproperty
    expected_compute_time = abstractproperty

    def __call__(self, *args, **kwargs):
        """Create an instance and fill in defaults for missing metadata.

        Args:
            *args: Positional arguments forwarded to the class constructor.
            **kwargs: Keyword arguments forwarded to the class constructor.

        Returns:
            The newly created instance.
        """
        obj = super().__call__(*args, **kwargs)

        # Built on every call so each class that needs a list default
        # receives its own fresh list object.
        fallbacks = {
            "supports": [],
            "entities": [],
            "relationships": [],
            "properties": [],
            "name": "undefined",
            "version": "undefined",
            "url": "undefined",
            "author": "undefined",
            "description": "undefined",
            "expected_compute_time": 1.0,
            "kind": "undefined",
        }
        for attribute, fallback in fallbacks.items():
            # hasattr() is False exactly when getattr() raises AttributeError.
            if not hasattr(obj, attribute):
                # ``self`` is the class being instantiated: the default is
                # stored on the class, as the original implementation did.
                setattr(self, attribute, fallback)

        return obj


class BaseAnnotator(metaclass=BaseAnnotator_properties):
    """Base class defining the operations an annotator can offer.

    The single-item ``annotate_*`` methods defined here always raise
    ``HTTPException`` with status code 501; subclasses override the ones they
    support. The ``annotate_batched_*`` methods call the matching single-item
    method once per item and collect the first element of each result.
    """

    kind: str
    supports: Union[tuple, list]
    name: str
    version: str
    url: str
    author: str
    description: str
    expected_compute_time: float
    # NOTE(review): unlike the attributes above, ``labels`` receives no default
    # from the metaclass shown in this file, so ``get_annotator_info`` relies
    # on the subclass defining it -- confirm.
    labels: dict

    def annotate_batched_entities(
        self, object_type: str, items: List, entity_names: Optional[List[str]]
    ) -> List[dict]:
        """Annotate entities for each item in ``items``.

        Args:
            object_type: Passed through to ``annotate_entities``.
            items: Items to annotate, one call per item.
            entity_names: Passed through to ``annotate_entities``.

        Returns:
            One entry per item: element ``[0]`` of that item's
            ``annotate_entities`` result.

        Raises:
            HTTPException: Propagated unchanged from ``annotate_entities``.
        """
        return [
            self.annotate_entities(object_type, item, entity_names)[0]
            for item in items
        ]

    def annotate_batched_properties(
        self,
        object_type: str,
        items: List,
        entities: List[dict],
        property_names: Optional[List[str]],
    ) -> List[dict]:
        """Annotate properties for each ``(item, entity)`` pair.

        ``items`` and ``entities`` are paired with ``zip``, so surplus elements
        of the longer sequence are ignored.

        Args:
            object_type: Passed through to ``annotate_properties``.
            items: Items to annotate.
            entities: Entity data paired positionally with ``items``.
            property_names: Passed through to ``annotate_properties``.

        Returns:
            One entry per pair: element ``[0]`` of that pair's
            ``annotate_properties`` result.

        Raises:
            HTTPException: Propagated unchanged from ``annotate_properties``.
        """
        return [
            self.annotate_properties(object_type, item, entity, property_names)[0]
            for item, entity in zip(items, entities)
        ]

    def annotate_batched_relationships(
        self,
        object_type: str,
        items: List,
        entities: List[dict],
        relationship_names: Optional[List[str]],
    ) -> List[dict]:
        """Annotate relationships for each ``(item, entity)`` pair.

        ``items`` and ``entities`` are paired with ``zip``, so surplus elements
        of the longer sequence are ignored.

        Args:
            object_type: Passed through to ``annotate_relationships``.
            items: Items to annotate.
            entities: Entity data paired positionally with ``items``.
            relationship_names: Passed through to ``annotate_relationships``.

        Returns:
            One entry per pair: element ``[0]`` of that pair's
            ``annotate_relationships`` result.

        Raises:
            HTTPException: Propagated unchanged from ``annotate_relationships``.
        """
        return [
            self.annotate_relationships(
                object_type, item, entity, relationship_names
            )[0]
            for item, entity in zip(items, entities)
        ]

    def annotate_entities(
        self, object_type: str, item: List, entity_names: Optional[List[str]]
    ) -> List[dict]:
        """Find entities in a single item; not supported by the base class.

        Raises:
            HTTPException: Always, with status code 501.
        """
        raise HTTPException(
            status_code=501, detail="Unsuported Operation for annotator: findEntities"
        )

    def annotate_properties(
        self,
        object_type: str,
        item: str,
        entity: dict,
        property_names: Optional[List[str]],
    ) -> List[dict]:
        """Find properties of a single item; not supported by the base class.

        Raises:
            HTTPException: Always, with status code 501.
        """
        # Incomplete
        raise HTTPException(
            status_code=501, detail="Unsuported Operation for annotator: findProperties"
        )

    def annotate_relationships(
        self,
        object_type: str,
        item: str,
        entity: dict,
        relationship_names: Optional[List[str]],
    ) -> List[dict]:
        """Find relationships in a single item; not supported by the base class.

        Raises:
            HTTPException: Always, with status code 501.
        """
        # Incomplete
        raise HTTPException(
            status_code=501,
            detail="Unsuported Operation for annotator: findRelationships",
        )

    def get_annotator_info(self) -> dict:
        """Describe this annotator as a nested dictionary.

        Returns:
            A dict with a single ``"definitions"`` key holding the API version,
            the annotator ``kind`` and a ``"spec"`` made of the metadata
            attributes and ``self.labels``.
        """
        metadata = {
            "name": self.name,
            "version": self.version,
            "url": self.url,
            "author": self.author,
            "description": self.description,
            "expected_compute_time": self.expected_compute_time,
            "supported_object_types": self.supports,
        }
        return {
            "definitions": {
                # The extensive url in the issue proposition
                "apiVersion": "v1",
                "kind": self.kind,
                "spec": {
                    "metadata": metadata,
                    "definition": self.labels,
                },
            }
        }

# def get_entity_names(self):
# return self.annotator_info["spec"]["definition"]["entities"]
#
# def get_relationship_names(self):
# return self.annotator_info["spec"]["definition"]["entities"]
#
# def get_property_names(self):
# return self.annotator_info["spec"]["definition"]["entities"]
Empty file.
31 changes: 31 additions & 0 deletions deepsearch/model/cli/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import importlib.util
import os

import typer
import uvicorn

from deepsearch.model.examples.simple_text_geography_annotator.simple_text_geography_annotator import ( # type: ignore
SimpleTextGeographyAnnotator,
)
from deepsearch.model.server.deepsearch_annotator_app import DeepSearchAnnotatorApp

# Typer application holding the model commands; with no arguments it prints help.
app = typer.Typer(no_args_is_help=True)


@app.command(name="serve", help="Serve an annotator instance")
def serve(
    port: int = typer.Option(8000, "-p", "--port", help="The port to listen on"),
    annotator: str = typer.Option(None, "-a", "--annotator", help="Not implemented"),
):
    # Serves the example geography annotator over HTTP on all interfaces.
    # The `annotator` option is accepted but not used yet.
    # (Comments are used instead of a docstring so the CLI help text is
    # driven solely by the decorator's `help` argument.)
    geography_annotator = SimpleTextGeographyAnnotator()

    annotator_service = DeepSearchAnnotatorApp()
    annotator_service.register_annotator(geography_annotator)

    # Blocks until the server is stopped.
    uvicorn.run(annotator_service.app, host="0.0.0.0", port=port)


# Run the Typer app when this module is executed as a script.
if __name__ == "__main__":
    app()
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os
from typing import Any, Optional

from .common.dictionary_text_entity_annotator import (
Config,
DictionaryTextEntityAnnotator,
)
from .common.utils import resources_dir


class CitiesAnnotator(DictionaryTextEntityAnnotator):
    """Dictionary annotator configured with the ``cities.json`` resource file."""

    def __init__(self):
        # The dictionary file is looked up inside the shared resources directory.
        dictionary_path = os.path.join(resources_dir, "cities.json")
        settings = Config(dictionary_filename=dictionary_path)
        super().__init__(settings)

    def key(self) -> str:
        """Return the identifier of this annotator's entity type."""
        return "cities"

    def description(self) -> str:
        """Return a short human-readable description of the entity type."""
        return "Names of cities"
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from abc import ABC, abstractmethod
from typing import Any, Optional

# from deepsearch.model.base.base_annotator import BaseAnnotator


class BaseTextEntityAnnotator(ABC):
    """Interface for annotators that find one kind of entity in plain text.

    Deriving from ``ABC`` makes the ``@abstractmethod`` markers effective:
    a subclass can only be instantiated once it implements ``key``,
    ``description`` and ``annotate_entities_text``.
    """

    @abstractmethod
    def key(self) -> str:
        """Return the identifier of the entity type this annotator produces."""

    @abstractmethod
    def description(self) -> str:
        """Return a human-readable description of the entity type."""

    def initialize(self):
        """Prepare the annotator for use; the default does nothing."""
        return

    @abstractmethod
    def annotate_entities_text(self, text: str) -> list:
        """Return the list of entities found in ``text``."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import logging

# Module-level logger, registered under the fixed name "cps-nlp".
logger = logging.getLogger("cps-nlp")
import json
import re
from dataclasses import dataclass
from typing import List, Optional

from .base_text_entity_annotator import BaseTextEntityAnnotator


@dataclass
class Config:
    """Settings for a ``DictionaryTextEntityAnnotator``."""

    # Path of the JSON dictionary file that the annotator opens on initialization.
    dictionary_filename: str


class DictionaryTextEntityAnnotator(BaseTextEntityAnnotator):
    """Annotate text with the entries of a JSON dictionary file.

    The dictionary is loaded lazily on first use and compiled into one or more
    regular expressions. ``annotate_entities_text`` then reports every
    occurrence of a dictionary entry that is surrounded by delimiters
    (whitespace, quotes, brackets, punctuation, or the text boundaries).
    """

    # Entries whose regex-escaped form has this many characters or more are
    # skipped with a warning.
    _MAX_ENTRY_LEN = 48
    # Once the joined alternatives exceed this many characters they are
    # compiled and a new expression is started, to avoid overly long patterns.
    _MAX_EXPR_LEN = 4096

    # Delimiters used to avoid partial matches. Raw strings keep the regex
    # escapes intact instead of relying on invalid string escape sequences.
    _STARTS_WITH = r"(^|\s|'|\"|\(|\{|\[|\,|\.|\!|\?|\:|\;)"
    # The closing delimiter is a lookahead: it is checked but not consumed, so
    # the same character can open the next entity (e.g. "Rome Paris").
    _ENDS_WITH = r"(?=$|\s|'|\"|\)|\}|\]|\,|\.|\!|\?|\:|\;)"

    def __init__(self, config: Config):
        """Store the configuration; the dictionary is not read yet."""
        self.config = config
        self._initialized = False

    def initialize(self):
        """Load and compile the dictionary once; later calls are no-ops.

        Raises:
            RuntimeError: If the dictionary cannot be compiled.
        """
        if self._initialized:
            return

        ## In this example annotator, we load dictionaries.
        ## Here you might load AI models instead.
        logger.info("reading from %s", self.config.dictionary_filename)
        # JSON text is read as UTF-8 rather than the platform default encoding.
        with open(self.config.dictionary_filename, encoding="utf-8") as f:
            dictionary = json.load(f)
        logger.info("loaded %s", len(dictionary))
        self._exprs = self._compile_dictionary(dictionary)
        logger.info("compiled expr")

        self._initialized = True

    def _compile_chunk(self, alternatives):
        """Compile one delimiter-guarded alternation of escaped entries."""
        return re.compile(
            self._STARTS_WITH + "(" + "|".join(alternatives) + ")" + self._ENDS_WITH
        )

    ## Dictionary compilation.
    ## If you use AI models instead of dictionaries, no similar function is needed.
    def _compile_dictionary(self, dictionary):
        """Turn the dictionary entries into a list of compiled expressions.

        Args:
            dictionary: Iterable of entity names.

        Returns:
            A list of compiled patterns; group 1 is the opening delimiter and
            group 2 the matched entry.

        Raises:
            RuntimeError: If escaping or compiling an entry fails.
        """
        logger.info("compiling dictionary")

        try:
            exprs = []
            chunk = []
            # Length of "|".join(chunk), maintained incrementally so the
            # joined string is not rebuilt for every entry.
            chunk_len = 0
            for escaped in map(re.escape, dictionary):
                if chunk_len > self._MAX_EXPR_LEN:
                    exprs.append(self._compile_chunk(chunk))
                    chunk, chunk_len = [], 0

                if len(escaped) < self._MAX_ENTRY_LEN:
                    # One extra character for the "|" separator, except
                    # before the first alternative.
                    chunk_len += len(escaped) + (1 if chunk else 0)
                    chunk.append(escaped)
                else:
                    logger.warning(
                        "name of entity '%s' is longer than %s chars",
                        escaped,
                        self._MAX_ENTRY_LEN,
                    )

            if chunk:
                exprs.append(self._compile_chunk(chunk))
        except Exception as e:
            logger.exception("Could not compile the dictionary")
            raise RuntimeError("Could not compile the dictionary") from e
        return exprs

    def annotate_entities_text(self, text: str) -> list:
        """Annotate one text string with the entities of this dictionary.

        Args:
            text: The text to search.

        Returns:
            A list of dicts with the keys ``type`` (the value of ``key()``),
            ``match`` and ``original`` (the matched entry) and ``range``
            (``[start, end]`` offsets of the entry within ``text``, end
            exclusive).
        """
        self.initialize()

        logger.debug("------ Starting 'annotate_entities_text' -------")
        matches = []

        # Run all expressions created for this entity_name
        for expr in self._exprs:
            for match in expr.finditer(text):
                found = match.group(2)
                matches.append(
                    {
                        "type": self.key(),
                        "match": found,
                        "original": found,
                        # Span of the entry itself, without the delimiters.
                        "range": [match.start(2), match.end(2)],
                    }
                )

        return matches
Loading

0 comments on commit e84fee4

Please sign in to comment.