From ed1a7b91e5225a74609750b5ae8aec3843bfd20b Mon Sep 17 00:00:00 2001 From: Zhao Shenyang Date: Thu, 16 Feb 2023 02:36:21 +0800 Subject: [PATCH] feature(framework): diffusers (#3534) Co-authored-by: Aaron Pham <29749331+aarnphm@users.noreply.github.com> --- .github/workflows/frameworks.yml | 51 ++ requirements/frameworks-requirements.txt | 1 + src/bentoml/__init__.py | 3 + src/bentoml/_internal/frameworks/diffusers.py | 468 ++++++++++++++++++ src/bentoml/diffusers.py | 17 + .../frameworks/models/diffusers.py | 52 ++ 6 files changed, 592 insertions(+) create mode 100644 src/bentoml/_internal/frameworks/diffusers.py create mode 100644 src/bentoml/diffusers.py create mode 100644 tests/integration/frameworks/models/diffusers.py diff --git a/.github/workflows/frameworks.yml b/.github/workflows/frameworks.yml index 5e2b278d136..16eb2060149 100644 --- a/.github/workflows/frameworks.yml +++ b/.github/workflows/frameworks.yml @@ -23,6 +23,7 @@ jobs: runs-on: ubuntu-latest outputs: catboost: ${{ steps.filter.outputs.catboost }} + diffusers: ${{ steps.filter.outputs.diffusers }} fastai: ${{ steps.filter.outputs.fastai }} keras: ${{ steps.filter.outputs.keras }} lightgbm: ${{ steps.filter.outputs.lightgbm }} @@ -56,6 +57,11 @@ jobs: - src/bentoml/catboost.py - src/bentoml/_internal/frameworks/catboost.py - tests/integration/frameworks/models/catboost.py + diffusers: + - *related + - src/bentoml/diffusers.py + - src/bentoml/_internal/frameworks/diffusers.py + - tests/integration/frameworks/models/diffusers.py lightgbm: - *related - src/bentoml/lightgbm.py @@ -173,6 +179,51 @@ jobs: files: ./coverage.xml token: ${{ secrets.CODECOV_TOKEN }} + diffusers_integration_tests: + needs: diff + if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.diffusers == 'true') || github.event_name == 'push' }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # fetch all tags and branches + - name: Setup python + uses: actions/setup-python@v4 + 
with: + python-version: 3.8 + + - name: Get pip cache dir + id: cache-dir + run: | + echo ::set-output name=dir::$(pip cache dir) + + - name: Cache pip dependencies + uses: actions/cache@v3 + id: cache-pip + with: + path: ${{ steps.cache-dir.outputs.dir }} + key: ${{ runner.os }}-tests-${{ hashFiles('requirements/tests-requirements.txt') }} + + - name: Install dependencies + run: | + pip install . + pip install diffusers torch transformers + pip install -r requirements/tests-requirements.txt + + - name: Run tests and generate coverage report + run: | + OPTS=(--cov-config pyproject.toml --cov src/bentoml --cov-append --framework diffusers) + coverage run -m pytest tests/integration/frameworks/test_frameworks.py "${OPTS[@]}" + + - name: Generate coverage + run: coverage xml + + - name: Upload test coverage to Codecov + uses: codecov/codecov-action@v3 + with: + files: ./coverage.xml + token: ${{ secrets.CODECOV_TOKEN }} + fastai_integration_tests: needs: diff if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.fastai == 'true') || github.event_name == 'push' }} diff --git a/requirements/frameworks-requirements.txt b/requirements/frameworks-requirements.txt index dcf3563451f..d1dfe4d5be3 100644 --- a/requirements/frameworks-requirements.txt +++ b/requirements/frameworks-requirements.txt @@ -17,3 +17,4 @@ pytorch-lightning # huggingface dependencies transformers tokenizer +diffusers diff --git a/src/bentoml/__init__.py b/src/bentoml/__init__.py index 136ff714f15..c1a636fba44 100644 --- a/src/bentoml/__init__.py +++ b/src/bentoml/__init__.py @@ -68,6 +68,7 @@ from . import lightgbm from . import onnxmlir from . import detectron + from . import diffusers from . import tensorflow from . import statsmodels from . 
import torchscript @@ -91,6 +92,7 @@ catboost = _LazyLoader("bentoml.catboost", globals(), "bentoml.catboost") detectron = _LazyLoader("bentoml.detectron", globals(), "bentoml.detectron") + diffusers = _LazyLoader("bentoml.diffusers", globals(), "bentoml.diffusers") easyocr = _LazyLoader("bentoml.easyocr", globals(), "bentoml.easyocr") flax = _LazyLoader("bentoml.flax", globals(), "bentoml.flax") fastai = _LazyLoader("bentoml.fastai", globals(), "bentoml.fastai") @@ -166,6 +168,7 @@ # Framework specific modules "catboost", "detectron", + "diffusers", "easyocr", "flax", "fastai", diff --git a/src/bentoml/_internal/frameworks/diffusers.py b/src/bentoml/_internal/frameworks/diffusers.py new file mode 100644 index 00000000000..375e1e08dad --- /dev/null +++ b/src/bentoml/_internal/frameworks/diffusers.py @@ -0,0 +1,468 @@ +from __future__ import annotations + +import os +import shutil +import typing as t +import logging +from typing import TYPE_CHECKING + +import attr + +import bentoml +from bentoml import Tag +from bentoml.models import ModelContext +from bentoml.exceptions import NotFound +from bentoml.exceptions import BentoMLException +from bentoml.exceptions import MissingDependencyException + +from ..models.model import PartialKwargsModelOptions + +if TYPE_CHECKING: + from types import ModuleType + + from bentoml.types import ModelSignature + from bentoml.types import ModelSignatureDict + + +try: + import torch + import diffusers + from diffusers.utils.import_utils import is_torch_version + from diffusers.utils.import_utils import is_xformers_available + from diffusers.utils.import_utils import is_accelerate_available +except ImportError: # pragma: no cover + raise MissingDependencyException( + "'diffusers' is required in order to use module 'bentoml.diffusers', install diffusers with 'pip install --upgrade diffusers transformers accelerate'. 
For more information, refer to https://github.com/huggingface/diffusers", + ) + + +MODULE_NAME = "bentoml.diffusers" +DIFFUSION_MODEL_FOLDER = "diffusion_model" +DIFFUSION_MODEL_CONFIG_FILE = "model_index.json" +API_VERSION = "v1" + +logger = logging.getLogger(__name__) + + +@attr.define +class DiffusersOptions(PartialKwargsModelOptions): + """Options for the diffusers model.""" + + pipeline_class: type[diffusers.pipelines.DiffusionPipeline] | None = None + scheduler_class: type[diffusers.SchedulerMixin] | None = None + torch_dtype: str | torch.dtype | None = None + custom_pipeline: str | None = None + enable_xformers: bool | None = None + + +def get(tag_like: str | Tag) -> bentoml.Model: + """ + Get the BentoML model with the given tag. + + Args: + tag_like: The tag of the model to retrieve from the model store. + + Returns: + :obj:`~bentoml.Model`: A BentoML :obj:`~bentoml.Model` with the matching tag. + + Example: + + .. code-block:: python + + import bentoml + # target model must be from the BentoML model store + model = bentoml.diffusers.get("my_stable_diffusion_model") + """ + model = bentoml.models.get(tag_like) + if model.info.module not in (MODULE_NAME, __name__): + raise NotFound( + f"Model {model.tag} was saved with module {model.info.module}, not loading with {MODULE_NAME}." + ) + return model + + +def load_model( + bento_model: str | Tag | bentoml.Model, + pipeline_class: type[ + diffusers.pipelines.DiffusionPipeline + ] = diffusers.StableDiffusionPipeline, + device_map: str | dict[str, int | str | torch.device] | None = None, + custom_pipeline: str | None = None, + scheduler_class: type[diffusers.SchedulerMixin] | None = None, + torch_dtype: str | torch.dtype | None = None, + low_cpu_mem_usage: bool | None = None, + enable_xformers: bool = False, +) -> diffusers.DiffusionPipeline: + """ + Load a Diffusion model and convert it to diffusers `Pipeline `_ + with the given tag from the local BentoML model store. 
+
+    Args:
+        bento_model:
+            Either the tag of the model to get from the store, or a BentoML
+            ``~bentoml.Model`` instance to load the model from.
+        pipeline_class (:code:`type[diffusers.DiffusionPipeline]`, `optional`):
+            DiffusionPipeline class used to load the saved diffusion model, defaults to
+            ``diffusers.StableDiffusionPipeline``. For more pipeline types, refer to
+            `Pipeline Overview <https://huggingface.co/docs/diffusers/api/pipelines/overview>`_
+        device_map (:code:`None | str | Dict[str, Union[int, str, torch.device]]`, `optional`):
+            A map that specifies where each submodule should go. For more information, refer to
+            `device_map <https://huggingface.co/docs/diffusers/api/diffusion_pipeline>`_
+        custom_pipeline (:code:`None | str`, `optional`):
+            An identifier of a custom pipeline hosted on GitHub. For a list of community
+            maintained custom pipelines, refer to https://github.com/huggingface/diffusers/tree/main/examples/community
+        scheduler_class (:code:`type[diffusers.SchedulerMixin]`, `optional`):
+            Scheduler Class to be used by DiffusionPipeline
+        torch_dtype (:code:`str | torch.dtype`, `optional`):
+            Override the default `torch.dtype` and load the model under this dtype.
+        low_cpu_mem_usage (:code:`bool`, `optional`):
+            Speed up model loading by not initializing the weights and only loading the
+            pre-trained weights. Defaults to `True` if torch version >= 1.9.0 and accelerate is available, else `False`
+        enable_xformers (:code:`bool`, `optional`):
+            Use xformers optimization if it's available. For more info, refer to
+            https://github.com/facebookresearch/xformers
+
+    Returns:
+        The Diffusion model loaded as diffusers pipeline from the BentoML model store.
+
+    Example:
+
+    .. code-block:: python
+
+        import bentoml
+        pipeline = bentoml.diffusers.load_model('my_diffusers_model:latest')
+        pipeline(prompt)
+    """  # noqa
+    if not isinstance(bento_model, bentoml.Model):
+        bento_model = get(bento_model)
+
+    if bento_model.info.module not in (MODULE_NAME, __name__):
+        raise NotFound(
+            f"Model {bento_model.tag} was saved with module {bento_model.info.module}, not loading with {MODULE_NAME}."
+        )
+
+    if pipeline_class is None:
+        pipeline_class = diffusers.StableDiffusionPipeline
+
+    diffusion_model_dir = bento_model.path_of(DIFFUSION_MODEL_FOLDER)
+    # Only default device_map to "auto" when torch >= 1.9.0 and accelerate are both available.
+    if (
+        device_map is None
+        and is_torch_version(">=", "1.9.0")
+        and is_accelerate_available()
+    ):
+        device_map = "auto"
+    # The low_cpu_mem_usage default is gated on the same torch/accelerate check.
+    if low_cpu_mem_usage is None:
+        if is_torch_version(">=", "1.9.0") and is_accelerate_available():
+            low_cpu_mem_usage = True
+        else:
+            low_cpu_mem_usage = False
+
+    pipeline: diffusers.DiffusionPipeline = pipeline_class.from_pretrained(
+        diffusion_model_dir,
+        torch_dtype=torch_dtype,
+        low_cpu_mem_usage=low_cpu_mem_usage,
+        device_map=device_map,
+        custom_pipeline=custom_pipeline,
+    )
+    # Swap in the requested scheduler, rebuilt from the loaded scheduler's config.
+    if scheduler_class:
+        scheduler: diffusers.SchedulerMixin = scheduler_class.from_config(
+            pipeline.scheduler.config
+        )
+        pipeline.scheduler = scheduler
+
+    if enable_xformers:
+        pipeline.enable_xformers_memory_efficient_attention()
+
+    return pipeline
+
+
+def import_model(
+    name: str,
+    model_name_or_path: str | os.PathLike,
+    *,
+    proxies: dict[str, str] | None = None,
+    revision: str = "main",
+    signatures: dict[str, ModelSignatureDict | ModelSignature] | None = None,
+    labels: dict[str, str] | None = None,
+    custom_objects: dict[str, t.Any] | None = None,
+    external_modules: t.List[ModuleType] | None = None,
+    metadata: dict[str, t.Any] | None = None,
+    # ...
+) -> bentoml.Model:
+    """
+    Import a Diffusion model from an artifact URI to the BentoML model store.
+
+    Args:
+        name:
+            The name to give to the model in the BentoML store. This must be a valid
+            :obj:`~bentoml.Tag` name.
+        model_name_or_path:
+            Can be either:
+                - A string, the *repo id* of a pretrained pipeline hosted inside a model repo on
+                  https://huggingface.co/. Valid repo ids have to be located under a user or organization name, like
+                  `CompVis/ldm-text2im-large-256`.
+                - A path to a *directory* containing pipeline weights saved using
+                  [`~DiffusionPipeline.save_pretrained`], e.g., `./my_pipeline_directory/`.
+ proxies (`Dict[str, str]`, *optional*): + A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128', + 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request. + revision (`str`, *optional*, defaults to `"main"`): + The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a + git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any + identifier allowed by git. + signatures: + Signatures of predict methods to be used. If not provided, the signatures + default to {"__call__": {"batchable": False}}. See + :obj:`~bentoml.types.ModelSignature` for more details. + labels: + A default set of management labels to be associated with the model. For + example: ``{"training-set": "data-v1"}``. + custom_objects: + Custom objects to be saved with the model. An example is + ``{"my-normalizer": normalizer}``. Custom objects are serialized with + cloudpickle. + metadata: + Metadata to be associated with the model. An example is ``{"param_a": .2}``. + + Metadata is intended for display in a model management UI and therefore all + values in metadata dictionary must be a primitive Python type, such as + ``str`` or ``int``. + + Returns: + A :obj:`~bentoml.Model` instance referencing a saved model in the local BentoML + model store. + + Example: + + .. 
code-block:: python + + import bentoml + + bentoml.diffusers.import_model( + 'my_sd15_model', + "runwayml/stable-diffusion-v1-5", + signatures={ + "__call__": {"batchable": False}, + } + ) + """ + context = ModelContext( + framework_name="diffusers", + framework_versions={"diffusers": diffusers.__version__}, + ) + + if signatures is None: + signatures = { + "__call__": {"batchable": False}, + } + logger.info( + 'Using the default model signature for diffusers (%s) for model "%s".', + signatures, + name, + ) + + with bentoml.models.create( + name, + module=MODULE_NAME, + api_version=API_VERSION, + signatures=signatures, + labels=labels, + options=None, + custom_objects=custom_objects, + external_modules=external_modules, + metadata=metadata, + context=context, + ) as bento_model: + + diffusion_model_dir = bento_model.path_of(DIFFUSION_MODEL_FOLDER) + ignore = shutil.ignore_patterns(".git") + + if os.path.isdir(model_name_or_path): + src_dir = model_name_or_path + + else: + + try: + from huggingface_hub import snapshot_download + except ImportError: # pragma: no cover + raise MissingDependencyException( + "'huggingface_hub' is required in order to download pretrained diffusion models, install with 'pip install huggingface-hub'. 
For more information, refer to https://huggingface.co/docs/huggingface_hub/quick-start", + ) + + src_dir = snapshot_download( + model_name_or_path, + proxies=proxies, + revision=revision, + ) + + model_config_file = os.path.join(src_dir, DIFFUSION_MODEL_CONFIG_FILE) + if not os.path.exists(model_config_file): + raise BentoMLException(f'artifact "{src_dir}" is not a Diffusion model') + + shutil.copytree(src_dir, diffusion_model_dir, symlinks=False, ignore=ignore) + + return bento_model + + +def save_model( + name: str, + pipeline: diffusers.DiffusionPipeline, + *, + signatures: dict[str, ModelSignatureDict | ModelSignature] | None = None, + labels: dict[str, str] | None = None, + custom_objects: dict[str, t.Any] | None = None, + external_modules: t.List[ModuleType] | None = None, + metadata: dict[str, t.Any] | None = None, +) -> bentoml.Model: + """ + Save a DiffusionPipeline to the BentoML model store. + + Args: + name: + The name to give to the model in the BentoML store. This must be a valid + :obj:`~bentoml.Tag` name. + pipeline: + Instance of the Diffusers pipeline to be saved + signatures: + Signatures of predict methods to be used. If not provided, the signatures + default to {"__call__": {"batchable": False}}. See + :obj:`~bentoml.types.ModelSignature` for more details. + labels: + A default set of management labels to be associated with the model. For + example: ``{"training-set": "data-v1"}``. + custom_objects: + Custom objects to be saved with the model. An example is + ``{"my-normalizer": normalizer}``. Custom objects are serialized with + cloudpickle. + metadata: + Metadata to be associated with the model. An example is ``{"param_a": .2}``. + + Metadata is intended for display in a model management UI and therefore all + values in metadata dictionary must be a primitive Python type, such as + ``str`` or ``int``. + + Returns: + A :obj:`~bentoml.Model` instance referencing a saved model in the local BentoML + model store. 
+ + """ + + if not isinstance(pipeline, diffusers.DiffusionPipeline): + raise BentoMLException( + "'pipeline' must be an instance of 'diffusers.DiffusionPipeline'. " + ) + + context = ModelContext( + framework_name="diffusers", + framework_versions={"diffusers": diffusers.__version__}, + ) + + if signatures is None: + signatures = { + "__call__": {"batchable": False}, + } + logger.info( + 'Using the default model signature for diffusers (%s) for model "%s".', + signatures, + name, + ) + + with bentoml.models.create( + name, + module=MODULE_NAME, + api_version=API_VERSION, + signatures=signatures, + labels=labels, + options=None, + custom_objects=custom_objects, + external_modules=external_modules, + metadata=metadata, + context=context, + ) as bento_model: + + diffusion_model_dir = bento_model.path_of(DIFFUSION_MODEL_FOLDER) + pipeline.save_pretrained(diffusion_model_dir) + + return bento_model + + +def get_runnable(bento_model: bentoml.Model) -> t.Type[bentoml.Runnable]: + """ + Private API: use :obj:`~bentoml.Model.to_runnable` instead. 
+    """
+
+    partial_kwargs: t.Dict[str, t.Any] = bento_model.info.options.partial_kwargs  # type: ignore
+    pipeline_class: type[diffusers.DiffusionPipeline] = (
+        bento_model.info.options.pipeline_class or diffusers.StableDiffusionPipeline
+    )
+    scheduler_class: type[
+        diffusers.SchedulerMixin
+    ] | None = bento_model.info.options.scheduler_class
+    custom_pipeline: str | None = bento_model.info.options.custom_pipeline
+    _enable_xformers: bool | None = bento_model.info.options.enable_xformers
+    _torch_dtype: str | torch.dtype | None = bento_model.info.options.torch_dtype
+
+    class DiffusersRunnable(bentoml.Runnable):
+        SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
+        SUPPORTS_CPU_MULTI_THREADING = True
+
+        def __init__(self):
+            super().__init__()
+
+            # Default to half precision on CUDA unless a dtype was saved in the options.
+            if torch.cuda.is_available() and _torch_dtype is None:
+                torch_dtype = torch.float16
+            else:
+                torch_dtype = _torch_dtype
+
+            # An explicit ``enable_xformers`` option wins; only when it is unset
+            # do we auto-enable on CUDA hosts that have xformers installed.
+            if _enable_xformers is None:
+                enable_xformers = torch.cuda.is_available() and is_xformers_available()
+            else:
+                enable_xformers = bool(_enable_xformers)
+
+            self.pipeline: diffusers.DiffusionPipeline = load_model(
+                bento_model,
+                pipeline_class=pipeline_class,
+                scheduler_class=scheduler_class,
+                torch_dtype=torch_dtype,
+                custom_pipeline=custom_pipeline,
+                enable_xformers=enable_xformers,
+            )
+
+            if torch.cuda.is_available():
+                self.pipeline.to("cuda")
+
+    def make_run_method(
+        method_name: str, partial_kwargs: dict[str, t.Any] | None
+    ) -> t.Callable[..., t.Any]:
+        def _run_method(
+            runnable_self: DiffusersRunnable,
+            *args: t.Any,
+            **kwargs: t.Any,
+        ) -> t.Any:
+            # Read the factory argument, not the enclosing loop variable, so each
+            # method keeps its own partial kwargs (call-time kwargs still win).
+            if partial_kwargs is not None:
+                kwargs = dict(partial_kwargs, **kwargs)
+            kwargs.setdefault("return_dict", False)
+            return getattr(runnable_self.pipeline, method_name)(*args, **kwargs)
+
+        return _run_method
+
+    for method_name, options in bento_model.info.signatures.items():
+        method_partial_kwargs = partial_kwargs.get(method_name)
+        
DiffusersRunnable.add_method( + make_run_method(method_name, method_partial_kwargs), + name=method_name, + batchable=options.batchable, + batch_dim=options.batch_dim, + input_spec=options.input_spec, + output_spec=options.output_spec, + ) + + return DiffusersRunnable diff --git a/src/bentoml/diffusers.py b/src/bentoml/diffusers.py new file mode 100644 index 00000000000..8180f4e7c2b --- /dev/null +++ b/src/bentoml/diffusers.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +from ._internal.frameworks.diffusers import get +from ._internal.frameworks.diffusers import load_model +from ._internal.frameworks.diffusers import save_model +from ._internal.frameworks.diffusers import get_runnable +from ._internal.frameworks.diffusers import import_model +from ._internal.frameworks.diffusers import DiffusersOptions as ModelOptions + +__all__ = [ + "get", + "import_model", + "save_model", + "load_model", + "get_runnable", + "ModelOptions", +] diff --git a/tests/integration/frameworks/models/diffusers.py b/tests/integration/frameworks/models/diffusers.py new file mode 100644 index 00000000000..f9c2d45d4f2 --- /dev/null +++ b/tests/integration/frameworks/models/diffusers.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +import numpy as np +import diffusers + +import bentoml + +from . import FrameworkTestModel +from . import FrameworkTestModelInput as Input +from . 
import FrameworkTestModelConfiguration as Config + +framework = bentoml.diffusers + +backward_compatible = False + + +def check_output(out): + # output is a tuple of (images, _) + arr = out[0][0] + return arr.shape == (256, 256, 3) + + +pipeline = diffusers.StableDiffusionPipeline.from_pretrained( + "hf-internal-testing/tiny-stable-diffusion-torch" +) + +diffusers_model = FrameworkTestModel( + name="diffusers", + model=pipeline, + configurations=[ + Config( + test_inputs={ + "__call__": [ + Input( + input_args=[], + input_kwargs={ + "prompt": "a bento box", + "width": 256, + "height": 256, + "num_inference_steps": 3, + "output_type": np, + }, + expected=check_output, + ) + ], + }, + ), + ], +) + + +models: list[FrameworkTestModel] = [diffusers_model]