[from_single_file] fix: tokenizer and config loading #5439

Closed

sayakpaul wants to merge 73 commits into main from feat/workflows
Commits (73)
43e4e84
add: workflows.
sayakpaul Aug 29, 2023
ac29505
add unfinished implementation of _update_call()
sayakpaul Aug 29, 2023
e8e09e4
Apply suggestions from code review
sayakpaul Aug 30, 2023
a8a1378
fix
sayakpaul Oct 15, 2023
a62b77f
include todos.
sayakpaul Oct 15, 2023
d5d31e0
add: support for lora.
sayakpaul Oct 15, 2023
e3611e3
properly set lora_info
sayakpaul Oct 15, 2023
96c55d4
fix
sayakpaul Oct 15, 2023
5f19b66
resolve conflicts
sayakpaul Oct 15, 2023
29d0aa8
remove components from workflows.
sayakpaul Oct 17, 2023
ef94a00
handle torch.tensor.
sayakpaul Oct 17, 2023
d8e6f38
change method desc.
sayakpaul Oct 17, 2023
ba0b1e8
improve docstring
sayakpaul Oct 17, 2023
a6a0277
include pipeline name in the workflow
sayakpaul Oct 17, 2023
1ab81a6
update progress.
sayakpaul Oct 17, 2023
97ae043
update docstrings.
sayakpaul Oct 17, 2023
ad72597
patch call.
sayakpaul Oct 17, 2023
807c2ca
debug
sayakpaul Oct 17, 2023
930ca76
debug
sayakpaul Oct 17, 2023
0bd9773
debug
sayakpaul Oct 17, 2023
50769e0
remove torch tensor warning as it might complicate things
sayakpaul Oct 17, 2023
e710121
save_pretrained() to workflow so that it has push_to_hub
sayakpaul Oct 17, 2023
2b48d85
save_pretrained() to workflow so that it has push_to_hub
sayakpaul Oct 17, 2023
45c5656
make config_name a part of the dict.
sayakpaul Oct 17, 2023
2d1cd20
stronger check
sayakpaul Oct 17, 2023
b149800
remove unneeded comment
sayakpaul Oct 17, 2023
73dcc17
remove unneeded comment
sayakpaul Oct 17, 2023
eff03fd
override method
sayakpaul Oct 17, 2023
7b85bfe
fix: signature
sayakpaul Oct 17, 2023
fc609e3
more fix
sayakpaul Oct 17, 2023
aa7839c
pop from internal dict too.
sayakpaul Oct 17, 2023
e590b73
override pop too for feature compatibility
sayakpaul Oct 17, 2023
f6c0878
callables should not be serialized too.
sayakpaul Oct 17, 2023
3194560
seed.
sayakpaul Oct 17, 2023
c5ff8cd
debug
sayakpaul Oct 17, 2023
f08f40b
debug
sayakpaul Oct 17, 2023
b5fd337
debug
sayakpaul Oct 17, 2023
9ee8b0a
debug
sayakpaul Oct 17, 2023
9d0bcd4
debug
sayakpaul Oct 17, 2023
21d19bb
workflow_filename -> filename
sayakpaul Oct 17, 2023
91c1c1f
apply styling
sayakpaul Oct 17, 2023
800b7a0
more
sayakpaul Oct 17, 2023
3d6637b
partial
sayakpaul Oct 17, 2023
af282b7
debug
sayakpaul Oct 17, 2023
9adaa17
debug
sayakpaul Oct 17, 2023
4731f65
debug.
sayakpaul Oct 17, 2023
a69e3d1
debug.
sayakpaul Oct 17, 2023
ed9acd6
remove print
sayakpaul Oct 17, 2023
4993c8b
Merge branch 'main' into feat/workflows
sayakpaul Oct 17, 2023
18a756f
style.
sayakpaul Oct 17, 2023
1dc9854
copying helps?
sayakpaul Oct 17, 2023
d612b54
debug
sayakpaul Oct 17, 2023
eaae2df
debugging.
sayakpaul Oct 17, 2023
55c47bc
let's see
sayakpaul Oct 18, 2023
452bf4f
hmm almost
sayakpaul Oct 18, 2023
74e766c
quality
sayakpaul Oct 18, 2023
21e5bb6
replace the __call__ attribute of the class, not the instance
sayakpaul Oct 18, 2023
231e831
feat: support passing filename
sayakpaul Oct 18, 2023
c0e1c63
Merge branch 'main' into feat/workflows
sayakpaul Oct 18, 2023
f874578
fix: lora population.
sayakpaul Oct 18, 2023
49e06fd
fix: lora population.
sayakpaul Oct 18, 2023
c1c11a6
fix: lora
sayakpaul Oct 18, 2023
ff5cd58
support basic lora only for non-peft for now.
sayakpaul Oct 18, 2023
03bfdff
Empty-Commit
sayakpaul Oct 18, 2023
ad86788
debug
sayakpaul Oct 18, 2023
c30ffd8
debug
sayakpaul Oct 18, 2023
2521dde
debug
sayakpaul Oct 18, 2023
f744da8
debug
sayakpaul Oct 18, 2023
2f53daf
debug
sayakpaul Oct 18, 2023
98f9877
debug
sayakpaul Oct 18, 2023
85ffaea
debug
sayakpaul Oct 18, 2023
e8f774c
debug
sayakpaul Oct 18, 2023
21938aa
fix: config loading
sayakpaul Oct 18, 2023
7 changes: 6 additions & 1 deletion src/diffusers/configuration_utils.py
@@ -157,7 +157,12 @@ def save_config(self, save_directory: Union[str, os.PathLike], push_to_hub: bool
         os.makedirs(save_directory, exist_ok=True)

         # If we save using the predefined names, we can load using `from_config`
-        output_config_file = os.path.join(save_directory, self.config_name)
+        filename = kwargs.pop("filename", None)
+        if filename is not None:
+            config_name = filename
+        else:
+            config_name = self.config_name
+        output_config_file = os.path.join(save_directory, config_name)

         self.to_json_file(output_config_file)
         logger.info(f"Configuration saved in {output_config_file}")
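
With this change, `save_config()` accepts an optional `filename` keyword argument that overrides the default `self.config_name` when writing the JSON file. A minimal sketch of the resulting behavior (the model id and output directory are placeholders):

from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet")

# Default behavior: writes ./unet_config/config.json (self.config_name).
unet.save_config("./unet_config")

# With the new kwarg: writes ./unet_config/my_config.json instead.
unet.save_config("./unet_config", filename="my_config.json")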
17 changes: 17 additions & 0 deletions src/diffusers/loaders.py
@@ -1168,6 +1168,8 @@ class LoraLoaderMixin:
     """
     text_encoder_name = TEXT_ENCODER_NAME
     unet_name = UNET_NAME
+    loras_loaded = 0
+    lora_info = {}
     num_fused_loras = 0

     def load_lora_weights(
@@ -1224,6 +1226,11 @@ def load_lora_weights(
             adapter_name=adapter_name,
             _pipeline=self,
         )
+        if not USE_PEFT_BACKEND:
+            self.loras_loaded += 1
+            current_lora_info = {"pretrained_model_name_or_path_or_dict": pretrained_model_name_or_path_or_dict}
+            current_lora_info.update(dict(kwargs.items()))
+            self.lora_info.update({f"lora_{self.loras_loaded}": current_lora_info})

     @classmethod
     def lora_state_dict(
@@ -2256,6 +2263,15 @@ def unload_lora_weights(self):
         # Safe to call the following regardless of LoRA.
         self._remove_text_encoder_monkey_patch()

+        # Housekeeping.
+        # TODO: handle for PEFT backend because adapters can be combined, offloaded, etc.
+        # TODO: handle `fuse_lora()` and `unfuse_lora()` cases.
+        if not USE_PEFT_BACKEND:
+            self.loras_loaded -= 1
+            keys = list(self.lora_info.keys())
+            keys.sort()
+            self.lora_info.pop(keys[-1])
+
     def fuse_lora(
         self,
         fuse_unet: bool = True,
@@ -2832,6 +2848,7 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
             tokenizer=tokenizer,
             original_config_file=original_config_file,
             config_files=config_files,
+            local_files_only=local_files_only,
         )

         if torch_dtype is not None:
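
In the non-PEFT code path, `LoraLoaderMixin` now keeps running `loras_loaded` / `lora_info` bookkeeping so that a pipeline can later serialize which LoRAs were loaded and with which arguments. A rough sketch of the resulting state (the LoRA repo id is a placeholder):

from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe.load_lora_weights("some-user/some-lora")  # placeholder repo id

# Without the PEFT backend, the mixin records:
#   pipe.loras_loaded == 1
#   pipe.lora_info == {"lora_1": {"pretrained_model_name_or_path_or_dict": "some-user/some-lora"}}

pipe.unload_lora_weights()  # pops "lora_1" and decrements loras_loaded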
53 changes: 53 additions & 0 deletions src/diffusers/pipelines/pipeline_utils.py
@@ -22,6 +22,7 @@
 import sys
 import warnings
 from dataclasses import dataclass
+from functools import partial
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Union

@@ -56,7 +57,9 @@
     logging,
     numpy_to_pil,
 )
+from ..utils.constants import WORKFLOW_NAME
 from ..utils.torch_utils import is_compiled_module
+from ..workflow_utils import _NON_CALL_ARGUMENTS


 if is_transformers_available():
@@ -66,6 +69,7 @@
     from transformers.utils import SAFE_WEIGHTS_NAME as TRANSFORMERS_SAFE_WEIGHTS_NAME
     from transformers.utils import WEIGHTS_NAME as TRANSFORMERS_WEIGHTS_NAME

+
 from ..utils import FLAX_WEIGHTS_NAME, ONNX_EXTERNAL_WEIGHTS_NAME, ONNX_WEIGHTS_NAME, PushToHubMixin
@@ -1977,3 +1981,52 @@ def set_attention_slice(self, slice_size: Optional[int]):

         for module in modules:
             module.set_attention_slice(slice_size)
+
+    def load_workflow(self, workflow_id_or_path: Union[str, dict], filename: Optional[str] = None):
+        r"""Loads a workflow from the Hub or from a local path and patches the pipeline's call arguments with the
+        values stored in the workflow.
+
+        Args:
+            workflow_id_or_path (`str` or `dict`):
+                Can be either:
+
+                    - A string, the workflow id (for example `sayakpaul/sdxl-workflow`) of a workflow hosted on the
+                      Hub.
+                    - A path to a directory (for example `./my_workflow_directory`) containing a workflow file saved
+                      with [`Workflow.save_workflow`] or [`Workflow.push_to_hub`].
+                    - A Python dictionary.
+
+            filename (`str`, *optional*):
+                Optional name of the workflow file to load. Especially useful when working with multiple workflow
+                files.
+        """
+        filename = filename or WORKFLOW_NAME
+
+        # Load workflow.
+        if not isinstance(workflow_id_or_path, dict):
+            if os.path.isdir(workflow_id_or_path):
+                workflow_filepath = os.path.join(workflow_id_or_path, filename)
+            elif os.path.isfile(workflow_id_or_path):
+                workflow_filepath = workflow_id_or_path
+            else:
+                workflow_filepath = hf_hub_download(repo_id=workflow_id_or_path, filename=filename)
+            workflow = self._dict_from_json_file(workflow_filepath)
+        else:
+            workflow = workflow_id_or_path
+
+        # Handle the generator.
+        seed = workflow.pop("seed", None)
+        if seed is not None:
+            generator = torch.manual_seed(seed)
+        else:
+            generator = None
+        workflow.update({"generator": generator})

+        # Handle non-call arguments.
+        # Note: instead of popping the non-call arguments off, it's better to keep them in
+        # the workflow object should it be reused.
+        final_call_args = {k: v for k, v in workflow.items() if k not in _NON_CALL_ARGUMENTS}
+
+        # Handle the call here.
+        partial_call = partial(self.__call__, **final_call_args)
+        setattr(self.__class__, "__call__", partial_call)
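
`load_workflow()` resolves the workflow from a dict, a local file or directory, or a Hub repo, rebuilds a `generator` from a stored `seed`, filters out the non-call entries, and then rebinds the pipeline class's `__call__` with the remaining arguments via `functools.partial`. A sketch of the intended usage (the prompt and seed values are placeholders):

from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

# From a plain dict: "seed" becomes generator=torch.manual_seed(33) and the
# remaining call arguments are partial-bound onto __call__.
pipe.load_workflow({"prompt": "an astronaut riding a horse", "num_inference_steps": 30, "seed": 33})

# Or from the Hub / a local directory containing diffusion_workflow.json:
# pipe.load_workflow("sayakpaul/sdxl-workflow")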
112 changes: 85 additions & 27 deletions src/diffusers/pipelines/stable_diffusion/convert_from_ckpt.py
@@ -787,7 +787,12 @@ def _copy_layers(hf_layers, pt_layers):
 def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder=None):
     if text_encoder is None:
         config_name = "openai/clip-vit-large-patch14"
-        config = CLIPTextConfig.from_pretrained(config_name, local_files_only=local_files_only)
+        try:
+            config = CLIPTextConfig.from_pretrained(config_name, local_files_only=local_files_only)
+        except Exception:
+            raise ValueError(
+                f"With local_files_only set to {local_files_only}, you must first locally save the configuration in the following path: 'openai/clip-vit-large-patch14'."
+            )

     ctx = init_empty_weights if is_accelerate_available() else nullcontext
     with ctx():
@@ -922,7 +927,12 @@ def convert_open_clip_checkpoint(
     # text_model = CLIPTextModelWithProjection.from_pretrained(
     #     "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
     # )
-    config = CLIPTextConfig.from_pretrained(config_name, **config_kwargs, local_files_only=local_files_only)
+    try:
+        config = CLIPTextConfig.from_pretrained(config_name, **config_kwargs, local_files_only=local_files_only)
+    except Exception:
+        raise ValueError(
+            f"With local_files_only set to {local_files_only}, you must first locally save the configuration in the following path: '{config_name}'."
+        )

     ctx = init_empty_weights if is_accelerate_available() else nullcontext
     with ctx():
@@ -1211,7 +1221,6 @@ def download_from_original_stable_diffusion_ckpt(
             - `xl_refiner`: Config file for Stable Diffusion XL Refiner
     return: A StableDiffusionPipeline object representing the passed-in `.ckpt`/`.safetensors` file.
     """
-
     # import pipelines here to avoid circular import error when using from_single_file method
     from diffusers import (
         LDMTextToImagePipeline,
@@ -1464,11 +1473,19 @@ def download_from_original_stable_diffusion_ckpt(
         config_name = "stabilityai/stable-diffusion-2"
         config_kwargs = {"subfolder": "text_encoder"}

-        text_model = convert_open_clip_checkpoint(checkpoint, config_name, **config_kwargs)
-        tokenizer = CLIPTokenizer.from_pretrained(
-            "stabilityai/stable-diffusion-2", subfolder="tokenizer", local_files_only=local_files_only
+        text_model = convert_open_clip_checkpoint(
+            checkpoint, config_name, **config_kwargs, local_files_only=local_files_only
         )
+
+        try:
+            tokenizer = CLIPTokenizer.from_pretrained(
+                "stabilityai/stable-diffusion-2", subfolder="tokenizer", local_files_only=local_files_only
+            )
+        except Exception:
+            raise ValueError(
+                f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'stabilityai/stable-diffusion-2'."
+            )

         if stable_unclip is None:
             if controlnet:
                 pipe = pipeline_class(
@@ -1545,10 +1562,14 @@ def download_from_original_stable_diffusion_ckpt(
                prior = PriorTransformer.from_pretrained(
                    karlo_model, subfolder="prior", local_files_only=local_files_only
                )
-
-                prior_tokenizer = CLIPTokenizer.from_pretrained(
-                    "openai/clip-vit-large-patch14", local_files_only=local_files_only
-                )
+                try:
+                    prior_tokenizer = CLIPTokenizer.from_pretrained(
+                        "openai/clip-vit-large-patch14", local_files_only=local_files_only
+                    )
+                except Exception:
+                    raise ValueError(
+                        f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'openai/clip-vit-large-patch14'."
+                    )
                prior_text_model = CLIPTextModelWithProjection.from_pretrained(
                    "openai/clip-vit-large-patch14", local_files_only=local_files_only
                )
@@ -1581,7 +1602,14 @@ def download_from_original_stable_diffusion_ckpt(
             raise NotImplementedError(f"unknown `stable_unclip` type: {stable_unclip}")
     elif model_type == "PaintByExample":
         vision_model = convert_paint_by_example_checkpoint(checkpoint)
-        tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
+        try:
+            tokenizer = CLIPTokenizer.from_pretrained(
+                "openai/clip-vit-large-patch14", local_files_only=local_files_only
+            )
+        except Exception:
+            raise ValueError(
+                f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'openai/clip-vit-large-patch14'."
+            )
         feature_extractor = AutoFeatureExtractor.from_pretrained(
             "CompVis/stable-diffusion-safety-checker", local_files_only=local_files_only
         )
@@ -1597,11 +1625,16 @@ def download_from_original_stable_diffusion_ckpt(
         text_model = convert_ldm_clip_checkpoint(
             checkpoint, local_files_only=local_files_only, text_encoder=text_encoder
         )
-        tokenizer = (
-            CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
-            if tokenizer is None
-            else tokenizer
-        )
+        try:
+            tokenizer = (
+                CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
+                if tokenizer is None
+                else tokenizer
+            )
+        except Exception:
+            raise ValueError(
+                f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'openai/clip-vit-large-patch14'."
+            )

         if load_safety_checker:
             safety_checker = StableDiffusionSafetyChecker.from_pretrained(
@@ -1637,18 +1670,33 @@ def download_from_original_stable_diffusion_ckpt(
         )
     elif model_type in ["SDXL", "SDXL-Refiner"]:
         if model_type == "SDXL":
-            tokenizer = CLIPTokenizer.from_pretrained(
-                "openai/clip-vit-large-patch14", local_files_only=local_files_only
-            )
+            try:
+                tokenizer = CLIPTokenizer.from_pretrained(
+                    "openai/clip-vit-large-patch14", local_files_only=local_files_only
+                )
+            except Exception:
+                raise ValueError(
+                    f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'openai/clip-vit-large-patch14'."
+                )
             text_encoder = convert_ldm_clip_checkpoint(checkpoint, local_files_only=local_files_only)
-            tokenizer_2 = CLIPTokenizer.from_pretrained(
-                "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!", local_files_only=local_files_only
-            )
+            try:
+                tokenizer_2 = CLIPTokenizer.from_pretrained(
+                    "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!", local_files_only=local_files_only
+                )
+            except Exception:
+                raise ValueError(
+                    f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k' with `pad_token` set to '!'."
+                )

             config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
             config_kwargs = {"projection_dim": 1280}
             text_encoder_2 = convert_open_clip_checkpoint(
-                checkpoint, config_name, prefix="conditioner.embedders.1.model.", has_projection=True, **config_kwargs
+                checkpoint,
+                config_name,
+                prefix="conditioner.embedders.1.model.",
+                has_projection=True,
+                local_files_only=local_files_only,
+                **config_kwargs,
             )

             if is_accelerate_available():  # SBM Now move model to cpu.
@@ -1682,14 +1730,24 @@ def download_from_original_stable_diffusion_ckpt(
         else:
             tokenizer = None
             text_encoder = None
-            tokenizer_2 = CLIPTokenizer.from_pretrained(
-                "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!", local_files_only=local_files_only
-            )
+            try:
+                tokenizer_2 = CLIPTokenizer.from_pretrained(
+                    "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!", local_files_only=local_files_only
+                )
+            except Exception:
+                raise ValueError(
+                    f"With local_files_only set to {local_files_only}, you must first locally save the tokenizer in the following path: 'laion/CLIP-ViT-bigG-14-laion2B-39B-b160k' with `pad_token` set to '!'."
+                )

             config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
             config_kwargs = {"projection_dim": 1280}
             text_encoder_2 = convert_open_clip_checkpoint(
-                checkpoint, config_name, prefix="conditioner.embedders.0.model.", has_projection=True, **config_kwargs
+                checkpoint,
+                config_name,
+                prefix="conditioner.embedders.0.model.",
+                has_projection=True,
+                local_files_only=local_files_only,
+                **config_kwargs,
             )

             if is_accelerate_available():  # SBM Now move model to cpu.
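
All of the tokenizer and config fetches above rely on fixed Hub repos, so with `local_files_only=True` they fail opaquely when a repo has never been cached; the new `try`/`except` blocks turn that into a `ValueError` naming the repo that must be saved locally first. A sketch of the call this targets (the checkpoint path is a placeholder):

from diffusers import StableDiffusionPipeline

# If, say, 'openai/clip-vit-large-patch14' has never been cached locally, this
# now raises the actionable ValueError above instead of an opaque cache error.
pipe = StableDiffusionPipeline.from_single_file(
    "./v1-5-pruned-emaonly.safetensors",  # placeholder local checkpoint path
    local_files_only=True,
)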
3 changes: 3 additions & 0 deletions src/diffusers/pipelines/stable_diffusion/pipeline_output.py
@@ -19,10 +19,13 @@ class StableDiffusionPipelineOutput(BaseOutput):
         nsfw_content_detected (`List[bool]`)
             List indicating whether the corresponding generated image contains "not-safe-for-work" (nsfw) content or
             `None` if safety checking could not be performed.
+        workflow (`dict`):
+            Dictionary containing the pipeline component configurations and call arguments.
     """

     images: Union[List[PIL.Image.Image], np.ndarray]
     nsfw_content_detected: Optional[List[bool]]
+    workflow: dict


 if is_flax_available():
22 changes: 20 additions & 2 deletions src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -34,6 +34,7 @@
     unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
+from ...workflow_utils import populate_workflow_from_pipeline
 from ..pipeline_utils import DiffusionPipeline
 from .pipeline_output import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker
@@ -592,6 +593,7 @@ def __call__(
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         guidance_rescale: float = 0.0,
+        return_workflow: bool = False,
         clip_skip: Optional[int] = None,
     ):
         r"""
@@ -649,6 +651,8 @@ def __call__(
                 Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
                 Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when
                 using zero terminal SNR.
+            return_workflow (`bool`, *optional*, defaults to `False`):
+                Whether to return the pipeline component configurations and call arguments.
             clip_skip (`int`, *optional*):
                 Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                 the output of the pre-final layer will be used for computing the prompt embeddings.
@@ -779,7 +783,21 @@ def __call__(
         # Offload all models
         self.maybe_free_model_hooks()

+        workflow = None
+        if return_workflow:
+            signature = inspect.signature(self.__call__)
+            argument_names = [param.name for param in signature.parameters.values()]
+            call_arg_values = inspect.getargvalues(inspect.currentframe()).locals
+            workflow = populate_workflow_from_pipeline(
+                argument_names, call_arg_values, self.lora_info, self.config._name_or_path
+            )
+
         if not return_dict:
-            return (image, has_nsfw_concept)
+            outputs = (image, has_nsfw_concept)
+
+            if workflow is not None:
+                outputs += (workflow,)
+
+            return outputs

-        return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
+        return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept, workflow=workflow)
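
With `return_workflow=True`, `__call__` now introspects its own call frame and packages the call arguments (plus any tracked LoRA info) into a `workflow` entry on the output. A sketch of the intended usage (the prompt is a placeholder):

from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
out = pipe("an astronaut riding a horse", num_inference_steps=25, return_workflow=True)

out.images[0]  # the generated image, as before
out.workflow   # dict of the call arguments (and LoRA info) that produced it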
1 change: 1 addition & 0 deletions src/diffusers/utils/constants.py
@@ -31,6 +31,7 @@
 ONNX_WEIGHTS_NAME = "model.onnx"
 SAFETENSORS_WEIGHTS_NAME = "diffusion_pytorch_model.safetensors"
 ONNX_EXTERNAL_WEIGHTS_NAME = "weights.pb"
+WORKFLOW_NAME = "diffusion_workflow.json"
 HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HF_ENDPOINT", "https://huggingface.co")
 DIFFUSERS_CACHE = default_cache_path
 DIFFUSERS_DYNAMIC_MODULE_NAME = "diffusers_modules"