From 4e28bea1bca3033da9c82047daa717e44d170a97 Mon Sep 17 00:00:00 2001 From: Sayak Paul Date: Tue, 16 Apr 2024 22:15:55 +0530 Subject: [PATCH] [Core] `is_cosxl_edit` arg in SDXL ip2p. (#7650) * is_cosxl_edit arg in SDXL ip2p. * Empty-Commit Co-authored-by: Yiyi Xu * doc * remove redundant logic. * reflect Dhruv's comments. --------- Co-authored-by: Yiyi Xu Co-authored-by: Dhruv Nair --- .../pipeline_stable_diffusion_xl_instruct_pix2pix.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py index 31dc5acc8995..f40333db35ae 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py @@ -169,6 +169,8 @@ class StableDiffusionXLInstructPix2PixPipeline( Whether to use the [invisible_watermark library](https://github.com/ShieldMnt/invisible-watermark/) to watermark output images. If not defined, it will default to True if the package is installed, otherwise no watermarker will be used. + is_cosxl_edit (`bool`, *optional*): + When set, the image latents are multiplied by `vae.config.scaling_factor`. 
""" model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae" @@ -185,6 +187,7 @@ def __init__( scheduler: KarrasDiffusionSchedulers, force_zeros_for_empty_prompt: bool = True, add_watermarker: Optional[bool] = None, + is_cosxl_edit: Optional[bool] = False, ): super().__init__() @@ -201,6 +204,7 @@ def __init__( self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) self.default_sample_size = self.unet.config.sample_size + self.is_cosxl_edit = is_cosxl_edit add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available() @@ -551,6 +555,9 @@ def prepare_image_latents( if image_latents.dtype != self.vae.dtype: image_latents = image_latents.to(dtype=self.vae.dtype) + if self.is_cosxl_edit: + image_latents = image_latents * self.vae.config.scaling_factor + return image_latents # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline._get_add_time_ids