From 4420bf6ff8b7a4d07294fa2d20a83364d7e50791 Mon Sep 17 00:00:00 2001 From: YiYi Xu Date: Mon, 23 Oct 2023 09:24:51 -1000 Subject: [PATCH] fix a few issues in controlnet inpaint pipelines (#5470) * add * Update docs/source/en/api/pipelines/controlnet_sdxl.md Co-authored-by: Patrick von Platen --------- Co-authored-by: yiyixuxu Co-authored-by: Patrick von Platen --- .../en/api/pipelines/controlnet_sdxl.md | 9 ++++++ .../controlnet/pipeline_controlnet_inpaint.py | 16 ++++------ .../pipeline_controlnet_inpaint_sd_xl.py | 32 +++++++++---------- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/docs/source/en/api/pipelines/controlnet_sdxl.md b/docs/source/en/api/pipelines/controlnet_sdxl.md index e517a56046ed5..bea83f2603a4a 100644 --- a/docs/source/en/api/pipelines/controlnet_sdxl.md +++ b/docs/source/en/api/pipelines/controlnet_sdxl.md @@ -41,6 +41,15 @@ Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers) - all - __call__ +## StableDiffusionXLControlNetImg2ImgPipeline +[[autodoc]] StableDiffusionXLControlNetImg2ImgPipeline + - all + - __call__ + +## StableDiffusionXLControlNetInpaintPipeline +[[autodoc]] StableDiffusionXLControlNetInpaintPipeline + - all + - __call__ ## StableDiffusionPipelineOutput [[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput \ No newline at end of file diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 3c0d1943a32f8..d03a8eea7f6df 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -68,18 +68,16 @@ >>> mask_image = mask_image.resize((512, 512)) - >>> def make_inpaint_condition(image, image_mask): - ... image = np.array(image.convert("RGB")).astype(np.float32) / 255.0 - ... image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0 - - ... assert image.shape[0:1] == image_mask.shape[0:1], "image and image_mask must have the same image size" - ... image[image_mask > 0.5] = -1.0 # set as masked pixel - ... image = np.expand_dims(image, 0).transpose(0, 3, 1, 2) - ... image = torch.from_numpy(image) + >>> def make_canny_condition(image): + ... image = np.array(image) + ... image = cv2.Canny(image, 100, 200) + ... image = image[:, :, None] + ... image = np.concatenate([image, image, image], axis=2) + ... image = Image.fromarray(image) ... return image - >>> control_image = make_inpaint_condition(init_image, mask_image) + >>> control_image = make_canny_condition(init_image) >>> controlnet = ControlNetModel.from_pretrained( ... "lllyasviel/control_v11p_sd15_inpaint", torch_dtype=torch.float16 diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index 2b66bfd1c72ca..6c5d9a3993d40 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -75,27 +75,24 @@ >>> mask_image = mask_image.resize((1024, 1024)) - >>> def make_inpaint_condition(image, image_mask): - ... image = np.array(image.convert("RGB")).astype(np.float32) / 255.0 - ... image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0 - - ... assert image.shape[0:1] == image_mask.shape[0:1], "image and image_mask must have the same image size" - ... image[image_mask < 0.5] = 0 # set as masked pixel - ... image = np.expand_dims(image, 0).transpose(0, 3, 1, 2) - ... image = torch.from_numpy(image) + >>> def make_canny_condition(image): + ... image = np.array(image) + ... image = cv2.Canny(image, 100, 200) + ... image = image[:, :, None] + ... image = np.concatenate([image, image, image], axis=2) + ... image = Image.fromarray(image) ... return image - >>> control_image = make_inpaint_condition(init_image, mask_image) + >>> control_image = make_canny_condition(init_image) >>> controlnet = ControlNetModel.from_pretrained( - ... "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float32 + ... "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16 ... ) >>> pipe = StableDiffusionXLControlNetInpaintPipeline.from_pretrained( - ... "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, torch_dtype=torch.float32 + ... "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, torch_dtype=torch.float16 ... ) - >>> pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config) >>> pipe.enable_model_cpu_offload() >>> # generate image @@ -777,13 +774,14 @@ def prepare_latents( "However, either the image or the noise timestep has not been provided." ) - if image.shape[1] == 4: - image_latents = image.to(device=device, dtype=dtype) - elif return_image_latents or (latents is None and not is_strength_max): + if return_image_latents or (latents is None and not is_strength_max): image = image.to(device=device, dtype=dtype) - image_latents = self._encode_vae_image(image=image, generator=generator) - image_latents = image_latents.repeat(batch_size // image_latents.shape[0], 1, 1, 1) + if image.shape[1] == 4: + image_latents = image + else: + image_latents = self._encode_vae_image(image=image, generator=generator) + image_latents = image_latents.repeat(batch_size // image_latents.shape[0], 1, 1, 1) if latents is None and add_noise: noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)