Merge branch 'release_candidate'
AUTOMATIC1111 committed Mar 2, 2024
2 parents cf2772f + 1398485 commit bef51ae
Showing 152 changed files with 5,883 additions and 5,612 deletions.
2 changes: 0 additions & 2 deletions .eslintrc.js
@@ -86,8 +86,6 @@ module.exports = {
// imageviewer.js
modalPrevImage: "readonly",
modalNextImage: "readonly",
// token-counters.js
setupTokenCounters: "readonly",
// localStorage.js
localSet: "readonly",
localGet: "readonly",
10 changes: 9 additions & 1 deletion .github/workflows/run_tests.yaml
@@ -20,6 +20,12 @@ jobs:
cache-dependency-path: |
**/requirements*txt
launch.py
- name: Cache models
id: cache-models
uses: actions/cache@v3
with:
path: models
key: "2023-12-30"
- name: Install test dependencies
run: pip install wait-for-it -r requirements-test.txt
env:
@@ -33,6 +39,8 @@ jobs:
TORCH_INDEX_URL: https://download.pytorch.org/whl/cpu
WEBUI_LAUNCH_LIVE_OUTPUT: "1"
PYTHONUNBUFFERED: "1"
- name: Print installed packages
run: pip freeze
- name: Start test server
run: >
python -m coverage run
@@ -49,7 +57,7 @@ jobs:
2>&1 | tee output.txt &
- name: Run tests
run: |
wait-for-it --service 127.0.0.1:7860 -t 600
wait-for-it --service 127.0.0.1:7860 -t 20
python -m pytest -vv --junitxml=test/results.xml --cov . --cov-report=xml --verify-base-url test
- name: Kill test server
if: always()
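
A note on the workflow changes above: the wait-for-it timeout drops from 600 s to 20 s, presumably because the new "Cache models" step means a warm runner no longer has to download checkpoints before the test server comes up. For context, wait-for-it simply polls a TCP endpoint until it accepts a connection or the timeout expires; a rough Python sketch of the same idea (illustration only, not the wait-for-it implementation):

    import socket
    import time

    def wait_for_port(host: str, port: int, timeout: float = 20.0) -> bool:
        """Poll host:port until it accepts a TCP connection or the timeout expires."""
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                with socket.create_connection((host, port), timeout=1):
                    return True   # server is accepting connections
            except OSError:
                time.sleep(0.5)   # not listening yet, retry
        return False

    # roughly what the workflow step does: wait-for-it --service 127.0.0.1:7860 -t 20
    # wait_for_port("127.0.0.1", 7860, timeout=20)
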
1 change: 1 addition & 0 deletions .gitignore
@@ -37,3 +37,4 @@ notification.mp3
/node_modules
/package-lock.json
/.coverage*
/test/test_outputs
132 changes: 132 additions & 0 deletions CHANGELOG.md

Large diffs are not rendered by default.

13 changes: 7 additions & 6 deletions README.md
@@ -1,5 +1,5 @@
# Stable Diffusion web UI
A browser interface based on Gradio library for Stable Diffusion.
A web interface for Stable Diffusion, implemented using Gradio library.

![](screenshot.png)

@@ -151,11 +151,12 @@ Licenses for borrowed code can be found in `Settings -> Licenses` screen, and al

- Stable Diffusion - https://github.com/Stability-AI/stablediffusion, https://github.com/CompVis/taming-transformers
- k-diffusion - https://github.com/crowsonkb/k-diffusion.git
- GFPGAN - https://github.com/TencentARC/GFPGAN.git
- CodeFormer - https://github.com/sczhou/CodeFormer
- ESRGAN - https://github.com/xinntao/ESRGAN
- SwinIR - https://github.com/JingyunLiang/SwinIR
- Swin2SR - https://github.com/mv-lab/swin2sr
- Spandrel - https://github.com/chaiNNer-org/spandrel implementing
- GFPGAN - https://github.com/TencentARC/GFPGAN.git
- CodeFormer - https://github.com/sczhou/CodeFormer
- ESRGAN - https://github.com/xinntao/ESRGAN
- SwinIR - https://github.com/JingyunLiang/SwinIR
- Swin2SR - https://github.com/mv-lab/swin2sr
- LDSR - https://github.com/Hafiidz/latent-diffusion
- MiDaS - https://github.com/isl-org/MiDaS
- Ideas for optimizations - https://github.com/basujindal/stable-diffusion
98 changes: 98 additions & 0 deletions configs/sd_xl_inpaint.yaml
@@ -0,0 +1,98 @@
model:
target: sgm.models.diffusion.DiffusionEngine
params:
scale_factor: 0.13025
disable_first_stage_autocast: True

denoiser_config:
target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
params:
num_idx: 1000

weighting_config:
target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
scaling_config:
target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
discretization_config:
target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

network_config:
target: sgm.modules.diffusionmodules.openaimodel.UNetModel
params:
adm_in_channels: 2816
num_classes: sequential
use_checkpoint: True
in_channels: 9
out_channels: 4
model_channels: 320
attention_resolutions: [4, 2]
num_res_blocks: 2
channel_mult: [1, 2, 4]
num_head_channels: 64
use_spatial_transformer: True
use_linear_in_transformer: True
transformer_depth: [1, 2, 10] # note: the first is unused (due to attn_res starting at 2) 32, 16, 8 --> 64, 32, 16
context_dim: 2048
spatial_transformer_attn_type: softmax-xformers
legacy: False

conditioner_config:
target: sgm.modules.GeneralConditioner
params:
emb_models:
# crossattn cond
- is_trainable: False
input_key: txt
target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
params:
layer: hidden
layer_idx: 11
# crossattn and vector cond
- is_trainable: False
input_key: txt
target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
params:
arch: ViT-bigG-14
version: laion2b_s39b_b160k
freeze: True
layer: penultimate
always_return_pooled: True
legacy: False
# vector cond
- is_trainable: False
input_key: original_size_as_tuple
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two
# vector cond
- is_trainable: False
input_key: crop_coords_top_left
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two
# vector cond
- is_trainable: False
input_key: target_size_as_tuple
target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
params:
outdim: 256 # multiplied by two

first_stage_config:
target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
attn_type: vanilla-xformers
double_z: true
z_channels: 4
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult: [1, 2, 4, 4]
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
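
A note on configs/sd_xl_inpaint.yaml above: as in the other configs in this repository, every target: entry is a dotted import path and params: are keyword arguments for that class's constructor; the inpainting variant is recognizable by in_channels: 9 on the UNet (conventionally 4 noisy-latent channels, 4 masked-image-latent channels and 1 mask channel). A minimal sketch of the target/params instantiation pattern, modeled on the instantiate_from_config helper used by the ldm/sgm codebases (simplified, not copied from this commit):

    import importlib
    from omegaconf import OmegaConf  # assumption: omegaconf is installed, as in the webui requirements

    def get_obj_from_str(path: str):
        """Resolve a dotted path such as 'sgm.models.diffusion.DiffusionEngine' to a class."""
        module, cls = path.rsplit(".", 1)
        return getattr(importlib.import_module(module), cls)

    def instantiate_from_config(config):
        """Build an object from a {'target': ..., 'params': {...}} mapping."""
        return get_obj_from_str(config["target"])(**config.get("params", dict()))

    # usage sketch (requires the sgm package to be importable):
    # conf = OmegaConf.load("configs/sd_xl_inpaint.yaml")
    # model = instantiate_from_config(conf.model)
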
35 changes: 33 additions & 2 deletions extensions-builtin/Lora/network.py
@@ -3,6 +3,9 @@
from collections import namedtuple
import enum

import torch.nn as nn
import torch.nn.functional as F

from modules import sd_models, cache, errors, hashes, shared

NetworkWeights = namedtuple('NetworkWeights', ['network_key', 'sd_key', 'w', 'sd_module'])
@@ -115,6 +118,29 @@ def __init__(self, net: Network, weights: NetworkWeights):
if hasattr(self.sd_module, 'weight'):
self.shape = self.sd_module.weight.shape

self.ops = None
self.extra_kwargs = {}
if isinstance(self.sd_module, nn.Conv2d):
self.ops = F.conv2d
self.extra_kwargs = {
'stride': self.sd_module.stride,
'padding': self.sd_module.padding
}
elif isinstance(self.sd_module, nn.Linear):
self.ops = F.linear
elif isinstance(self.sd_module, nn.LayerNorm):
self.ops = F.layer_norm
self.extra_kwargs = {
'normalized_shape': self.sd_module.normalized_shape,
'eps': self.sd_module.eps
}
elif isinstance(self.sd_module, nn.GroupNorm):
self.ops = F.group_norm
self.extra_kwargs = {
'num_groups': self.sd_module.num_groups,
'eps': self.sd_module.eps
}

self.dim = None
self.bias = weights.w.get("bias")
self.alpha = weights.w["alpha"].item() if "alpha" in weights.w else None
@@ -137,7 +163,7 @@ def calc_scale(self):
def finalize_updown(self, updown, orig_weight, output_shape, ex_bias=None):
if self.bias is not None:
updown = updown.reshape(self.bias.shape)
updown += self.bias.to(orig_weight.device, dtype=orig_weight.dtype)
updown += self.bias.to(orig_weight.device, dtype=updown.dtype)
updown = updown.reshape(output_shape)

if len(output_shape) == 4:
@@ -155,5 +181,10 @@ def calc_updown(self, target):
raise NotImplementedError()

def forward(self, x, y):
raise NotImplementedError()
"""A general forward implementation for all modules"""
if self.ops is None:
raise NotImplementedError()
else:
updown, ex_bias = self.calc_updown(self.sd_module.weight)
return y + self.ops(x, weight=updown, bias=ex_bias, **self.extra_kwargs)
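
The new generic forward above means a NetworkModule subclass only has to provide calc_updown: its delta weights are applied on the fly through the functional op selected in __init__ (F.conv2d, F.linear, F.layer_norm or F.group_norm), instead of being merged into the layer's weights. A standalone toy sketch of that dispatch, with a made-up delta standing in for calc_updown (illustration only, not repository code):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    layer = nn.Linear(8, 4)
    x = torch.randn(2, 8)
    y = layer(x)                         # original module output

    updown = torch.randn(4, 8) * 0.01    # stand-in for the delta that calc_updown would return
    ops, extra_kwargs = F.linear, {}     # chosen from the layer type, mirroring __init__ above

    y_with_network = y + ops(x, weight=updown, bias=None, **extra_kwargs)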

4 changes: 2 additions & 2 deletions extensions-builtin/Lora/network_full.py
@@ -18,9 +18,9 @@ def __init__(self, net: network.Network, weights: network.NetworkWeights):

def calc_updown(self, orig_weight):
output_shape = self.weight.shape
updown = self.weight.to(orig_weight.device, dtype=orig_weight.dtype)
updown = self.weight.to(orig_weight.device)
if self.ex_bias is not None:
ex_bias = self.ex_bias.to(orig_weight.device, dtype=orig_weight.dtype)
ex_bias = self.ex_bias.to(orig_weight.device)
else:
ex_bias = None

10 changes: 5 additions & 5 deletions extensions-builtin/Lora/network_glora.py
@@ -22,12 +22,12 @@ def __init__(self, net: network.Network, weights: network.NetworkWeights):
self.w2b = weights.w["b2.weight"]

def calc_updown(self, orig_weight):
w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
w1a = self.w1a.to(orig_weight.device)
w1b = self.w1b.to(orig_weight.device)
w2a = self.w2a.to(orig_weight.device)
w2b = self.w2b.to(orig_weight.device)

output_shape = [w1a.size(0), w1b.size(1)]
updown = ((w2b @ w1b) + ((orig_weight @ w2a) @ w1a))
updown = ((w2b @ w1b) + ((orig_weight.to(dtype = w1a.dtype) @ w2a) @ w1a))

return self.finalize_updown(updown, orig_weight, output_shape)
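
Across the network_*.py files in this commit the per-tensor dtype=orig_weight.dtype casts are removed, so each update is computed in the network's own precision and converted only once (see the updown.dtype change in finalize_updown above); GLoRA is the one case where orig_weight itself is cast, because the frozen weight takes part in the product. Roughly, the GLoRA delta combines a plain low-rank term with a weight-conditioned one; a sketch with illustrative shapes chosen so the products are defined (not repository code):

    import torch

    out_dim, in_dim, rank = 16, 8, 4
    W = torch.randn(out_dim, in_dim)                 # frozen original weight
    w1a, w1b = torch.randn(rank, in_dim), torch.randn(rank, in_dim)
    w2a, w2b = torch.randn(in_dim, rank), torch.randn(out_dim, rank)

    delta = (w2b @ w1b) + ((W @ w2a) @ w1a)          # same structure as calc_updown above
    assert delta.shape == W.shape                    # (out_dim, in_dim)
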
12 changes: 6 additions & 6 deletions extensions-builtin/Lora/network_hada.py
@@ -27,16 +27,16 @@ def __init__(self, net: network.Network, weights: network.NetworkWeights):
self.t2 = weights.w.get("hada_t2")

def calc_updown(self, orig_weight):
w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
w1a = self.w1a.to(orig_weight.device)
w1b = self.w1b.to(orig_weight.device)
w2a = self.w2a.to(orig_weight.device)
w2b = self.w2b.to(orig_weight.device)

output_shape = [w1a.size(0), w1b.size(1)]

if self.t1 is not None:
output_shape = [w1a.size(1), w1b.size(1)]
t1 = self.t1.to(orig_weight.device, dtype=orig_weight.dtype)
t1 = self.t1.to(orig_weight.device)
updown1 = lyco_helpers.make_weight_cp(t1, w1a, w1b)
output_shape += t1.shape[2:]
else:
@@ -45,7 +45,7 @@ def calc_updown(self, orig_weight):
updown1 = lyco_helpers.rebuild_conventional(w1a, w1b, output_shape)

if self.t2 is not None:
t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
t2 = self.t2.to(orig_weight.device)
updown2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)
else:
updown2 = lyco_helpers.rebuild_conventional(w2a, w2b, output_shape)
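
The "hada" module implements LoHa: two conventional low-rank deltas (updown1 and updown2 above) are combined with an element-wise Hadamard product. A minimal dense sketch of that idea, leaving out the optional t1/t2 CP-decomposition path for convolutions (illustration only, not repository code):

    import torch

    out_dim, in_dim, rank = 16, 8, 4
    w1a, w1b = torch.randn(out_dim, rank), torch.randn(rank, in_dim)
    w2a, w2b = torch.randn(out_dim, rank), torch.randn(rank, in_dim)

    updown1 = w1a @ w1b           # first low-rank factor, (out_dim, in_dim)
    updown2 = w2a @ w2b           # second low-rank factor, (out_dim, in_dim)
    delta = updown1 * updown2     # Hadamard (element-wise) product, LoHa's defining step
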
2 changes: 1 addition & 1 deletion extensions-builtin/Lora/network_ia3.py
@@ -17,7 +17,7 @@ def __init__(self, net: network.Network, weights: network.NetworkWeights):
self.on_input = weights.w["on_input"].item()

def calc_updown(self, orig_weight):
w = self.w.to(orig_weight.device, dtype=orig_weight.dtype)
w = self.w.to(orig_weight.device)

output_shape = [w.size(0), orig_weight.size(1)]
if self.on_input:
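
The ia3 module stores a single scaling vector, and its delta is the frozen weight scaled per channel, broadcast over the input or the output dimension depending on on_input. A sketch assuming the on_input=False branch shown above (illustration only, not repository code):

    import torch

    out_dim, in_dim = 16, 8
    W = torch.randn(out_dim, in_dim)    # frozen original weight
    w = torch.randn(out_dim)            # learned per-output-channel scale vector

    delta = W * w.reshape(-1, 1)        # broadcast the vector across input columns
    new_W = W + delta                   # applied like any other module's delta
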
18 changes: 9 additions & 9 deletions extensions-builtin/Lora/network_lokr.py
@@ -37,22 +37,22 @@ def __init__(self, net: network.Network, weights: network.NetworkWeights):

def calc_updown(self, orig_weight):
if self.w1 is not None:
w1 = self.w1.to(orig_weight.device, dtype=orig_weight.dtype)
w1 = self.w1.to(orig_weight.device)
else:
w1a = self.w1a.to(orig_weight.device, dtype=orig_weight.dtype)
w1b = self.w1b.to(orig_weight.device, dtype=orig_weight.dtype)
w1a = self.w1a.to(orig_weight.device)
w1b = self.w1b.to(orig_weight.device)
w1 = w1a @ w1b

if self.w2 is not None:
w2 = self.w2.to(orig_weight.device, dtype=orig_weight.dtype)
w2 = self.w2.to(orig_weight.device)
elif self.t2 is None:
w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
w2a = self.w2a.to(orig_weight.device)
w2b = self.w2b.to(orig_weight.device)
w2 = w2a @ w2b
else:
t2 = self.t2.to(orig_weight.device, dtype=orig_weight.dtype)
w2a = self.w2a.to(orig_weight.device, dtype=orig_weight.dtype)
w2b = self.w2b.to(orig_weight.device, dtype=orig_weight.dtype)
t2 = self.t2.to(orig_weight.device)
w2a = self.w2a.to(orig_weight.device)
w2b = self.w2b.to(orig_weight.device)
w2 = lyco_helpers.make_weight_cp(t2, w2a, w2b)

output_shape = [w1.size(0) * w2.size(0), w1.size(1) * w2.size(1)]
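
LoKr composes its delta as a Kronecker product of two factors, each of which may be stored directly (w1, w2) or reconstructed from a low-rank or CP-decomposed pair, which is what the branches above handle; the output_shape line matches the Kronecker shape rule. A minimal sketch (illustration only, not repository code):

    import torch

    w1 = torch.randn(4, 2)     # first Kronecker factor (possibly w1a @ w1b)
    w2 = torch.randn(8, 6)     # second factor (possibly w2a @ w2b or a CP reconstruction)

    delta = torch.kron(w1, w2)
    assert list(delta.shape) == [w1.size(0) * w2.size(0), w1.size(1) * w2.size(1)]  # matches output_shape above
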
6 changes: 3 additions & 3 deletions extensions-builtin/Lora/network_lora.py
@@ -61,13 +61,13 @@ def create_module(self, weights, key, none_ok=False):
return module

def calc_updown(self, orig_weight):
up = self.up_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
down = self.down_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
up = self.up_model.weight.to(orig_weight.device)
down = self.down_model.weight.to(orig_weight.device)

output_shape = [up.size(0), down.size(1)]
if self.mid_model is not None:
# cp-decomposition
mid = self.mid_model.weight.to(orig_weight.device, dtype=orig_weight.dtype)
mid = self.mid_model.weight.to(orig_weight.device)
updown = lyco_helpers.rebuild_cp_decomposition(up, down, mid)
output_shape += mid.shape[2:]
else:
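
network_lora.py is the classic LoRA case: the delta is up @ down, with an optional mid factor for CP-decomposed convolutions, and the usual alpha-based scaling comes from calc_scale (referenced in the network.py hunk above) when the delta is merged in. A minimal sketch for a Linear layer (illustration only, not repository code):

    import torch

    out_dim, in_dim, rank = 16, 8, 4
    up = torch.randn(out_dim, rank)        # up_model.weight
    down = torch.randn(rank, in_dim)       # down_model.weight
    alpha = 4.0                            # read from the network file; scale = alpha / rank

    delta = (up @ down) * (alpha / rank)   # classic LoRA update
    assert delta.shape == (out_dim, in_dim)
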
4 changes: 2 additions & 2 deletions extensions-builtin/Lora/network_norm.py
@@ -18,10 +18,10 @@ def __init__(self, net: network.Network, weights: network.NetworkWeights):

def calc_updown(self, orig_weight):
output_shape = self.w_norm.shape
updown = self.w_norm.to(orig_weight.device, dtype=orig_weight.dtype)
updown = self.w_norm.to(orig_weight.device)

if self.b_norm is not None:
ex_bias = self.b_norm.to(orig_weight.device, dtype=orig_weight.dtype)
ex_bias = self.b_norm.to(orig_weight.device)
else:
ex_bias = None
