sync abi api and update README.md about disclaimer

seasonjs · Nov 26, 2023 · a283bcb · a283bcb
1 parent 6716d60
commit a283bcb
Show file tree

Hide file tree

Showing 12 changed files with 163 additions and 57 deletions.
diff --git a/.idea/stable-diffusion.iml b/.idea/stable-diffusion.iml
diff --git a/README.md b/README.md
@@ -17,7 +17,7 @@ of ggml.cpp.
 go get github.com/seasonjs/stable-diffusion
 ```
 
-## Compatibility
+## AutoModel Compatibility
 
 See `deps` folder for dylib compatibility, push request is welcome.
 
@@ -27,6 +27,18 @@ See `deps` folder for dylib compatibility, push request is welcome.
 | linux    | not support | support                 | not support |
 | darwin   | not support | support  (no test)      | support     |
 
+## AutoModel Dynamic Libraries Disclaimer
+
+#### The Source Of Dynamic Libraries
+These dynamic libraries come from the [stable-diffusion.cpp-build release](https://github.com/seasonjs/stable-diffusion.cpp-build/releases) page. The dynamic library version can be found in the [stable-diffusion.version file](./deps/stable-diffusion.version).
+Anyone can verify the consistency of a file by checking its MD5 checksum.
+
+#### The Security Of Dynamic Libraries
+All I can say is that the build process of the dynamic libraries is public and that they do not intentionally contain any malicious logic.
+If you are concerned about the security of the dynamic libraries, you can build them yourself.
+
+**Neither I nor any author associated with the dynamic libraries assumes any responsibility or legal liability for problems arising from their use.**
+
 ## Usage
 
 This `stable-diffusion` golang library provide two api `Predict` and `ImagePredict`.

diff --git a/binding.go b/binding.go
@@ -7,8 +7,6 @@ import (
 	"encoding/base64"
 	"errors"
 	"github.com/ebitengine/purego"
-	"runtime"
-	"unsafe"
 )
 
 type SDLogLevel string
@@ -108,8 +106,8 @@ type CStableDiffusion struct {
 	cCreateStableDiffusion  func(nThreads int, vaeDecodeOnly bool, freeParamsImmediately bool, loraModelDir string, rngType string) uintptr
 	cDestroyStableDiffusion func(sd uintptr)
 	cLoadFromFile           func(sd uintptr, path string, schedule string)
-	cTxt2img                func(sd uintptr, options uintptr, byteSize *int64) *byte
-	cImg2img                func(sd uintptr, options uintptr, byte2 *int64) *byte
+	cTxt2img                func(sd uintptr, options uintptr) string
+	cImg2img                func(sd uintptr, options uintptr) string
 
 	cSetStableDiffusionLogLevel   func(level string)
 	cGetStableDiffusionSystemInfo func() string
@@ -146,8 +144,8 @@ func NewCStableDiffusion(libraryPath string) (*CStableDiffusion, error) {
 		createStableDiffusion  func(nThreads int, vaeDecodeOnly bool, freeParamsImmediately bool, loraModelDir string, rngType string) uintptr
 		destroyStableDiffusion func(sd uintptr)
 		loadFromFile           func(sd uintptr, path string, schedule string)
-		txt2img                func(sd uintptr, options uintptr, byteSize *int64) *byte
-		img2img                func(sd uintptr, options uintptr, byte2 *int64) *byte
+		txt2img                func(sd uintptr, options uintptr) string
+		img2img                func(sd uintptr, options uintptr) string
 
 		setStableDiffusionLogLevel   func(level string)
 		getStableDiffusionSystemInfo func() string
@@ -257,16 +255,11 @@ func (c *CSDCtx) StableDiffusionTextToImage(prompt string, negativePrompt string
 	c.csd.cSetTxt2imgSampleMethod(options, string(sampleMethod))
 	c.csd.cSetTxt2imgSampleSteps(options, sampleSteps)
 	c.csd.cSetTxt2imgSeed(options, seed)
-	minSize := int64(width * height * 3)
-	var size int64
-	runtime.KeepAlive(size)
-	output := c.csd.cTxt2img(c.ctx, options, &size)
-	if size < minSize {
-		size = minSize
+	output := c.csd.cTxt2img(c.ctx, options)
+	data, err := base64.StdEncoding.DecodeString(output)
+	if err != nil {
+		return nil, err
 	}
-	data := unsafe.Slice(output, size)
-	size = 0
-	c.csd.cFreeBuffer(uintptr(unsafe.Pointer(output)))
 	return data, nil
 }
 
@@ -287,16 +280,11 @@ func (c *CSDCtx) StableDiffusionImageToImage(initImg []byte, prompt string, nega
 	c.csd.cSetImg2imgSampleSteps(options, sampleSteps)
 	c.csd.cSetImg2imgStrength(options, strength)
 	c.csd.cSetImg2imgSeed(options, seed)
-	minSize := int64(width * height * 3)
-	var size int64
-	runtime.KeepAlive(size)
-	output := c.csd.cImg2img(c.ctx, options, &size)
-	if size < minSize {
-		size = minSize
+	output := c.csd.cImg2img(c.ctx, options)
+	data, err := base64.StdEncoding.DecodeString(output)
+	if err != nil {
+		return nil, err
 	}
-	data := unsafe.Slice(output, size)
-	size = 0
-	c.csd.cFreeBuffer(uintptr(unsafe.Pointer(output)))
 	return data, nil
 }
 

diff --git a/deps/darwin/libsd-abi.dylib b/deps/darwin/libsd-abi.dylib
diff --git a/deps/linux/libsd-abi.so b/deps/linux/libsd-abi.so
diff --git a/deps/stable-diffusion-abi.h b/deps/stable-diffusion-abi.h
@@ -79,15 +79,15 @@ STABLE_DIFFUSION_API void destroy_stable_diffusion(void* sd);
 
 STABLE_DIFFUSION_API bool load_from_file(void* sd, const char* file_path, const char* schedule);
 
-STABLE_DIFFUSION_API uint8_t* txt2img(void* sd, const sd_txt2img_options* opt, int64_t* output_size);
+STABLE_DIFFUSION_API const char * txt2img(void* sd, const sd_txt2img_options* opt);
 
-STABLE_DIFFUSION_API uint8_t* img2img(void* sd, const sd_img2img_options* opt, int64_t* output_size);
+STABLE_DIFFUSION_API const char* img2img(void* sd, const sd_img2img_options* opt);
 
 STABLE_DIFFUSION_API void set_stable_diffusion_log_level(const char* level);
 
 STABLE_DIFFUSION_API const char* get_stable_diffusion_system_info();
 
-STABLE_DIFFUSION_API void free_buffer(const uint8_t* buffer);
+STABLE_DIFFUSION_API void free_buffer(const char* buffer);
 
 #ifdef __cplusplus
 }

diff --git a/deps/stable-diffusion.version b/deps/stable-diffusion.version
@@ -1 +1 @@
-common: main-601f409
+common: main-251abfe
diff --git a/deps/windows/sd-abi_avx.dll b/deps/windows/sd-abi_avx.dll
diff --git a/deps/windows/sd-abi_avx2.dll b/deps/windows/sd-abi_avx2.dll
diff --git a/deps/windows/sd-abi_avx512.dll b/deps/windows/sd-abi_avx512.dll
diff --git a/models/convert.py b/models/convert.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 import torch
+import re
 import safetensors.torch
 
 this_file_dir = os.path.dirname(__file__)
@@ -100,7 +101,7 @@ def quantize_q5_1(x):
 def quantize_q8_0(x):
     assert x.shape[-1] % QK8_0 == 0 and x.shape[-1] > QK8_0
     x = x.reshape(-1, QK8_0)
-    amax = np.max(np.abs(x), axis=-1, keepdims=True) 
+    amax = np.max(np.abs(x), axis=-1, keepdims=True)
     d = amax / ((1 << 7) - 1)
     qs = (x / d).round().clip(min=-128, max=127).astype(np.int8)
     d = d.astype(np.float16).view(np.int8)
@@ -177,7 +178,7 @@ def preprocess(state_dict):
         print("no alphas_cumprod in file, generate new one")
         alphas_cumprod = get_alpha_comprod()
         state_dict["alphas_cumprod"] = alphas_cumprod
-    
+
     new_state_dict = {}
     for name, w in state_dict.items():
         # ignore unused tensors
@@ -191,7 +192,7 @@ def preprocess(state_dict):
         if skip:
             continue
 
-        # # convert BF16 to FP16
+        # convert BF16 to FP16
         if w.dtype == torch.bfloat16:
             w = w.to(torch.float16)
 
@@ -250,7 +251,7 @@ def preprocess(state_dict):
                 new_state_dict[new_name] = w
                 print(f"preprocess {name} => {new_name}")
             continue
-        
+
         # convert unet transformer linear to conv2d 1x1
         if name.startswith("model.diffusion_model.") and (name.endswith("proj_in.weight") or name.endswith("proj_out.weight")):
             if len(w.shape) == 2:
@@ -270,21 +271,112 @@ def preprocess(state_dict):
         new_state_dict[name] = w
     return new_state_dict
 
-def convert(model_path, out_type = None, out_file=None):
+re_digits = re.compile(r"\d+")
+re_x_proj = re.compile(r"(.*)_([qkv]_proj)$")
+re_compiled = {}
+
+suffix_conversion = {
+    "attentions": {},
+    "resnets": {
+        "conv1": "in_layers_2",
+        "conv2": "out_layers_3",
+        "norm1": "in_layers_0",
+        "norm2": "out_layers_0",
+        "time_emb_proj": "emb_layers_1",
+        "conv_shortcut": "skip_connection",
+    }
+}
+
+
+def convert_diffusers_name_to_compvis(key):
+    def match(match_list, regex_text):
+        regex = re_compiled.get(regex_text)
+        if regex is None:
+            regex = re.compile(regex_text)
+            re_compiled[regex_text] = regex
+
+        r = re.match(regex, key)
+        if not r:
+            return False
+
+        match_list.clear()
+        match_list.extend([int(x) if re.match(re_digits, x) else x for x in r.groups()])
+        return True
+
+    m = []
+
+    if match(m, r"lora_unet_conv_in(.*)"):
+        return f'model_diffusion_model_input_blocks_0_0{m[0]}'
+
+    if match(m, r"lora_unet_conv_out(.*)"):
+        return f'model_diffusion_model_out_2{m[0]}'
+
+    if match(m, r"lora_unet_time_embedding_linear_(\d+)(.*)"):
+        return f"model_diffusion_model_time_embed_{m[0] * 2 - 2}{m[1]}"
+
+    if match(m, r"lora_unet_down_blocks_(\d+)_(attentions|resnets)_(\d+)_(.+)"):
+        suffix = suffix_conversion.get(m[1], {}).get(m[3], m[3])
+        return f"model_diffusion_model_input_blocks_{1 + m[0] * 3 + m[2]}_{1 if m[1] == 'attentions' else 0}_{suffix}"
+
+    if match(m, r"lora_unet_mid_block_(attentions|resnets)_(\d+)_(.+)"):
+        suffix = suffix_conversion.get(m[0], {}).get(m[2], m[2])
+        return f"model_diffusion_model_middle_block_{1 if m[0] == 'attentions' else m[1] * 2}_{suffix}"
+
+    if match(m, r"lora_unet_up_blocks_(\d+)_(attentions|resnets)_(\d+)_(.+)"):
+        suffix = suffix_conversion.get(m[1], {}).get(m[3], m[3])
+        return f"model_diffusion_model_output_blocks_{m[0] * 3 + m[2]}_{1 if m[1] == 'attentions' else 0}_{suffix}"
+
+    if match(m, r"lora_unet_down_blocks_(\d+)_downsamplers_0_conv"):
+        return f"model_diffusion_model_input_blocks_{3 + m[0] * 3}_0_op"
+
+    if match(m, r"lora_unet_up_blocks_(\d+)_upsamplers_0_conv"):
+        return f"model_diffusion_model_output_blocks_{2 + m[0] * 3}_{2 if m[0]>0 else 1}_conv"
+
+    if match(m, r"lora_te_text_model_encoder_layers_(\d+)_(.+)"):
+        return f"cond_stage_model_transformer_text_model_encoder_layers_{m[0]}_{m[1]}"
+
+    return None
+
+def preprocess_lora(state_dict):
+    new_state_dict = {}
+    for name, w in state_dict.items():
+        if not isinstance(w, torch.Tensor):
+            continue
+
+        # convert BF16 to FP16
+        if w.dtype == torch.bfloat16:
+            w = w.to(torch.float16)
+
+        name_without_network_parts, network_part = name.split(".", 1)
+        new_name_without_network_parts = convert_diffusers_name_to_compvis(name_without_network_parts)
+        if new_name_without_network_parts == None:
+            raise Exception(f"unknown lora tensor: {name}")
+        new_name = new_name_without_network_parts + "." + network_part
+        print(f"preprocess {name} => {new_name}")
+        new_state_dict[new_name] = w
+    return new_state_dict
+
+def convert(model_path, out_type = None, out_file=None, lora=False):
     # load model
-    with open(os.path.join(vocab_dir, "vocab.json"), encoding="utf-8") as f:
-        clip_vocab = json.load(f)
-
+    if not lora:
+        with open(os.path.join(vocab_dir, "vocab.json"), encoding="utf-8") as f:
+            clip_vocab = json.load(f)
+
     state_dict = load_model_from_file(model_path)
-    model_type = SD1
-    if "cond_stage_model.model.token_embedding.weight" in state_dict.keys():
+    model_type = SD1 # lora only for SD1 now
+    if not lora and "cond_stage_model.model.token_embedding.weight" in state_dict.keys():
         model_type = SD2
         print("Stable diffuison 2.x")
     else:
         print("Stable diffuison 1.x")
-    state_dict = preprocess(state_dict)
+    if lora:
+        state_dict = preprocess_lora(state_dict)
+    else:
+        state_dict = preprocess(state_dict)
 
     # output option
+    if lora:
+        out_type = "f16" # only f16 for now
     if out_type == None:
         weight = state_dict["model.diffusion_model.input_blocks.0.0.weight"].numpy()
         if weight.dtype == np.float32:
@@ -296,7 +388,10 @@ def convert(model_path, out_type = None, out_file=None):
         else:
             raise Exception("unsupported weight type %s" % weight.dtype)
     if out_file == None:
-        out_file = os.path.splitext(os.path.basename(model_path))[0] + f"-ggml-model-{out_type}.bin"
+        if lora:
+            out_file = os.path.splitext(os.path.basename(model_path))[0] + f"-ggml-lora.bin"
+        else:
+            out_file = os.path.splitext(os.path.basename(model_path))[0] + f"-ggml-model-{out_type}.bin"
         out_file = os.path.join(os.getcwd(), out_file)
     print(f"Saving GGML compatible file to {out_file}")
 
@@ -309,14 +404,15 @@ def convert(model_path, out_type = None, out_file=None):
         file.write(struct.pack("i", ftype))
 
         # vocab
-        byte_encoder = bytes_to_unicode()
-        byte_decoder = {v: k for k, v in byte_encoder.items()}
-        file.write(struct.pack("i", len(clip_vocab)))
-        for key in clip_vocab:
-            text = bytearray([byte_decoder[c] for c in key])
-            file.write(struct.pack("i", len(text)))
-            file.write(text)
-
+        if not lora:
+            byte_encoder = bytes_to_unicode()
+            byte_decoder = {v: k for k, v in byte_encoder.items()}
+            file.write(struct.pack("i", len(clip_vocab)))
+            for key in clip_vocab:
+                text = bytearray([byte_decoder[c] for c in key])
+                file.write(struct.pack("i", len(text)))
+                file.write(text)
+
         # weights
         for name in state_dict.keys():
             if not isinstance(state_dict[name], torch.Tensor):
@@ -330,14 +426,15 @@ def convert(model_path, out_type = None, out_file=None):
                 continue
             if name in unused_tensors:
                 continue
+
             data = state_dict[name].numpy()
 
             n_dims = len(data.shape)
             shape = data.shape
             old_type = data.dtype
 
             ttype = "f32"
-            if n_dims == 4:
+            if n_dims == 4 and not lora:
                 data = data.astype(np.float16)
                 ttype = "f16"
             elif n_dims == 2 and name[-7:] == ".weight":
@@ -361,7 +458,7 @@ def convert(model_path, out_type = None, out_file=None):
             else:
                 data = data.astype(np.float32)
                 ttype = "f32"
-            
+
             print("Processing tensor: {} with shape {}, {} -> {}".format(name, data.shape, old_type, ttype))
 
             # header
@@ -380,6 +477,7 @@ def convert(model_path, out_type = None, out_file=None):
     parser = argparse.ArgumentParser(description="Convert Stable Diffuison model to GGML compatible file format")
     parser.add_argument("--out_type", choices=["f32", "f16", "q4_0", "q4_1", "q5_0", "q5_1", "q8_0"], help="output format (default: based on input)")
     parser.add_argument("--out_file", help="path to write to; default: based on input and current working directory")
+    parser.add_argument("--lora", action='store_true', default = False, help="convert lora weight; default: false")
     parser.add_argument("model_path", help="model file path (*.pth, *.pt, *.ckpt, *.safetensors)")
     args = parser.parse_args()
-    convert(args.model_path, args.out_type, args.out_file)
+    convert(args.model_path, args.out_type, args.out_file, args.lora)