From 3269cab5a5ad97e4602322bd605f8497c57c5c37 Mon Sep 17 00:00:00 2001 From: Carsten Date: Wed, 3 Jan 2024 14:55:41 +0000 Subject: [PATCH 01/12] feat(mega): add megamerge script Will use multiple merge configurations as separate documents in a yaml file and do them in the correct order. --- mergekit/scripts/megamerge.py | 141 ++++++++++++++++++++++++++++++++++ pyproject.toml | 9 ++- 2 files changed, 146 insertions(+), 4 deletions(-) create mode 100644 mergekit/scripts/megamerge.py diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py new file mode 100644 index 00000000..46f29c0f --- /dev/null +++ b/mergekit/scripts/megamerge.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +import yaml +import os +import re +from typing import Optional +from typing_extensions import Annotated +import typer +import logging +import os + +from mergekit.merge import MergeOptions, run_merge +from mergekit.common import parse_kmb +from mergekit.config import MergeConfiguration + +# Regex that matches huggingface path +hf_path = r"^[a-zA-Z0-9\-]+/[a-zA-Z0-9\-\._]+(?:\+.+)$" +merges = {} + +def merge(m, options): + # check if out_path exists + if os.path.exists(m): + print(f"Skipping {m} as it already exists") + del merges[m] + return + + if len(merges[m]["deps"]) != 0: + for dep in merges[m]["deps"]: + if dep in merges: + merge(dep, options) + print(f"Merging {m}") + merge_config: MergeConfiguration = MergeConfiguration.model_validate(merges[m]) + run_merge( + merge_config, + merges[m]["out_path"], + options=options, + ) + del merges[m] + +def main( + config_file: Annotated[str, typer.Argument(help="YAML configuration file")], + lora_merge_cache: Annotated[ + Optional[str], + typer.Option(help="Path to store merged LORA models", metavar="PATH"), + ] = None, + transformers_cache: Annotated[ + Optional[str], + typer.Option( + help="Override storage path for downloaded models", metavar="PATH" + ), + ] = None, + cuda: Annotated[ + bool, typer.Option(help="Perform matrix 
arithmetic on GPU") + ] = False, + low_cpu_memory: Annotated[ + bool, + typer.Option( + help="Store results and intermediate values on GPU. Useful if VRAM > RAM" + ), + ] = False, + copy_tokenizer: Annotated[ + bool, typer.Option(help="Copy a tokenizer to the output") + ] = True, + allow_crimes: Annotated[ + bool, typer.Option(help="Allow mixing architectures") + ] = False, + out_shard_size: Annotated[ + Optional[int], + typer.Option( + help="Number of parameters per output shard [default: 5B]", + parser=parse_kmb, + show_default=False, + metavar="NUM", + ), + ] = parse_kmb("5B"), + verbose: Annotated[bool, typer.Option("-v", help="Verbose logging")] = False, + trust_remote_code: Annotated[ + bool, typer.Option(help="Trust remote code when merging LoRAs") + ] = False, + clone_tensors: Annotated[ + bool, + typer.Option( + help="Clone tensors before saving, to allow multiple occurrences of the same layer" + ), + ] = False, + lazy_unpickle: Annotated[ + bool, typer.Option(help="Experimental lazy unpickler for lower memory usage") + ] = False, +): + logging.basicConfig(level=logging.INFO if verbose else logging.WARNING) + + with open(config_file, "r") as f: + data = yaml.load_all(f, Loader=yaml.FullLoader) + + # find leaf merges, the ones that don't have a local path specified in slices[].sources[].model or models[].model + leaf_merges = [] + for d in data: + merges[d["out_path"]] = d + merges[d["out_path"]]["deps"] = [] + if "slices" in d: + for slc in d["slices"]: + for src in slc["sources"]: + if "model" in src and src["model"] is not None: + # if the model is a huggingface model, skip it + if not re.match(hf_path, src["model"]): + merges[d["out_path"]]["deps"].append(src["model"]) + if "models" in d: + for mdl in d["models"]: + if "model" in mdl and mdl["model"] is not None: + # if the model is a huggingface model, skip it + if not re.match(hf_path, mdl["model"]): + merges[d["out_path"]]["deps"].append(mdl["model"]) + + options = MergeOptions( + 
lora_merge_cache=lora_merge_cache, + transformers_cache=transformers_cache, + cuda=cuda, + low_cpu_memory=low_cpu_memory, + copy_tokenizer=copy_tokenizer, + allow_crimes=allow_crimes, + out_shard_size=out_shard_size, + trust_remote_code=trust_remote_code, + clone_tensors=clone_tensors, + lazy_unpickle=lazy_unpickle, + ) + + print("Merging:\n" + '\n'.join(merges)) + + while len(merges) != 0: + m = list(merges.keys())[0] + merge(m, options) + + + +def _main(): + # just a wee li'l stub for setuptools + typer.run(main) + + +if __name__ == "__main__": + _main() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index c498a332..0c3176db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,10 +33,11 @@ repository = "https://github.com/cg123/mergekit" [project.scripts] -mergekit-yaml = "mergekit.scripts.run_yaml:main" -mergekit-legacy = "mergekit.scripts.legacy:main" -mergekit-layershuffle = "mergekit.scripts.layershuffle:main" -bakllama = "mergekit.scripts.bakllama:main" +mergekit-yaml = "mergekit.scripts.run_yaml:_main" +mergekit-mega = "mergekit.scripts.megamerge:_main" +mergekit-legacy = "mergekit.scripts.legacy:_main" +mergekit-layershuffle = "mergekit.scripts.layershuffle:_main" +bakllama = "mergekit.scripts.bakllama:_main" [tool.setuptools] packages = ["mergekit"] From 5efe8af30001794e67e1655bd67b046e3257b2da Mon Sep 17 00:00:00 2001 From: Carsten Date: Thu, 4 Jan 2024 14:44:30 +0000 Subject: [PATCH 02/12] feat(mega): add force flag to overwrite existing merges --- mergekit/scripts/megamerge.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py index 46f29c0f..bfeb89ba 100644 --- a/mergekit/scripts/megamerge.py +++ b/mergekit/scripts/megamerge.py @@ -17,18 +17,21 @@ hf_path = r"^[a-zA-Z0-9\-]+/[a-zA-Z0-9\-\._]+(?:\+.+)$" merges = {} -def merge(m, options): +def merge(m, options, force): # check if out_path exists - if os.path.exists(m): - 
print(f"Skipping {m} as it already exists") + if not force and os.path.exists(m): + logging.info(f"Skipping {m} as it already exists") del merges[m] return + elif force and os.path.exists(m): + logging.info(f"Overwriting {m} as --force was specified") if len(merges[m]["deps"]) != 0: for dep in merges[m]["deps"]: if dep in merges: - merge(dep, options) - print(f"Merging {m}") + merge(dep, options, force) + + logging.info(f"Merging model {m}") merge_config: MergeConfiguration = MergeConfiguration.model_validate(merges[m]) run_merge( merge_config, @@ -83,6 +86,9 @@ def main( help="Clone tensors before saving, to allow multiple occurrences of the same layer" ), ] = False, + force: Annotated[ + bool, typer.Option(help="Force overwrite of existing output") + ] = False, lazy_unpickle: Annotated[ bool, typer.Option(help="Experimental lazy unpickler for lower memory usage") ] = False, @@ -128,7 +134,7 @@ def main( while len(merges) != 0: m = list(merges.keys())[0] - merge(m, options) + merge(m, options, force) From a55ad9c7c402960c4670ceb8becf5c22d2a98856 Mon Sep 17 00:00:00 2001 From: Carsten Date: Thu, 4 Jan 2024 14:52:37 +0000 Subject: [PATCH 03/12] docs(mega): add example yaml for mergekit-mega --- examples/mega.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 examples/mega.yml diff --git a/examples/mega.yml b/examples/mega.yml new file mode 100644 index 00000000..f73564dc --- /dev/null +++ b/examples/mega.yml @@ -0,0 +1,37 @@ +slices: + - sources: + - model: psmathur/orca_mini_v3_13b + layer_range: [0, 40] + - model: garage-bAInd/Platypus2-13B + layer_range: [0, 40] +merge_method: slerp +base_model: psmathur/orca_mini_v3_13b +parameters: + t: + - filter: self_attn + value: [0, 0.5, 0.3, 0.7, 1] + - filter: mlp + value: [1, 0.5, 0.7, 0.3, 0] + - value: 0.5 # fallback for rest of tensors +dtype: float16 +out_path: ./models/gradient-slerp +--- +models: + - model: ./models/gradient-slerp + parameters: + density: [1, 0.7, 
0.1] # density gradient + weight: 1.0 + - model: WizardLM/WizardMath-13B-V1.0 + parameters: + density: 0.33 + weight: + - filter: mlp + value: 0.5 + - value: 0 +merge_method: ties +base_model: TheBloke/Llama-2-13B-fp16 +parameters: + normalize: true + int8_mask: true +dtype: float16 +out_path: ./models/gradient-slerp-ties From 30520f50ed8e0476d3b5fa0ac09150666f84aa12 Mon Sep 17 00:00:00 2001 From: Carsten Date: Thu, 4 Jan 2024 15:08:14 +0000 Subject: [PATCH 04/12] fix(mega): move to click and fix rest of pyproject scripts --- mergekit/scripts/megamerge.py | 96 +++++++---------------------------- pyproject.toml | 10 ++-- 2 files changed, 23 insertions(+), 83 deletions(-) diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py index bfeb89ba..4291d414 100644 --- a/mergekit/scripts/megamerge.py +++ b/mergekit/scripts/megamerge.py @@ -3,15 +3,13 @@ import yaml import os import re -from typing import Optional -from typing_extensions import Annotated -import typer +import click import logging import os from mergekit.merge import MergeOptions, run_merge -from mergekit.common import parse_kmb from mergekit.config import MergeConfiguration +from mergekit.options import add_merge_options # Regex that matches huggingface path hf_path = r"^[a-zA-Z0-9\-]+/[a-zA-Z0-9\-\._]+(?:\+.+)$" @@ -40,58 +38,20 @@ def merge(m, options, force): ) del merges[m] +@click.command("mergekit-mega") +@click.argument("config_file") +@click.option( + "--verbose", "-v", type=bool, default=False, is_flag=True, help="Verbose logging" +) +@click.option( + "--force", "-f", type=bool, default=False, is_flag=True, help="overwrite existing merge results instead of skipping them" +) +@add_merge_options def main( - config_file: Annotated[str, typer.Argument(help="YAML configuration file")], - lora_merge_cache: Annotated[ - Optional[str], - typer.Option(help="Path to store merged LORA models", metavar="PATH"), - ] = None, - transformers_cache: Annotated[ - Optional[str], - typer.Option( - 
help="Override storage path for downloaded models", metavar="PATH" - ), - ] = None, - cuda: Annotated[ - bool, typer.Option(help="Perform matrix arithmetic on GPU") - ] = False, - low_cpu_memory: Annotated[ - bool, - typer.Option( - help="Store results and intermediate values on GPU. Useful if VRAM > RAM" - ), - ] = False, - copy_tokenizer: Annotated[ - bool, typer.Option(help="Copy a tokenizer to the output") - ] = True, - allow_crimes: Annotated[ - bool, typer.Option(help="Allow mixing architectures") - ] = False, - out_shard_size: Annotated[ - Optional[int], - typer.Option( - help="Number of parameters per output shard [default: 5B]", - parser=parse_kmb, - show_default=False, - metavar="NUM", - ), - ] = parse_kmb("5B"), - verbose: Annotated[bool, typer.Option("-v", help="Verbose logging")] = False, - trust_remote_code: Annotated[ - bool, typer.Option(help="Trust remote code when merging LoRAs") - ] = False, - clone_tensors: Annotated[ - bool, - typer.Option( - help="Clone tensors before saving, to allow multiple occurrences of the same layer" - ), - ] = False, - force: Annotated[ - bool, typer.Option(help="Force overwrite of existing output") - ] = False, - lazy_unpickle: Annotated[ - bool, typer.Option(help="Experimental lazy unpickler for lower memory usage") - ] = False, + merge_options: MergeOptions, + config_file: str, + force: bool, + verbose: bool, ): logging.basicConfig(level=logging.INFO if verbose else logging.WARNING) @@ -117,31 +77,11 @@ def main( if not re.match(hf_path, mdl["model"]): merges[d["out_path"]]["deps"].append(mdl["model"]) - options = MergeOptions( - lora_merge_cache=lora_merge_cache, - transformers_cache=transformers_cache, - cuda=cuda, - low_cpu_memory=low_cpu_memory, - copy_tokenizer=copy_tokenizer, - allow_crimes=allow_crimes, - out_shard_size=out_shard_size, - trust_remote_code=trust_remote_code, - clone_tensors=clone_tensors, - lazy_unpickle=lazy_unpickle, - ) - - print("Merging:\n" + '\n'.join(merges)) + logging.info("Merging: " 
+ ', '.join(merges)) while len(merges) != 0: m = list(merges.keys())[0] - merge(m, options, force) - - - -def _main(): - # just a wee li'l stub for setuptools - typer.run(main) - + merge(m, merge_options, force) if __name__ == "__main__": - _main() \ No newline at end of file + main() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 0c3176db..54277d26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,11 +33,11 @@ repository = "https://github.com/cg123/mergekit" [project.scripts] -mergekit-yaml = "mergekit.scripts.run_yaml:_main" -mergekit-mega = "mergekit.scripts.megamerge:_main" -mergekit-legacy = "mergekit.scripts.legacy:_main" -mergekit-layershuffle = "mergekit.scripts.layershuffle:_main" -bakllama = "mergekit.scripts.bakllama:_main" +mergekit-yaml = "mergekit.scripts.run_yaml:main" +mergekit-mega = "mergekit.scripts.megamerge:main" +mergekit-legacy = "mergekit.scripts.legacy:main" +mergekit-layershuffle = "mergekit.scripts.layershuffle:main" +bakllama = "mergekit.scripts.bakllama:main" [tool.setuptools] packages = ["mergekit"] From 5faea1a200d3944543044c0900ff40963edaf4da Mon Sep 17 00:00:00 2001 From: Carsten Date: Thu, 4 Jan 2024 19:41:03 +0000 Subject: [PATCH 05/12] feat(mega): detect circular dependencies --- mergekit/scripts/megamerge.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py index 4291d414..6c421cd8 100644 --- a/mergekit/scripts/megamerge.py +++ b/mergekit/scripts/megamerge.py @@ -15,6 +15,31 @@ hf_path = r"^[a-zA-Z0-9\-]+/[a-zA-Z0-9\-\._]+(?:\+.+)$" merges = {} +def has_circular_dependency(nodes): + def dfs(node, visited, stack): + visited[node] = True + stack[node] = True + + for dependency in nodes[node]["deps"]: + if not visited[dependency]: + if dfs(dependency, visited, stack): + return True + elif stack[dependency]: + return True + + stack[node] = False + return False + + visited = {key: False for key in nodes} 
+ stack = {key: False for key in nodes} + + for node in nodes: + if not visited[node]: + if dfs(node, visited, stack): + return node + + return None + def merge(m, options, force): # check if out_path exists if not force and os.path.exists(m): @@ -79,6 +104,10 @@ def main( logging.info("Merging: " + ', '.join(merges)) + if (dep := has_circular_dependency(merges)) is not None: + logging.error(f"Circular dependency detected: {dep}") + exit(1) + while len(merges) != 0: m = list(merges.keys())[0] merge(m, merge_options, force) From 0038c55ed49707f16893b7ca4751a65449c9afef Mon Sep 17 00:00:00 2001 From: Carsten Date: Thu, 4 Jan 2024 19:42:25 +0000 Subject: [PATCH 06/12] chore(mega): remove old comment and unused code --- mergekit/scripts/megamerge.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py index 6c421cd8..f5c7c5b0 100644 --- a/mergekit/scripts/megamerge.py +++ b/mergekit/scripts/megamerge.py @@ -83,8 +83,6 @@ def main( with open(config_file, "r") as f: data = yaml.load_all(f, Loader=yaml.FullLoader) - # find leaf merges, the ones that don't have a local path specified in slices[].sources[].model or models[].model - leaf_merges = [] for d in data: merges[d["out_path"]] = d merges[d["out_path"]]["deps"] = [] From 134af396fd0534af04725270e80001efa2644a9a Mon Sep 17 00:00:00 2001 From: Carsten Date: Thu, 4 Jan 2024 20:21:26 +0000 Subject: [PATCH 07/12] feat(mega): use name in yaml and out_path cli arg instead of out_path in yaml --- examples/mega.yml | 6 ++-- mergekit/scripts/megamerge.py | 59 ++++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 24 deletions(-) diff --git a/examples/mega.yml b/examples/mega.yml index f73564dc..4732403b 100644 --- a/examples/mega.yml +++ b/examples/mega.yml @@ -14,10 +14,10 @@ parameters: value: [1, 0.5, 0.7, 0.3, 0] - value: 0.5 # fallback for rest of tensors dtype: float16 -out_path: ./models/gradient-slerp +name: gradient-slerp --- models: - - 
model: ./models/gradient-slerp + - model: gradient-slerp parameters: density: [1, 0.7, 0.1] # density gradient weight: 1.0 @@ -34,4 +34,4 @@ parameters: normalize: true int8_mask: true dtype: float16 -out_path: ./models/gradient-slerp-ties +name: gradient-slerp-ties diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py index f5c7c5b0..b021f492 100644 --- a/mergekit/scripts/megamerge.py +++ b/mergekit/scripts/megamerge.py @@ -6,13 +6,12 @@ import click import logging import os +from pathlib import Path from mergekit.merge import MergeOptions, run_merge from mergekit.config import MergeConfiguration from mergekit.options import add_merge_options -# Regex that matches huggingface path -hf_path = r"^[a-zA-Z0-9\-]+/[a-zA-Z0-9\-\._]+(?:\+.+)$" merges = {} def has_circular_dependency(nodes): @@ -40,31 +39,33 @@ def dfs(node, visited, stack): return None -def merge(m, options, force): - # check if out_path exists - if not force and os.path.exists(m): - logging.info(f"Skipping {m} as it already exists") - del merges[m] - return - elif force and os.path.exists(m): - logging.info(f"Overwriting {m} as --force was specified") +def merge(m, merge_options, force, out_path): + # check if output_path exists + if os.path.exists(out_path / m): + if not force: + logging.info(f"Skipping {m} as it already exists") + del merges[m] + return + else: + logging.info(f"Overwriting {m} as --force was specified") if len(merges[m]["deps"]) != 0: for dep in merges[m]["deps"]: if dep in merges: - merge(dep, options, force) + merge(dep, merge_options, force, out_path) logging.info(f"Merging model {m}") merge_config: MergeConfiguration = MergeConfiguration.model_validate(merges[m]) run_merge( merge_config, - merges[m]["out_path"], - options=options, + str(out_path / merges[m]["name"]), + options=merge_options, ) del merges[m] @click.command("mergekit-mega") @click.argument("config_file") +@click.argument("out_path") @click.option( "--verbose", "-v", type=bool, default=False, 
is_flag=True, help="Verbose logging" ) @@ -75,30 +76,46 @@ def merge(m, options, force): def main( merge_options: MergeOptions, config_file: str, + out_path: str, force: bool, verbose: bool, ): logging.basicConfig(level=logging.INFO if verbose else logging.WARNING) + out_path = Path(out_path) with open(config_file, "r") as f: data = yaml.load_all(f, Loader=yaml.FullLoader) for d in data: - merges[d["out_path"]] = d - merges[d["out_path"]]["deps"] = [] + merges[d["name"]] = d + merges[d["name"]]["deps"] = [] if "slices" in d: for slc in d["slices"]: for src in slc["sources"]: if "model" in src and src["model"] is not None: - # if the model is a huggingface model, skip it - if not re.match(hf_path, src["model"]): - merges[d["out_path"]]["deps"].append(src["model"]) + model_lora = src["model"].split("+") + # name must not have a slash to avoid path traversal + # therefore, we can use it to check if its a merge from the config + if "/" not in model_lora[0]: + # avoid duplicate deps + if model_lora[0] not in merges[d["name"]]["deps"]: + merges[d["name"]]["deps"].append(model_lora[0]) + src["model"] = str(out_path / model_lora[0]) + if len(model_lora) == 2: + src["model"] += "+" + model_lora[1] if "models" in d: for mdl in d["models"]: if "model" in mdl and mdl["model"] is not None: - # if the model is a huggingface model, skip it - if not re.match(hf_path, mdl["model"]): - merges[d["out_path"]]["deps"].append(mdl["model"]) + model_lora = mdl["model"].split("+") + # name must not have a slash to avoid path traversal + # therefore, we can use it to check if its a merge from the config + if "/" not in model_lora[0]: + # avoid duplicate deps + if model_lora[0] not in merges[d["name"]]["deps"]: + merges[d["name"]]["deps"].append(model_lora[0]) + mdl["model"] = str(out_path / model_lora[0]) + if len(model_lora) == 2: + mdl["model"] += "+" + model_lora[1] logging.info("Merging: " + ', '.join(merges)) From fc6fb361302912833c72cec9971f82e6b33edbb7 Mon Sep 17 00:00:00 2001 From: 
Carsten Date: Thu, 4 Jan 2024 20:24:26 +0000 Subject: [PATCH 08/12] feat(mega): add error when name contains slash --- mergekit/scripts/megamerge.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py index b021f492..eed22cea 100644 --- a/mergekit/scripts/megamerge.py +++ b/mergekit/scripts/megamerge.py @@ -87,6 +87,9 @@ def main( data = yaml.load_all(f, Loader=yaml.FullLoader) for d in data: + if "/" in d["name"]: + logging.error("name must not contain a slash") + exit(1) merges[d["name"]] = d merges[d["name"]]["deps"] = [] if "slices" in d: From 1c5fabadf8e75903f98393b13c62d0b18a3d10f8 Mon Sep 17 00:00:00 2001 From: Carsten Date: Thu, 4 Jan 2024 20:27:43 +0000 Subject: [PATCH 09/12] chore(mega): run black formatter --- mergekit/scripts/megamerge.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py index eed22cea..8f71e2b9 100644 --- a/mergekit/scripts/megamerge.py +++ b/mergekit/scripts/megamerge.py @@ -14,6 +14,7 @@ merges = {} + def has_circular_dependency(nodes): def dfs(node, visited, stack): visited[node] = True @@ -37,7 +38,8 @@ def dfs(node, visited, stack): if dfs(node, visited, stack): return node - return None + return None + def merge(m, merge_options, force, out_path): # check if output_path exists @@ -63,6 +65,7 @@ def merge(m, merge_options, force, out_path): ) del merges[m] + @click.command("mergekit-mega") @click.argument("config_file") @click.argument("out_path") @@ -70,7 +73,12 @@ def merge(m, merge_options, force, out_path): "--verbose", "-v", type=bool, default=False, is_flag=True, help="Verbose logging" ) @click.option( - "--force", "-f", type=bool, default=False, is_flag=True, help="overwrite existing merge results instead of skipping them" + "--force", + "-f", + type=bool, + default=False, + is_flag=True, + help="overwrite existing merge results instead of skipping them", ) 
@add_merge_options def main( @@ -120,7 +128,7 @@ def main( if len(model_lora) == 2: mdl["model"] += "+" + model_lora[1] - logging.info("Merging: " + ', '.join(merges)) + logging.info("Merging: " + ", ".join(merges)) if (dep := has_circular_dependency(merges)) is not None: logging.error(f"Circular dependency detected: {dep}") @@ -130,5 +138,6 @@ def main( m = list(merges.keys())[0] merge(m, merge_options, force) + if __name__ == "__main__": - main() \ No newline at end of file + main() From 68cacc73d1efd881b3869a6c7bf07f4bbb6f368e Mon Sep 17 00:00:00 2001 From: Carsten Date: Thu, 4 Jan 2024 20:34:00 +0000 Subject: [PATCH 10/12] chore(mega): solve obvious linting errors R1702 not fixed due to the inherently nested nature of the config format E1120 not fixed due to arguments handled by click R0912 not fixed due to being introduced by click --- mergekit/scripts/megamerge.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py index 8f71e2b9..da093715 100644 --- a/mergekit/scripts/megamerge.py +++ b/mergekit/scripts/megamerge.py @@ -1,13 +1,13 @@ #!/usr/bin/env python3 -import yaml import os -import re -import click +import sys import logging -import os from pathlib import Path +import click +import yaml + from mergekit.merge import MergeOptions, run_merge from mergekit.config import MergeConfiguration from mergekit.options import add_merge_options @@ -45,18 +45,17 @@ def merge(m, merge_options, force, out_path): # check if output_path exists if os.path.exists(out_path / m): if not force: - logging.info(f"Skipping {m} as it already exists") + logging.info("Skipping %s as it already exists", m) del merges[m] return - else: - logging.info(f"Overwriting {m} as --force was specified") + logging.info("Overwriting %s as --force was specified", m) if len(merges[m]["deps"]) != 0: for dep in merges[m]["deps"]: if dep in merges: merge(dep, merge_options, force, out_path) - 
logging.info(f"Merging model {m}") + logging.info("Merging model %s", m) merge_config: MergeConfiguration = MergeConfiguration.model_validate(merges[m]) run_merge( merge_config, @@ -91,13 +90,14 @@ def main( logging.basicConfig(level=logging.INFO if verbose else logging.WARNING) out_path = Path(out_path) - with open(config_file, "r") as f: + with open(config_file, "r", encoding="utf-8") as f: data = yaml.load_all(f, Loader=yaml.FullLoader) for d in data: if "/" in d["name"]: logging.error("name must not contain a slash") - exit(1) + sys.exit(1) + merges[d["name"]] = d merges[d["name"]]["deps"] = [] if "slices" in d: @@ -128,11 +128,11 @@ def main( if len(model_lora) == 2: mdl["model"] += "+" + model_lora[1] - logging.info("Merging: " + ", ".join(merges)) + logging.info("Merging: %s", ", ".join(merges)) if (dep := has_circular_dependency(merges)) is not None: - logging.error(f"Circular dependency detected: {dep}") - exit(1) + logging.error("Circular dependency detected: %s", dep) + sys.exit(1) while len(merges) != 0: m = list(merges.keys())[0] From 48b46a04f271213a703d21d81a7343444a0157c4 Mon Sep 17 00:00:00 2001 From: Carsten Date: Fri, 5 Jan 2024 11:09:22 +0000 Subject: [PATCH 11/12] feat(mega): run isort, move dependency checking to separate function to reuse for slices and models syntax --- mergekit/scripts/megamerge.py | 72 +++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py index da093715..c4ef5313 100644 --- a/mergekit/scripts/megamerge.py +++ b/mergekit/scripts/megamerge.py @@ -1,22 +1,34 @@ #!/usr/bin/env python3 +""" +Merges multiple models and their dependencies into a single model +using multiple merge yaml documents in a single yaml file as the input +""" +import logging import os import sys -import logging from pathlib import Path import click import yaml -from mergekit.merge import MergeOptions, run_merge from mergekit.config import 
MergeConfiguration +from mergekit.merge import MergeOptions, run_merge from mergekit.options import add_merge_options merges = {} def has_circular_dependency(nodes): + """ + Detects circular in merges dependencies using DFS + Returns the node where the cycle is detected + """ + def dfs(node, visited, stack): + """ + Returns True if a cycle is detected + """ visited[node] = True stack[node] = True @@ -42,6 +54,15 @@ def dfs(node, visited, stack): def merge(m, merge_options, force, out_path): + """ + Merges a model and its dependencies + + Params: + m: name of the model to merge + merge_options: MergeOptions + force: overwrite existing merge results + out_path: output path + """ # check if output_path exists if os.path.exists(out_path / m): if not force: @@ -65,6 +86,23 @@ def merge(m, merge_options, force, out_path): del merges[m] +def add_model_deps(model, name, out_path): + """ + Adds a model to `name`s dependencies if it is not already there and is a merge + """ + if "model" in model and model["model"] is not None: + model_lora = model["model"].split("+") + # name must not have a slash to avoid path traversal + # therefore, we can use it to check if its a merge from the config + if "/" not in model_lora[0]: + # avoid duplicate deps + if model_lora[0] not in merges[name]["deps"]: + merges[name]["deps"].append(model_lora[0]) + model["model"] = str(out_path / model_lora[0]) + if len(model_lora) == 2: + model["model"] += "+" + model_lora[1] + + @click.command("mergekit-mega") @click.argument("config_file") @click.argument("out_path") @@ -87,6 +125,10 @@ def main( force: bool, verbose: bool, ): + """ + Main entrypoint for mergekit-mega command see module docstring for more info + Params are supplied by click decorators + """ logging.basicConfig(level=logging.INFO if verbose else logging.WARNING) out_path = Path(out_path) @@ -103,30 +145,10 @@ def main( if "slices" in d: for slc in d["slices"]: for src in slc["sources"]: - if "model" in src and src["model"] is not 
None: - model_lora = src["model"].split("+") - # name must not have a slash to avoid path traversal - # therefore, we can use it to check if its a merge from the config - if "/" not in model_lora[0]: - # avoid duplicate deps - if model_lora[0] not in merges[d["name"]]["deps"]: - merges[d["name"]]["deps"].append(model_lora[0]) - src["model"] = str(out_path / model_lora[0]) - if len(model_lora) == 2: - src["model"] += "+" + model_lora[1] + add_model_deps(src, d["name"], out_path) if "models" in d: for mdl in d["models"]: - if "model" in mdl and mdl["model"] is not None: - model_lora = mdl["model"].split("+") - # name must not have a slash to avoid path traversal - # therefore, we can use it to check if its a merge from the config - if "/" not in model_lora[0]: - # avoid duplicate deps - if model_lora[0] not in merges[d["name"]]["deps"]: - merges[d["name"]]["deps"].append(model_lora[0]) - mdl["model"] = str(out_path / model_lora[0]) - if len(model_lora) == 2: - mdl["model"] += "+" + model_lora[1] + add_model_deps(mdl, d["name"], out_path) logging.info("Merging: %s", ", ".join(merges)) @@ -136,7 +158,7 @@ def main( while len(merges) != 0: m = list(merges.keys())[0] - merge(m, merge_options, force) + merge(m, merge_options, force, out_path) if __name__ == "__main__": From e0307333c736e9dcf1890bef0b1a50d12d252864 Mon Sep 17 00:00:00 2001 From: Carsten Date: Fri, 5 Jan 2024 14:50:19 +0000 Subject: [PATCH 12/12] fix(mega): allow other merges as base_model --- mergekit/scripts/megamerge.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/mergekit/scripts/megamerge.py b/mergekit/scripts/megamerge.py index c4ef5313..02ea0f2e 100644 --- a/mergekit/scripts/megamerge.py +++ b/mergekit/scripts/megamerge.py @@ -90,17 +90,18 @@ def add_model_deps(model, name, out_path): """ Adds a model to `name`s dependencies if it is not already there and is a merge """ - if "model" in model and model["model"] is not None: - model_lora = 
model["model"].split("+") - # name must not have a slash to avoid path traversal - # therefore, we can use it to check if its a merge from the config - if "/" not in model_lora[0]: - # avoid duplicate deps - if model_lora[0] not in merges[name]["deps"]: - merges[name]["deps"].append(model_lora[0]) - model["model"] = str(out_path / model_lora[0]) - if len(model_lora) == 2: - model["model"] += "+" + model_lora[1] + model_lora = model.split("+") + # name must not have a slash to avoid path traversal + # therefore, we can use it to check if its a merge from the config + print(model_lora) + if "/" not in model_lora[0]: + print(model_lora) + # avoid duplicate deps + if model_lora[0] not in merges[name]["deps"]: + merges[name]["deps"].append(model_lora[0]) + model = str(out_path / model_lora[0]) + if len(model_lora) == 2: + model += "+" + model_lora[1] @click.command("mergekit-mega") @@ -142,13 +143,17 @@ def main( merges[d["name"]] = d merges[d["name"]]["deps"] = [] + if "base_model" in d: + add_model_deps(d["base_model"], d["name"], out_path) + if "/" not in d["base_model"]: + d["base_model"] = str(out_path / d["base_model"]) if "slices" in d: for slc in d["slices"]: for src in slc["sources"]: - add_model_deps(src, d["name"], out_path) + add_model_deps(src["model"], d["name"], out_path) if "models" in d: for mdl in d["models"]: - add_model_deps(mdl, d["name"], out_path) + add_model_deps(mdl["model"], d["name"], out_path) logging.info("Merging: %s", ", ".join(merges))