From ed4031e2b9da445c634f10bdaa79f8f5c95ffcd2 Mon Sep 17 00:00:00 2001 From: riffmaster <173845832+riffmaster-2001@users.noreply.github.com> Date: Mon, 26 Aug 2024 21:54:01 -0700 Subject: [PATCH 1/3] fixing typo in flux document for preserve_data_backend_cache key --- documentation/DATALOADER.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/DATALOADER.md b/documentation/DATALOADER.md index dcb9740a..2c5ddc89 100644 --- a/documentation/DATALOADER.md +++ b/documentation/DATALOADER.md @@ -203,7 +203,7 @@ Images are not resized before cropping **unless** `maximum_image_size` and `targ - This is equivalent to the commandline option `--skip_file_discovery` - This is helpful if you have datasets you don't need the trainer to scan on every startup, eg. their latents/embeds are already cached fully. This allows quicker startup and resumption of training. -### `preserve_data_cache_backend` +### `preserve_data_backend_cache` - You probably don't want to ever set this - it is useful only for very large AWS datasets. - Like `skip_file_discovery`, this option can be set to prevent unnecessary, lengthy and costly filesystem scans at startup. From a62afcabc58d57dcf53cd00aec5cbf06938b504c Mon Sep 17 00:00:00 2001 From: bghira Date: Tue, 27 Aug 2024 08:26:44 -0600 Subject: [PATCH 2/3] cosineannealinghardrestarts needs last_step set for newer accelerate preparation --- helpers/training/custom_schedule.py | 1 + 1 file changed, 1 insertion(+) diff --git a/helpers/training/custom_schedule.py b/helpers/training/custom_schedule.py index 63ebb4d4..a2d6fe3b 100644 --- a/helpers/training/custom_schedule.py +++ b/helpers/training/custom_schedule.py @@ -369,6 +369,7 @@ def __init__( self.T_mult = T_mult self.eta_min = eta_min self.T_cur = last_step + self.last_step = last_step super().__init__(optimizer, last_step, verbose) def get_lr(self): From cebac83bfb7202e9804a1d712cac3bb6d363855a Mon Sep 17 00:00:00 2001 From: bghira Date: Tue, 27 Aug 2024 11:50:59 -0600 Subject: [PATCH 3/3] reintroduce timestep dependent shift as an option during flux training for dev and schnell, disabled by default --- helpers/arguments.py | 13 +++++++++++++ train.py | 7 +++++++ 2 files changed, 20 insertions(+) diff --git a/helpers/arguments.py b/helpers/arguments.py index 7a2697f8..b2182c13 100644 --- a/helpers/arguments.py +++ b/helpers/arguments.py @@ -140,6 +140,19 @@ def parse_args(input_args=None): " which has improved results in short experiments. Thanks to @mhirki for the contribution." ), ) + parser.add_argument( + "--flux_schedule_shift", + type=float, + default=None, + help=( + "Shift the noise schedule. This is a value between 0 and ~4.0, where 0 disables the timestep-dependent shift," + " and anything greater than 0 will shift the timestep sampling accordingly. The SD3 model was trained with" + " a shift value of 3. The value for Flux is unknown. Higher values result in less noisy timesteps sampled," + " which results in a lower mean loss value, but not necessarily better results. Early reports indicate" + " that modification of this value can change how the contrast is learnt by the model, and whether fine" + " details are ignored or accentuated." + ), + ) parser.add_argument( "--flux_guidance_mode", type=str, diff --git a/train.py b/train.py index ff58ac0c..07dc478c 100644 --- a/train.py +++ b/train.py @@ -1670,6 +1670,13 @@ def main(): device=accelerator.device, ) timesteps = sigmas * 1000.0 + if ( + args.flux_schedule_shift is not None + and args.flux_schedule_shift > 0 + ): + timesteps = (timesteps * args.flux_schedule_shift) / ( + 1 + (args.flux_schedule_shift - 1) * timesteps + ) sigmas = sigmas.view(-1, 1, 1, 1) else: # Sample a random timestep for each image, potentially biased by the timestep weights.