Turn off resizing images with --resize=False (#71)

* Make image resize optional with --resize. Toggle off image resizing using --resize=False. Default is true to maintain consistent operation.
* Make image resize optional with --resize. Toggle off image resizing using --resize=False. Default is true to maintain consistent operation.
* Make image resize optional with --resize. Toggle off image resizing using --resize=False. Default is true to maintain consistent operation.
cloneofsimo · Dec 24, 2022 · 39affb7 · 39affb7
1 parent 4869fe3
commit 39affb7
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 4 deletions.
diff --git a/train_lora_dreambooth.py b/train_lora_dreambooth.py
@@ -62,10 +62,12 @@ def __init__(
         size=512,
         center_crop=False,
         color_jitter=False,
+        resize=False,
     ):
         self.size = size
         self.center_crop = center_crop
         self.tokenizer = tokenizer
+        self.resize = resize
 
         self.instance_data_root = Path(instance_data_root)
         if not self.instance_data_root.exists():
@@ -90,7 +92,9 @@ def __init__(
             [
                 transforms.Resize(
                     size, interpolation=transforms.InterpolationMode.BILINEAR
-                ),
+                ) 
+                if resize
+                else transforms.Lambda(lambda x: x),
                 transforms.CenterCrop(size)
                 if center_crop
                 else transforms.RandomCrop(size),
@@ -419,6 +423,13 @@ def parse_args(input_args=None):
         default=None,
         help=("File path for text encoder lora to resume training."),
     )
+    parser.add_argument(
+        "--resize",
+        type=bool,
+        default=True,
+        required=False,
+        help="Should images be resized to --resolution before training?"
+    )
 
     if input_args is not None:
         args = parser.parse_args(input_args)
@@ -648,6 +659,8 @@ def main(args):
         size=args.resolution,
         center_crop=args.center_crop,
         color_jitter=args.color_jitter,
+        resize=args.resize,
+
     )
 
     def collate_fn(examples):

diff --git a/train_lora_pt_caption.py b/train_lora_pt_caption.py
@@ -78,10 +78,12 @@ def __init__(
         size=512,
         center_crop=False,
         color_jitter=False,
+        resize=False,
     ):
         self.size = size
         self.center_crop = center_crop
         self.tokenizer = tokenizer
+        self.resize = resize
 
         self.instance_data_root = Path(instance_data_root)
         if not self.instance_data_root.exists():
@@ -109,7 +111,9 @@ def __init__(
             [
                 transforms.Resize(
                     size, interpolation=transforms.InterpolationMode.BILINEAR
-                ),
+                ) 
+                if resize
+                else transforms.Lambda(lambda x: x),
                 transforms.CenterCrop(size)
                 if center_crop
                 else transforms.RandomCrop(size),
@@ -482,6 +486,13 @@ def parse_args(input_args=None):
         action="store_true",
         help="Debug to see just ti",
     )
+    parser.add_argument(
+        "--resize",
+        type=bool,
+        default=True,
+        required=False,
+        help="Should images be resized to --resolution before training?"
+    )
 
     if input_args is not None:
         args = parser.parse_args(input_args)
@@ -749,6 +760,7 @@ def main(args):
         size=args.resolution,
         center_crop=args.center_crop,
         color_jitter=args.color_jitter,
+        resize=args.resize,
     )
 
     def collate_fn(examples):

diff --git a/train_lora_w_ti.py b/train_lora_w_ti.py
@@ -131,10 +131,13 @@ def __init__(
         size=512,
         center_crop=False,
         color_jitter=False,
+        resize=False,
     ):
         self.size = size
         self.center_crop = center_crop
         self.tokenizer = tokenizer
+        self.resize = resize
+
 
         self.instance_data_root = Path(instance_data_root)
         if not self.instance_data_root.exists():
@@ -168,7 +171,9 @@ def __init__(
             [
                 transforms.Resize(
                     size, interpolation=transforms.InterpolationMode.BILINEAR
-                ),
+                ) 
+                if resize
+                else transforms.Lambda(lambda x: x),
                 transforms.CenterCrop(size)
                 if center_crop
                 else transforms.RandomCrop(size),
@@ -545,6 +550,13 @@ def parse_args(input_args=None):
         action="store_true",
         help="Debug to see just ti",
     )
+    parser.add_argument(
+        "--resize",
+        type=bool,
+        default=True,
+        required=False,
+        help="Should images be resized to --resolution before training?"
+    )
 
     if input_args is not None:
         args = parser.parse_args(input_args)
@@ -812,6 +824,7 @@ def main(args):
         size=args.resolution,
         center_crop=args.center_crop,
         color_jitter=args.color_jitter,
+        resize=args.resize,
     )
 
     def collate_fn(examples):
@@ -1104,7 +1117,6 @@ def collate_fn(examples):
 
                 if global_step >= args.max_train_steps:
                     break
-
     accelerator.wait_for_everyone()
 
     # Create the pipeline using using the trained modules and save it.