Feature/lightning upgrade #102

Merged on Feb 8, 2024
Commits (24)
10295d2
build: Update lightning version
lorenzomammana Jan 19, 2024
eeaa31d
wip: Change incorrect code after lightning upgrade
lorenzomammana Jan 19, 2024
79f9777
refactor: Update configurations after lightning upgrade
lorenzomammana Jan 19, 2024
42a1db7
Merge branch 'feature/torch-upgrade' into feature/lightning-upgrade
lorenzomammana Jan 22, 2024
60723d8
build: Update lock file
lorenzomammana Jan 22, 2024
975d4b8
fix: Fix incompatibilities with new lightning package
lorenzomammana Jan 22, 2024
2e0a480
build: Limit torch version downloaded by poetry
lorenzomammana Jan 22, 2024
c6579d5
Merge branch 'feature/torch-upgrade' into feature/lightning-upgrade
lorenzomammana Jan 23, 2024
e0fa7ed
build: Move hydra-plugins as group dependency
lorenzomammana Jan 23, 2024
de87352
Merge branch 'feature/torch-upgrade' into feature/lightning-upgrade
lorenzomammana Jan 24, 2024
96be407
build: Upgrade version
lorenzomammana Jan 24, 2024
8fe8f0f
Merge branch 'feature/torch-upgrade' into feature/lightning-upgrade
lorenzomammana Jan 24, 2024
5199459
test: Disable cuda visible devices call to avoid conflicts with light…
lorenzomammana Jan 24, 2024
0e89fc8
refactor: Avoid changing trainer parameters for anomalib tasks when n…
lorenzomammana Jan 24, 2024
1612955
refactor: Add all parameters to lightning trainer configuration
lorenzomammana Jan 25, 2024
e1518c3
refactor: Add missing trainer parameter to avoid issues
lorenzomammana Jan 25, 2024
19949d5
build: Upgrade anomalib version
lorenzomammana Jan 25, 2024
3336d1d
refactor: Remove useless parameter
lorenzomammana Jan 25, 2024
39fb1f8
fix: Fix model checkpoint not loaded correctly due to weights in infe…
lorenzomammana Jan 25, 2024
c049d73
build: Upgrade version
lorenzomammana Jan 29, 2024
7ad6e1f
Merge branch 'feature/torch-upgrade' into feature/lightning-upgrade
lorenzomammana Jan 29, 2024
fcd9dac
build: Upgrade version
lorenzomammana Jan 29, 2024
68fc916
Merge branch 'feature/torch-upgrade' into feature/lightning-upgrade
lorenzomammana Feb 5, 2024
1d2feff
build: Upgrade version
lorenzomammana Feb 5, 2024
1 change: 0 additions & 1 deletion docs/tutorials/devices_setup.md
@@ -12,7 +12,6 @@ devices: [0]
accelerator: gpu
min_epochs: 1
max_epochs: 10
resume_from_checkpoint: null
log_every_n_steps: 10
```

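The deleted `resume_from_checkpoint: null` entry mirrors the removal of that argument from the Lightning 2.x `Trainer`: resuming is now requested at fit time instead. A minimal sketch of the 2.x pattern, using a toy module and a placeholder checkpoint path rather than anything from quadra:

```python
import torch
from pytorch_lightning import LightningModule, Trainer
from torch.utils.data import DataLoader, TensorDataset


class TinyModule(LightningModule):
    """Toy stand-in for a real task model."""

    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(8, 1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return torch.nn.functional.mse_loss(self.layer(x), y)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.01)


loader = DataLoader(TensorDataset(torch.randn(32, 8), torch.randn(32, 1)), batch_size=8)
trainer = Trainer(accelerator="auto", devices=1, min_epochs=1, max_epochs=10)

# Lightning >= 2.0: resuming goes through fit(ckpt_path=...) instead of the
# removed resume_from_checkpoint Trainer argument ("last.ckpt" is a placeholder path).
trainer.fit(TinyModule(), loader, ckpt_path="last.ckpt")
```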
306 changes: 268 additions & 38 deletions poetry.lock

Large diffs are not rendered by default.

25 changes: 16 additions & 9 deletions pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "quadra"
version = "2.0.0a3"
version = "2.0.0a7"
description = "Deep Learning experiment orchestration library"
authors = [
"Federico Belotti <federico.belotti@orobix.com>",
@@ -40,15 +40,16 @@ quadra = "quadra.main:main"
[tool.poetry.dependencies]
python = ">=3.9,<3.11"
poetry = "1.7.1"
# This will make hydra-plugins available also when running with poetry install
hydra-plugins = { path = "quadra_hydra_plugin", optional = true }
# TODO: We could support previous torch version using mutually exclusive python version but it's bad...
# TODO: Right now it seems that poetry will download every kind of possible dependency from cu116
# To make it faster we could hardcode the correct version of the dependencies
torch = { version = "~2.1", source = "torch_cu121" }

torch = [
{ url = "https://download.pytorch.org/whl/cu121/torch-2.1.2%2Bcu121-cp310-cp310-linux_x86_64.whl", markers = "sys_platform == 'linux' and python_version == '3.10'" },
{ url = "https://download.pytorch.org/whl/cu121/torch-2.1.2%2Bcu121-cp310-cp310-win_amd64.whl", markers = "sys_platform == 'win32' and python_version == '3.10'" },
{ url = "https://download.pytorch.org/whl/cu121/torch-2.1.2%2Bcu121-cp39-cp39-linux_x86_64.whl", markers = "sys_platform == 'linux' and python_version == '3.9'" },
{ url = "https://download.pytorch.org/whl/cu121/torch-2.1.2%2Bcu121-cp39-cp39-win_amd64.whl", markers = "sys_platform == 'win32' and python_version == '3.9'" },
]
torchvision = { version = "~0.16", source = "torch_cu121" }

pytorch_lightning = "1.9.5"
pytorch_lightning = "~2.1"
torchsummary = "~1.5"
torchmetrics = "~0.10"
hydra_core = "~1.3"
@@ -78,7 +79,7 @@ h5py = "~3.8"
timm = "0.9.12"
# Right now only this ref supports timm 0.9.12
segmentation_models_pytorch = { git = "https://github.com/qubvel/segmentation_models.pytorch", rev = "7b381f899ed472a477a89d381689caf535b5d0a6" }
anomalib = { git = "https://github.com/orobix/anomalib.git", tag = "v0.7.0+obx.1.2.11" }
anomalib = { git = "https://github.com/orobix/anomalib.git", tag = "v0.7.0+obx.1.3.0" }
xxhash = "~3.2"
torchinfo = "~1.8"

@@ -130,6 +131,12 @@ name = "onnx_cu12"
url = "https://pkgs.dev.azure.com/onnxruntime/onnxruntime/_packaging/onnxruntime-cuda-12/pypi/simple/"
priority = "explicit"

[tool.poetry.group.dev]
optional = true

[tool.poetry.group.dev.dependencies]
hydra-plugins = { path = "quadra_hydra_plugin" }

[tool.poetry.extras]
dev = [
"black",
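Two packaging details stand out above: torch is now pinned to explicit cu121 wheel URLs selected by platform and Python-version markers (rather than a plain version constraint against the torch_cu121 source), and hydra-plugins moves into an optional dev group, presumably installed with `poetry install --with dev`. A rough post-install sanity check, assuming the pins above resolved as intended:

```python
# Quick environment check after `poetry install`; expected values assume the
# cu121 wheel pins and the pytorch_lightning "~2.1" constraint from pyproject.toml.
import pytorch_lightning as pl
import torch

print(torch.__version__)   # expected to start with "2.1"
print(torch.version.cuda)  # expected "12.1" when a cu121 wheel was installed
print(pl.__version__)      # expected to start with "2.1"
```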
2 changes: 1 addition & 1 deletion quadra/__init__.py
@@ -1,4 +1,4 @@
__version__ = "2.0.0a3"
__version__ = "2.0.0a7"


def get_version():
37 changes: 0 additions & 37 deletions quadra/configs/experiment/base/anomaly/cfa.yaml
@@ -42,43 +42,6 @@ logger:

# PL Trainer Args. Don't add extra parameter here.
trainer:
accelerator: gpu # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
devices: [0]
enable_checkpointing: true
default_root_dir: null
gradient_clip_val: 0
gradient_clip_algorithm: norm
num_nodes: 1
enable_progress_bar: true
overfit_batches: 0.0
track_grad_norm: -1
check_val_every_n_epoch: 1 # Don't validate before extracting features.
fast_dev_run: false
accumulate_grad_batches: 1
max_epochs: 30
min_epochs: null
max_steps: -1
min_steps: null
max_time: null
limit_train_batches: 1.0
limit_val_batches: 1.0
limit_test_batches: 1.0
limit_predict_batches: 1.0
val_check_interval: 1.0 # Don't validate before extracting features.
log_every_n_steps: 50
strategy: null
sync_batchnorm: false
precision: 32
enable_model_summary: true
num_sanity_val_steps: 0
profiler: null
benchmark: false
deterministic: false
reload_dataloaders_every_n_epochs: 0
auto_lr_find: false
replace_sampler_ddp: true
detect_anomaly: false
auto_scale_batch_size: false
plugins: null
move_metrics_to_cpu: false
multiple_trainloader_mode: max_size_cycle
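Most of the keys deleted above were Lightning 1.x-only `Trainer` arguments (`track_grad_norm`, `auto_lr_find`, `auto_scale_batch_size`, `replace_sampler_ddp`, `move_metrics_to_cpu`, `multiple_trainloader_mode`, ...), which the 2.x `Trainer` rejects outright; the per-experiment configs apparently keep only their own overrides and defer the rest to the shared trainer configuration. A small sketch of that behaviour (using `accelerator="auto"` so it also runs on CPU-only machines):

```python
from pytorch_lightning import Trainer

# Accepted in Lightning 2.x: only arguments the new Trainer still exposes.
trainer = Trainer(accelerator="auto", devices=1, max_epochs=30, num_sanity_val_steps=0)

# Arguments dropped in 2.x now fail at construction time instead of being ignored.
try:
    Trainer(track_grad_norm=-1)
except TypeError as err:
    print(err)  # unexpected keyword argument 'track_grad_norm'
```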
38 changes: 1 addition & 37 deletions quadra/configs/experiment/base/anomaly/cflow.yaml
@@ -41,43 +41,7 @@ logger:

# PL Trainer Args. Don't add extra parameter here.
trainer:
accelerator: gpu # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
devices: [0]
enable_checkpointing: true
default_root_dir: null
gradient_clip_val: 0
gradient_clip_algorithm: norm
num_nodes: 1
enable_progress_bar: true
overfit_batches: 0.0
track_grad_norm: -1
check_val_every_n_epoch: 1 # Don't validate before extracting features.
fast_dev_run: false
accumulate_grad_batches: 1
max_epochs: 50
min_epochs: null
max_steps: -1
min_steps: null
max_time: null
limit_train_batches: 1.0
limit_val_batches: 1.0
limit_test_batches: 1.0
limit_predict_batches: 1.0
val_check_interval: 1.0 # Don't validate before extracting features.
log_every_n_steps: 50
strategy: null
sync_batchnorm: false
precision: 32
enable_model_summary: true
num_sanity_val_steps: 0
profiler: null
benchmark: false
deterministic: false
reload_dataloaders_every_n_epochs: 0
auto_lr_find: false
replace_sampler_ddp: true
detect_anomaly: false
auto_scale_batch_size: false
plugins: null
move_metrics_to_cpu: false
multiple_trainloader_mode: max_size_cycle
max_epochs: 50
36 changes: 1 addition & 35 deletions quadra/configs/experiment/base/anomaly/csflow.yaml
@@ -40,43 +40,9 @@ logger:

# PL Trainer Args. Don't add extra parameter here.
trainer:
accelerator: gpu # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
devices: [0]
enable_checkpointing: true
default_root_dir: null
gradient_clip_val: 1 # Grad clip value set based on the official implementation
gradient_clip_algorithm: norm
num_nodes: 1
enable_progress_bar: true
overfit_batches: 0.0
track_grad_norm: -1
check_val_every_n_epoch: 1 # Don't validate before extracting features.
fast_dev_run: false
accumulate_grad_batches: 1
max_epochs: 240
min_epochs: null
max_steps: -1
min_steps: null
max_time: null
limit_train_batches: 1.0
limit_val_batches: 1.0
limit_test_batches: 1.0
limit_predict_batches: 1.0
val_check_interval: 1.0 # Don't validate before extracting features.
log_every_n_steps: 50
strategy: null
sync_batchnorm: false
precision: 32
enable_model_summary: true
num_sanity_val_steps: 0
profiler: null
benchmark: false
deterministic: false
reload_dataloaders_every_n_epochs: 0
auto_lr_find: false
replace_sampler_ddp: true
detect_anomaly: false
auto_scale_batch_size: false
plugins: null
move_metrics_to_cpu: false
multiple_trainloader_mode: max_size_cycle
max_epochs: 240
34 changes: 0 additions & 34 deletions quadra/configs/experiment/base/anomaly/draem.yaml
@@ -43,43 +43,9 @@ logger:

# PL Trainer Args. Don't add extra parameter here.
trainer:
accelerator: gpu # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
devices: [0]
enable_checkpointing: true
default_root_dir: null
gradient_clip_val: 0
gradient_clip_algorithm: norm
num_nodes: 1
enable_progress_bar: true
overfit_batches: 0.0
track_grad_norm: -1
check_val_every_n_epoch: 1 # Don't validate before extracting features.
fast_dev_run: false
accumulate_grad_batches: 1
max_epochs: 700
min_epochs: null
max_steps: -1
min_steps: null
max_time: null
limit_train_batches: 1.0
limit_val_batches: 1.0
limit_test_batches: 1.0
limit_predict_batches: 1.0
val_check_interval: 1.0 # Don't validate before extracting features.
log_every_n_steps: 50
strategy: null
sync_batchnorm: false
precision: 32
enable_model_summary: true
num_sanity_val_steps: 0
profiler: null
benchmark: false
deterministic: false
reload_dataloaders_every_n_epochs: 0
auto_lr_find: false
replace_sampler_ddp: true
detect_anomaly: false
auto_scale_batch_size: false
plugins: null
move_metrics_to_cpu: false
multiple_trainloader_mode: max_size_cycle
41 changes: 4 additions & 37 deletions quadra/configs/experiment/base/anomaly/efficient_ad.yaml
@@ -33,44 +33,11 @@ logger:

trainer:
devices: [2]
accelerator: auto
strategy:
accumulate_grad_batches: 1
amp_backend: native
auto_lr_find: false
auto_scale_batch_size: false
auto_select_gpus: false
benchmark: false
check_val_every_n_epoch: ${trainer.max_epochs}
default_root_dir: null
detect_anomaly: false
deterministic: false
enable_checkpointing: true
enable_model_summary: true
enable_progress_bar: true
fast_dev_run: false
gradient_clip_val: 0
ipus: null
limit_predict_batches: 1.0
limit_test_batches: 1.0
limit_train_batches: 1.0
limit_val_batches: 1.0
log_every_n_steps: 50
max_epochs: 20
max_steps: 20000
max_time: null
min_epochs: null
min_steps: null
move_metrics_to_cpu: false
multiple_trainloader_mode: max_size_cycle
num_nodes: 1
num_sanity_val_steps: 0
overfit_batches: 0.0
plugins: null
precision: 32
profiler: null
replace_sampler_ddp: true
sync_batchnorm: false
tpu_cores: null
track_grad_norm: -1
val_check_interval: 1.0 # Don't validate before extracting features.
# This will avoid issues with ModelSignatureWrapper
# As the default forward for EfficientAD is performed with a None attribute
# Which we currently can't handle
num_sanity_val_steps: 0
37 changes: 1 addition & 36 deletions quadra/configs/experiment/base/anomaly/fastflow.yaml
@@ -37,45 +37,10 @@ logger:
experiment_name:
run_name: ${core.name}

# PL Trainer Args. Don't add extra parameter here.
trainer:
accelerator: gpu # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
devices: [0]
enable_checkpointing: true
default_root_dir: null
gradient_clip_val: 0
gradient_clip_algorithm: norm
num_nodes: 1
enable_progress_bar: true
overfit_batches: 0.0
track_grad_norm: -1
check_val_every_n_epoch: 1 # Don't validate before extracting features.
fast_dev_run: false
accumulate_grad_batches: 1
max_epochs: 500
min_epochs: null
max_steps: -1
min_steps: null
max_time: null
limit_train_batches: 1.0
limit_val_batches: 1.0
limit_test_batches: 1.0
limit_predict_batches: 1.0
val_check_interval: 1.0 # Don't validate before extracting features.
log_every_n_steps: 50
strategy: null
sync_batchnorm: false
precision: 32
enable_model_summary: true
num_sanity_val_steps: 0
profiler: null
benchmark: false
deterministic: false
reload_dataloaders_every_n_epochs: 0
auto_lr_find: false
replace_sampler_ddp: true
detect_anomaly: false
auto_scale_batch_size: false
plugins: null
move_metrics_to_cpu: false
multiple_trainloader_mode: max_size_cycle
max_epochs: 500
39 changes: 0 additions & 39 deletions quadra/configs/experiment/base/anomaly/padim.yaml
@@ -33,44 +33,5 @@ logger:

trainer:
devices: [2]
accelerator: auto
strategy:
accumulate_grad_batches: 1
amp_backend: native
auto_lr_find: false
auto_scale_batch_size: false
auto_select_gpus: false
benchmark: false
check_val_every_n_epoch: ${trainer.max_epochs} # Don't validate before extracting features.
default_root_dir: null
detect_anomaly: false
deterministic: false
enable_checkpointing: true
enable_model_summary: true
enable_progress_bar: true
fast_dev_run: false
gradient_clip_val: 0
ipus: null
limit_predict_batches: 1.0
limit_test_batches: 1.0
limit_train_batches: 1.0
limit_val_batches: 1.0
log_every_n_steps: 50
max_epochs: 1
max_steps: -1
max_time: null
min_epochs: null
min_steps: null
move_metrics_to_cpu: false
multiple_trainloader_mode: max_size_cycle
num_nodes: 1
num_sanity_val_steps: 0
overfit_batches: 0.0
plugins: null
precision: 32
profiler: null
replace_sampler_ddp: true
sync_batchnorm: false
tpu_cores: null
track_grad_norm: -1
val_check_interval: 1.0 # Don't validate before extracting features.