From ff61f7c088487dd30af567af3eb0f796ce7149e1 Mon Sep 17 00:00:00 2001
From: mzouink
Date: Wed, 7 Feb 2024 17:27:21 -0500
Subject: [PATCH 1/3] bug fixes and better logs

---
 .../datasplits/datasets/arrays/concat_array.py |  7 ++++++-
 .../datasplits/datasets/arrays/dvid_array.py   |  2 +-
 .../datasplits/datasets/arrays/numpy_array.py  |  2 +-
 .../datasplits/datasets/arrays/zarr_array.py   |  2 +-
 dacapo/experiments/model.py                    |  2 +-
 dacapo/train.py                                | 10 ++++++++--
 dacapo/validate.py                             |  2 ++
 setup.py                                       |  4 ++++
 8 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
index 122526b1..1475c7b9 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py
@@ -5,6 +5,9 @@
 import numpy as np
 
 from typing import Dict, Any
+import logging
+
+logger = logging.getLogger(__file__)
 
 
 class ConcatArray(Array):
@@ -116,5 +119,7 @@ def __getitem__(self, roi: Roi) -> np.ndarray:
             axis=0,
         )
         if concatenated.shape[0] == 1:
-            raise Exception(f"{concatenated.shape}, shapes")
+            logger.info(
+                f"Concatenated array has only one channel: {self.name} {concatenated.shape}"
+            )
         return concatenated
diff --git a/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py b/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py
index beaa474d..e08ffe56 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py
@@ -41,7 +41,7 @@ def attrs(self):
 
     @property
     def axes(self):
-        return ["t", "z", "y", "x"][-self.dims :]
+        return ["c", "z", "y", "x"][-self.dims :]
 
     @property
     def dims(self) -> int:
diff --git a/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py b/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py
index 7101d737..5f2bc048 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py
@@ -35,7 +35,7 @@ def from_gp_array(cls, array: gp.Array):
             ((["b", "c"] if len(array.data.shape) == instance.dims + 2 else []))
             + (["c"] if len(array.data.shape) == instance.dims + 1 else [])
             + [
-                "t",
+                "c",
                 "z",
                 "y",
                 "x",
diff --git a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py
index cadfcb6c..42030e70 100644
--- a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py
+++ b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py
@@ -54,7 +54,7 @@ def axes(self):
             f"Zarr {self.file_name} and dataset {self.dataset} has attributes: {list(self._attributes.items())}\n"
             f"Using default {['t', 'z', 'y', 'x'][-self.dims::]}",
         )
-        return ["t", "z", "y", "x"][-self.dims : :]
+        return ["c", "z", "y", "x"][-self.dims : :]
 
     @property
     def dims(self) -> int:
diff --git a/dacapo/experiments/model.py b/dacapo/experiments/model.py
index bbaacb2d..fe1f8e7d 100644
--- a/dacapo/experiments/model.py
+++ b/dacapo/experiments/model.py
@@ -24,7 +24,7 @@ def __init__(
         self,
         architecture: Architecture,
         prediction_head: torch.nn.Module,
-        eval_activation: torch.nn.Module = None,
+        eval_activation: torch.nn.Module | None = None,
     ):
         super().__init__()
 
diff --git a/dacapo/train.py b/dacapo/train.py
index 9203c1be..86473ee3 100644
--- a/dacapo/train.py
+++ b/dacapo/train.py
@@ -16,6 +16,7 @@ def train(run_name: str, compute_context: ComputeContext = LocalTorch()):
     """Train a run"""
 
     if compute_context.train(run_name):
+        logger.error("Run %s is already being trained", run_name)
        # if compute context runs train in some other process
        # we are done here.
        return
@@ -96,10 +97,15 @@ def train_run(
         weights_store.retrieve_weights(run, iteration=trained_until)
 
     elif latest_weights_iteration > trained_until:
-        raise RuntimeError(
+        weights_store.retrieve_weights(run, iteration=latest_weights_iteration)
+        logger.error(
             f"Found weights for iteration {latest_weights_iteration}, but "
             f"run {run.name} was only trained until {trained_until}."
         )
+        # raise RuntimeError(
+        #     f"Found weights for iteration {latest_weights_iteration}, but "
+        #     f"run {run.name} was only trained until {trained_until}."
+        # )
 
 
     # start/resume training
@@ -157,7 +163,7 @@
             run.model.eval()
 
             # free up optimizer memory to allow larger validation blocks
-            run.model = run.model.to(torch.device("cpu"))
+            # run.model = run.model.to(torch.device("cpu"))
             run.move_optimizer(torch.device("cpu"), empty_cuda_cache=True)
 
             weights_store.store_weights(run, iteration_stats.iteration + 1)
diff --git a/dacapo/validate.py b/dacapo/validate.py
index 25b7463e..a1cf9da7 100644
--- a/dacapo/validate.py
+++ b/dacapo/validate.py
@@ -141,6 +141,7 @@ def validate_run(
         prediction_array_identifier = array_store.validation_prediction_array(
             run.name, iteration, validation_dataset
         )
+        logger.info("Predicting on dataset %s", validation_dataset.name)
         predict(
             run.model,
             validation_dataset.raw,
@@ -148,6 +149,7 @@ def validate_run(
             compute_context=compute_context,
             output_roi=validation_dataset.gt.roi,
         )
+        logger.info("Predicted on dataset %s", validation_dataset.name)
 
         post_processor.set_prediction(prediction_array_identifier)
 
diff --git a/setup.py b/setup.py
index 3ba1f0d0..b38a41ed 100644
--- a/setup.py
+++ b/setup.py
@@ -36,5 +36,9 @@
         "funlib.evaluate @ git+https://github.com/pattonw/funlib.evaluate",
         "gunpowder>=1.3",
         "lsds>=0.1.3",
+        "xarray",
+        "cattrs",
+        "numpy-indexed",
+        "click",
     ],
 )

From 149044093f96790ac88e0fad700017f1521325e8 Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi
Date: Wed, 7 Feb 2024 17:38:46 -0500
Subject: [PATCH 2/3] Update train.py

---
 dacapo/train.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/dacapo/train.py b/dacapo/train.py
index 86473ee3..cc020794 100644
--- a/dacapo/train.py
+++ b/dacapo/train.py
@@ -102,10 +102,6 @@ def train_run(
             f"Found weights for iteration {latest_weights_iteration}, but "
             f"run {run.name} was only trained until {trained_until}."
         )
-        # raise RuntimeError(
-        #     f"Found weights for iteration {latest_weights_iteration}, but "
-        #     f"run {run.name} was only trained until {trained_until}."
-        # )
 
 
     # start/resume training

From 3c5f2da14348fbde081d5a6e190a4984645bfd08 Mon Sep 17 00:00:00 2001
From: Marwan Zouinkhi
Date: Wed, 7 Feb 2024 17:39:21 -0500
Subject: [PATCH 3/3] Update train.py

---
 dacapo/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dacapo/train.py b/dacapo/train.py
index cc020794..e8667d8b 100644
--- a/dacapo/train.py
+++ b/dacapo/train.py
@@ -159,7 +159,7 @@
             run.model.eval()
 
             # free up optimizer memory to allow larger validation blocks
-            # run.model = run.model.to(torch.device("cpu"))
+            run.model = run.model.to(torch.device("cpu"))
             run.move_optimizer(torch.device("cpu"), empty_cuda_cache=True)
 
             weights_store.store_weights(run, iteration_stats.iteration + 1)