Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev/main #198

Merged
merged 41 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
2eef2d2
fix formatting
davidackerman Mar 19, 2024
d6705fb
Merge branch 'dev/examples' of github.com:janelia-cellmap/dacapo into…
rhoadesScholar Mar 19, 2024
1c756a4
fix: 🐛 Fix default runs_base_dir
rhoadesScholar Mar 19, 2024
8cae470
:art: Format Python code with psf/black
rhoadesScholar Mar 19, 2024
34ba7a7
chore: 🩹 Make prediction/validation single worker to fix patches
rhoadesScholar Mar 19, 2024
f9a0bbb
chore: 🎨 Black format.
rhoadesScholar Mar 19, 2024
66ba704
Format Python code with psf/black push (#195)
rhoadesScholar Mar 19, 2024
bc1fdba
fix registry
mzouink Mar 19, 2024
9104dcc
Merge branch 'main' into dev/main
rhoadesScholar Mar 19, 2024
74bdc18
Merge branch 'dev/main' into cosem_starter
mzouink Mar 19, 2024
d9f32c4
Merge branch 'dev/main' of github.com:janelia-cellmap/dacapo into dev…
rhoadesScholar Mar 19, 2024
6bb5d47
head matching
mzouink Mar 19, 2024
c669d43
Merge branch 'cosem_starter' of https://github.com/janelia-cellmap/da…
mzouink Mar 19, 2024
aa65907
Merge branch 'main' into dev/main
rhoadesScholar Mar 19, 2024
d8076d5
fix minor errors
mzouink Mar 19, 2024
74461db
Merge branch 'main' into dev/main
rhoadesScholar Mar 19, 2024
1c1cf4f
Merge branch 'main' into dev/main
rhoadesScholar Mar 19, 2024
c128020
Merge branch 'dev/main' into cosem_starter
mzouink Mar 20, 2024
8bb5d50
Update start.py
rhoadesScholar Mar 20, 2024
34d11be
Update start.py
rhoadesScholar Mar 20, 2024
4c594fe
Cosem starter - Head matching (#196)
rhoadesScholar Mar 20, 2024
838891e
Merge branch 'dev/main' of github.com:janelia-cellmap/dacapo into dev…
rhoadesScholar Mar 20, 2024
c39f72b
perf: ⚡️ Restrict local prediction to one worker.
rhoadesScholar Mar 20, 2024
5e6b0f4
perf: ⚡️ Change default validation worker number.
rhoadesScholar Mar 20, 2024
04c05d1
feat: 🚀 Improve model loading/prediction.
rhoadesScholar Mar 20, 2024
8185d14
feat: 🚀 Improve model loading/prediction. (#199)
rhoadesScholar Mar 20, 2024
88d088a
chore: 🙈 Remove ipynotebook checkpoints.
rhoadesScholar Mar 20, 2024
d887f8f
Merge branch 'dev/main' into dev/examples
rhoadesScholar Mar 20, 2024
ff006df
Merge branch 'dev/examples' of github.com:janelia-cellmap/dacapo into…
rhoadesScholar Mar 20, 2024
d931648
Dev/examples (#194)
rhoadesScholar Mar 20, 2024
72ba70f
Merge branch 'dev/main' of github.com:janelia-cellmap/dacapo into dev…
rhoadesScholar Mar 20, 2024
d470a58
fix: 🐛 Predict fix.
rhoadesScholar Mar 20, 2024
be87111
feat: ✨ Generalize get_viewer util
rhoadesScholar Mar 20, 2024
5228051
Update validate.py
rhoadesScholar Mar 20, 2024
34f4b7f
Merge branch 'dev/main' into dev/get_viewer
rhoadesScholar Mar 20, 2024
0d24e2a
Merge branch 'dev/get_viewer' of github.com:janelia-cellmap/dacapo in…
rhoadesScholar Mar 20, 2024
7f44619
feat: ✨ Update synthetic example notebook.
rhoadesScholar Mar 20, 2024
6ae0dc8
feat: ✨ Generalize get_viewer util (#200)
rhoadesScholar Mar 20, 2024
2400b1e
Merge branch 'dev/main' of github.com:janelia-cellmap/dacapo into dev…
rhoadesScholar Mar 20, 2024
14c42db
fix: 🐛 Fix predict_worker
rhoadesScholar Mar 20, 2024
af8b671
style: 🎨 Black format.
rhoadesScholar Mar 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
*.hdf
*.h5
# *.ipynb
.ipynb_checkpoints/
*.pyc
*.egg-info
*.dat
Expand Down
24 changes: 13 additions & 11 deletions dacapo/blockwise/predict_worker.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
from pathlib import Path
from typing import Optional

import torch
from dacapo.experiments.datasplits.datasets.arrays import ZarrArray
Expand Down Expand Up @@ -45,9 +46,9 @@ def cli(log_level):
@click.option(
"-i",
"--iteration",
required=True,
type=int,
type=Optional[int],
help="The training iteration of the model to use for prediction.",
default=None,
)
@click.option(
"-ic",
Expand All @@ -62,7 +63,7 @@ def cli(log_level):
@click.option("-od", "--output_dataset", required=True, type=str)
def start_worker(
run_name: str,
iteration: int,
iteration: int | None,
input_container: Path | str,
input_dataset: str,
output_container: Path | str,
Expand All @@ -76,11 +77,12 @@ def start_worker(
run_config = config_store.retrieve_run_config(run_name)
run = Run(run_config)

# create weights store
weights_store = create_weights_store()
if iteration is not None:
# create weights store
weights_store = create_weights_store()

# load weights
weights_store.retrieve_weights(run_name, iteration)
# load weights
weights_store.retrieve_weights(run_name, iteration)

# get arrays
input_array_identifier = LocalArrayIdentifier(Path(input_container), input_dataset)
Expand Down Expand Up @@ -178,15 +180,15 @@ def start_worker(

def spawn_worker(
run_name: str,
iteration: int,
iteration: int | None,
input_array_identifier: "LocalArrayIdentifier",
output_array_identifier: "LocalArrayIdentifier",
):
"""Spawn a worker to predict on a given dataset.

Args:
run_name (str): The name of the run to apply.
iteration (int): The training iteration of the model to use for prediction.
iteration (int or None): The training iteration of the model to use for prediction.
input_array_identifier (LocalArrayIdentifier): The raw data to predict on.
output_array_identifier (LocalArrayIdentifier): The identifier of the prediction array.
"""
Expand All @@ -200,8 +202,6 @@ def spawn_worker(
"start-worker",
"--run-name",
run_name,
"--iteration",
iteration,
"--input_container",
input_array_identifier.container,
"--input_dataset",
Expand All @@ -211,6 +211,8 @@ def spawn_worker(
"--output_dataset",
output_array_identifier.dataset,
]
if iteration is not None:
command.extend(["--iteration", str(iteration)])

print("Defining worker with command: ", compute_context.wrap_command(command))

Expand Down
2 changes: 1 addition & 1 deletion dacapo/examples/distance_task/cosem_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
1 change: 0 additions & 1 deletion dacapo/examples/distance_task/cosem_example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# %% [markdown]
# # Dacapo
#
Expand Down
17 changes: 8 additions & 9 deletions dacapo/examples/distance_task/cosem_example_fill_in_the_blank.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@
# Create the datasplit, produce the neuroglancer link and store the datasplit
datasplit = ...
viewer = ...
config_store...

config_store
# %% [markdown]
# ## Task
# What do you want to learn? An instance segmentation? If so, how? Affinities,
Expand All @@ -40,9 +39,8 @@

# Create a distance task config where the clip_distance=tol_distance=10x the output resolution,
# and scale_factor = 20x the output resolution
task_config =
config_store....

task_config = ...
config_store
# %% [markdown]
# ## Architecture
#
Expand Down Expand Up @@ -97,14 +95,14 @@
# Create a gamma augment config with range .5 to 2
...,
# Create an intensity scale shift agument config to rescale data from the range 0->1 to -1->1
...,
...,
],
snapshot_interval=10000,
min_masked=0.05,
clip_raw=True,
)
# Store the trainer
config_store....
config_store

# %% [markdown]
# ## Run
Expand All @@ -128,7 +126,7 @@
run_config = ...

print(run_config.name)
config_store...
config_store

# %% [markdown]
# ## Train
Expand All @@ -138,6 +136,7 @@
# %%
from dacapo.train import train_run
from dacapo.experiments.run import Run

# load the run and train it
run = Run(config_store...)
run = Run(config_store)
train_run(run)
20 changes: 10 additions & 10 deletions dacapo/examples/distance_task/synthetic_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@

datasplit = datasplit_config.datasplit_type(datasplit_config)
viewer = datasplit._neuroglancer()
config_store.store_datasplit_config(datasplit_config)
# config_store.store_datasplit_config(datasplit_config)

# %% [markdown]
# The above datasplit_generator automates a lot of the heavy lifting for configuring data to set up a run. The following shows everything that it is doing, and an equivalent way to set up the datasplit.
Expand Down Expand Up @@ -232,7 +232,7 @@
tol_distance=80.0,
scale_factor=160.0,
)
config_store.store_task_config(task_config)
# config_store.store_task_config(task_config)

# %% [markdown]
# ## Architecture
Expand All @@ -252,11 +252,11 @@
downsample_factors=[(2, 2, 2), (2, 2, 2), (2, 2, 2)],
eval_shape_increase=(72, 72, 72),
)
try:
config_store.store_architecture_config(architecture_config)
except:
config_store.delete_architecture_config(architecture_config.name)
config_store.store_architecture_config(architecture_config)
# try:
# config_store.store_architecture_config(architecture_config)
# except:
# config_store.delete_architecture_config(architecture_config.name)
# config_store.store_architecture_config(architecture_config)

# %% [markdown]
# ## Trainer
Expand Down Expand Up @@ -293,7 +293,7 @@
min_masked=0.05,
clip_raw=True,
)
config_store.store_trainer_config(trainer_config)
# config_store.store_trainer_config(trainer_config)

# %% [markdown]
# ## Run
Expand All @@ -311,7 +311,7 @@
# "best",
# )

iterations = 2000
iterations = 200
validation_interval = iterations // 2
repetitions = 1
for i in range(repetitions):
Expand Down Expand Up @@ -376,7 +376,7 @@
# %%
from dacapo.validate import validate

validate(run_config.name, iterations, num_workers=16, overwrite=True)
validate(run_config.name, iterations, num_workers=1, overwrite=True)

# %% [markdown]
# ## Predict
Expand Down
10 changes: 8 additions & 2 deletions dacapo/experiments/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,14 @@ def __init__(self, run_config):
if run_config.start_config is not None
else None
)
if self.start is not None:
self.start.initialize_weights(self.model)
if self.start is None:
return
else:
if hasattr(run_config.task_config,"channels"):
new_head = run_config.task_config.channels
else:
new_head = None
self.start.initialize_weights(self.model,new_head=new_head)

@staticmethod
def get_validation_scores(run_config) -> ValidationScores:
Expand Down
37 changes: 32 additions & 5 deletions dacapo/experiments/starts/cosem_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,39 @@
import logging
from cellmap_models import cosem
from pathlib import Path
from .start import Start
from .start import Start, _set_weights

logger = logging.getLogger(__file__)


def get_model_setup(run):
    """Load the pretrained COSEM model for ``run`` and report its head/voxel setup.

    Args:
        run: Identifier of the pretrained COSEM model to load.

    Returns:
        Tuple ``(classes_channels, voxel_size_input, voxel_size_output)``.
        Each entry is ``None`` when the loaded model does not expose the
        corresponding attribute, or when loading fails entirely.
    """
    try:
        model = cosem.load_model(run)
        # getattr with a None default collapses the repeated hasattr/else blocks.
        classes_channels = getattr(model, "classes_channels", None)
        voxel_size_input = getattr(model, "voxel_size_input", None)
        voxel_size_output = getattr(model, "voxel_size_output", None)
        return classes_channels, voxel_size_input, voxel_size_output
    except Exception as e:
        # Best-effort lookup: head matching is optional, so log and fall back.
        logger.error(
            f"could not load model setup: {e} - Not a big deal, model will train without head matching"
        )
        return None, None, None

class CosemStart(Start):
def __init__(self, start_config):
super().__init__(start_config)
self.run = start_config.run
self.criterion = start_config.criterion
self.name = f"{self.run}/{self.criterion}"
channels, voxel_size_input, voxel_size_output = get_model_setup(self.run)
if voxel_size_input is not None:
logger.warning(f"Starter model resolution: input {voxel_size_input} output {voxel_size_output}, Make sure to set the correct resolution for the input data.")
self.channels = channels

def check(self):
from dacapo.store.create_store import create_weights_store
Expand All @@ -25,7 +49,8 @@ def check(self):
else:
logger.info(f"Checkpoint for {self.name} exists.")

def initialize_weights(self, model):
def initialize_weights(self, model, new_head=None):
self.check()
from dacapo.store.create_store import create_weights_store

weights_store = create_weights_store()
Expand All @@ -36,4 +61,6 @@ def initialize_weights(self, model):
path = weights_dir / self.criterion
cosem.download_checkpoint(self.name, path)
weights = weights_store._retrieve_weights(self.run, self.criterion)
super._set_weights(model, weights)
_set_weights(model, weights, self.run, self.criterion, self.channels, new_head)


Loading
Loading