Commit
Dev/main (#207)
mzouink authored Mar 21, 2024
2 parents b0df46e + 6f156ec commit f292439
Showing 14 changed files with 2,926 additions and 490 deletions.
6 changes: 4 additions & 2 deletions dacapo/blockwise/predict_worker.py
@@ -82,7 +82,9 @@ def start_worker(
weights_store = create_weights_store()

# load weights
weights_store.retrieve_weights(run_name, iteration)
run.model.load_state_dict(
weights_store.retrieve_weights(run_name, iteration).model
)

# get arrays
input_array_identifier = LocalArrayIdentifier(Path(input_container), input_dataset)
@@ -124,7 +126,7 @@ def start_worker(
pipeline += gp.Normalize(raw)

# predict
model.eval()
# model.eval()
pipeline += gp_torch.Predict(
model=model,
inputs={"x": raw},
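
The worker now loads retrieved weights into the model explicitly instead of expecting the weights store to apply them. A minimal sketch of the new pattern, assuming (as the diff suggests) that retrieve_weights returns a checkpoint object whose .model attribute is a PyTorch state dict; the run name and iteration here are hypothetical:

```python
from dacapo.experiments.run import Run
from dacapo.store.create_store import create_config_store, create_weights_store

config_store = create_config_store()
run = Run(config_store.retrieve_run_config("my_run"))  # hypothetical run name

weights_store = create_weights_store()
# retrieve_weights appears to return a checkpoint whose .model field holds
# the model's state dict; load it into the run's model explicitly.
weights = weights_store.retrieve_weights(run.name, 5000)  # hypothetical iteration
run.model.load_state_dict(weights.model)
```

The same hunk comments out model.eval() before gp_torch.Predict, so prediction now runs with the model in whatever mode it was loaded in.
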
175 changes: 91 additions & 84 deletions dacapo/utils/view.py
@@ -1,17 +1,13 @@
from typing import Optional
import neuroglancer
from IPython.display import IFrame
import numpy as np
import gunpowder as gp
from funlib.persistence import Array
from dacapo.experiments.datasplits.datasets.arrays import ZarrArray
from funlib.persistence import open_ds
from threading import Thread
import threading
import neuroglancer
from neuroglancer.viewer_state import ViewerState
import os
from dacapo.experiments.run import Run
from dacapo.store.create_store import create_array_store
from dacapo.store.create_store import create_array_store, create_stats_store
from IPython.display import IFrame
import time
import copy
@@ -86,25 +82,74 @@ def add_scalar_layer(state, name, data, voxel_size):
)


class NeuroglancerRunViewer:
class BestScore:
def __init__(self, run: Run):
self.run: Run = run
self.most_recent_iteration = 0
self.prediction = None
self.score: float = -1
self.iteration: int = 0
self.parameter: str = None
self.validation_parameters = run.validation_scores.parameters

self.array_store = create_array_store()
self.stats_store = create_stats_store()

def get_ds(self, iteration, validation_dataset):
prediction_array_identifier = self.array_store.validation_prediction_array(
self.run.name, iteration, validation_dataset.name
)
container = str(prediction_array_identifier.container)
dataset = str(
os.path.join(
str(iteration), validation_dataset.name, "output", str(self.parameter)
)
)
self.ds = open_ds(container, dataset)

def does_new_best_exist(self):
new_best_exists = False
self.validation_scores = self.stats_store.retrieve_validation_iteration_scores(
self.run.name
)

for validation_idx, validation_dataset in enumerate(
self.run.datasplit.validate
):
for iteration_scores in self.validation_scores:
iteration = iteration_scores.iteration
for parameter_idx, parameter in enumerate(self.validation_parameters):
# hardcoded for f1_score
current_score = iteration_scores.scores[validation_idx][
parameter_idx
][20]
if current_score > self.score:
self.iteration = iteration
self.score = current_score
self.parameter = parameter
self.get_ds(iteration, validation_dataset)
new_best_exists = True
return new_best_exists


class NeuroglancerRunViewer:

def __init__(self, run: Run, embedded=False):
self.run: Run = run
self.best_score = BestScore(run)
self.embedded = embedded

def updated_neuroglancer_layer(self, layer_name, ds):
source = neuroglancer.LocalVolume(
data=ds.data,
dimensions=neuroglancer.CoordinateSpace(
names=["c", "z", "y", "x"],
units=["", "nm", "nm", "nm"],
scales=[1] + list(ds.voxel_size),
names=["z", "y", "x"],
units=["nm", "nm", "nm"],
scales=list(ds.voxel_size),
),
voxel_offset=[0] + list(ds.roi.offset),
voxel_offset=list(ds.roi.offset),
)
new_state = copy.deepcopy(self.viewer.state)
if len(new_state.layers) == 1:
new_state.layers[layer_name] = neuroglancer.ImageLayer(source=source)
new_state.layers[layer_name] = neuroglancer.SegmentationLayer(source=source)
else:
# replace name everywhere to preserve state, like what is selected
new_state_str = json.dumps(new_state.to_json())
@@ -113,7 +158,6 @@ def updated_neuroglancer_layer(self, layer_name, ds):
new_state.layers[layer_name].source = source

self.viewer.set_state(new_state)
print(self.viewer.state)

def deprecated_start_neuroglancer(self):
neuroglancer.set_server_bind_address("0.0.0.0")
@@ -122,6 +166,7 @@ def deprecated_start_neuroglancer(self):
def start_neuroglancer(self):
neuroglancer.set_server_bind_address("0.0.0.0")
self.viewer = neuroglancer.Viewer()
print(f"Neuroglancer viewer: {self.viewer}")
with self.viewer.txn() as state:
state.showSlices = False

@@ -136,9 +181,11 @@ def start_neuroglancer(self):
voxel_offset=self.raw.roi.offset,
),
)
return IFrame(src=self.viewer, width=1800, height=900)
if self.embedded:
return IFrame(src=self.viewer, width=1800, height=900)

def start(self):
self.run_thread = True
self.array_store = create_array_store()
self.get_datasets()
self.new_validation_checker()
@@ -152,85 +199,45 @@ def open_from_array_identitifier(self, array_identifier):

def get_datasets(self):
for validation_dataset in self.run.datasplit.validate:
(
input_raw_array_identifier,
input_gt_array_identifier,
) = self.array_store.validation_input_arrays(
self.run.name, validation_dataset.name
)
raw = validation_dataset.raw._source_array
gt = validation_dataset.gt._source_array
self.raw = open_ds(str(raw.file_name), raw.dataset)
self.gt = open_ds(str(gt.file_name), gt.dataset)

self.raw = self.open_from_array_identitifier(input_raw_array_identifier)
self.gt = self.open_from_array_identitifier(input_gt_array_identifier)
print(self.raw)
def update_best_info(self):
self.segmentation = self.best_score.ds
self.most_recent_iteration = self.best_score.iteration

def update_best_info(self, iteration, validation_dataset_name):
prediction_array_identifier = self.array_store.validation_prediction_array(
self.run.name,
iteration,
validation_dataset_name,
)
self.prediction = self.open_from_array_identitifier(prediction_array_identifier)
self.most_recent_iteration = iteration

def update_neuroglancer(self, iteration):
def update_neuroglancer(self):
self.updated_neuroglancer_layer(
f"prediction at iteration {iteration}", self.prediction
f"prediction at iteration {self.best_score.iteration}, f1 score {self.best_score.score}",
self.segmentation,
)
return None

def update_best(self, iteration, validation_dataset_name):
self.update_best_info(iteration, validation_dataset_name)
self.update_neuroglancer(iteration)
def update_best_layer(self):
self.update_best_info()
self.update_neuroglancer()

def new_validation_checker(self):
self.process = Thread(target=self.update_with_new_validation_if_possible)
self.process.daemon = True
self.process.start()
self.thread = threading.Thread(
target=self.update_with_new_validation_if_possible, daemon=True
)
self.thread.run_thread = True
self.thread.start()

def update_with_new_validation_if_possible(self):
thread = threading.currentThread()
# Here we are assuming that we are checking the directory .../validation_config/prediction
# Ideally we will only have to check for the current best validation
while True:
time.sleep(3)
for validation_dataset in self.run.datasplit.validate:
most_recent_iteration_previous = self.most_recent_iteration
prediction_array_identifier = (
self.array_store.validation_prediction_array(
self.run.name,
self.most_recent_iteration,
validation_dataset.name,
)
while getattr(thread, "run_thread", True):
time.sleep(10)
new_best_exists = self.best_score.does_new_best_exist()
if new_best_exists:
print(
f"New best f1 score of {self.best_score.score} at iteration {self.best_score.iteration} and parameter {self.best_score.parameter}"
)
self.update_best_layer()

container = prediction_array_identifier.container
if os.path.exists(container):
iteration_dirs = [
name
for name in os.listdir(container)
if os.path.isdir(os.path.join(container, name))
and name.isnumeric()
]

for iteration_dir in iteration_dirs:
if int(iteration_dir) > self.most_recent_iteration:
inference_dir = os.path.join(
container,
iteration_dir,
"validation_config",
"prediction",
)
if os.path.exists(inference_dir):
# Ignore basic zarr and n5 files
inference_dir_contents = [
f
for f in os.listdir(inference_dir)
if not f.startswith(".") and not f.endswith(".json")
]
if inference_dir_contents:
# then it should have at least a chunk written out, assume it has all of it written out
self.most_recent_iteration = int(iteration_dir)
if most_recent_iteration_previous != self.most_recent_iteration:
self.update_best(
self.most_recent_iteration,
validation_dataset.name,
)
def stop(self):
self.thread.run_thread = False
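
Taken together, these changes replace the old directory-polling loop with a stats-store query for the best f1 score and give the viewer an explicit stop. A hedged usage sketch (the run name is hypothetical; embedded=True is only needed to get an IFrame back for notebook display):

```python
from dacapo.experiments.run import Run
from dacapo.store.create_store import create_config_store
from dacapo.utils.view import NeuroglancerRunViewer

config_store = create_config_store()
run = Run(config_store.retrieve_run_config("my_run"))  # hypothetical run name

viewer = NeuroglancerRunViewer(run, embedded=False)
viewer.start()  # starts neuroglancer and the background best-score checker
# ... training/validation proceeds elsewhere; the checker polls every 10 s ...
viewer.stop()   # clears thread.run_thread so the polling loop exits
```
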
2 changes: 1 addition & 1 deletion dacapo/validate.py
@@ -53,7 +53,7 @@ def validate(
def validate_run(
run: Run,
iteration: int,
num_workers: int = 30,
num_workers: int = 1,
output_dtype: str = "uint8",
overwrite: bool = True,
):
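
With the default num_workers dropped from 30 to 1, validation no longer fans out by default; callers that want the old parallelism must ask for it. A sketch under the same assumptions as the earlier examples (run name and iteration are hypothetical):

```python
from dacapo.experiments.run import Run
from dacapo.store.create_store import create_config_store
from dacapo.validate import validate_run

config_store = create_config_store()
run = Run(config_store.retrieve_run_config("my_run"))  # hypothetical run name

# num_workers now defaults to 1; pass a larger value to restore the old parallelism.
validate_run(run, iteration=5000, num_workers=30)  # hypothetical iteration
```
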
3 changes: 0 additions & 3 deletions examples/__init__.py
@@ -1,3 +0,0 @@
from .random_source_pipeline import random_source_pipeline
from .synthetic_source_worker import generate_synthetic_dataset
from .utils import get_viewer
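
Removing these re-exports means the helpers are no longer importable from the examples package itself; a sketch of the adjusted imports, assuming the module layout implied by the deleted lines:

```python
# Before (worked via the package-level re-exports, now removed):
# from examples import random_source_pipeline, generate_synthetic_dataset, get_viewer

# After: import each helper from its submodule directly.
from examples.random_source_pipeline import random_source_pipeline
from examples.synthetic_source_worker import generate_synthetic_dataset
from examples.utils import get_viewer
```
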
2 changes: 2 additions & 0 deletions examples/cosem/run_cosem.py
@@ -1,5 +1,6 @@
# %%
import dacapo

# %%
from dacapo.store.create_store import create_config_store

@@ -54,6 +55,7 @@
IntensityScaleShiftAugmentConfig,
)
from dacapo.experiments.trainers import GunpowderTrainerConfig

trainer_config = GunpowderTrainerConfig(
name="cosem_finetune2",
batch_size=1,
2 changes: 2 additions & 0 deletions examples/cosem/train_hela.py
@@ -5,6 +5,8 @@
run_config = config_store.retrieve_run_config("cosem_distance_run_4nm_finetune3")

from dacapo.experiments.run import Run

run = Run(run_config)
from dacapo.train import train_run

train_run(run)
16 changes: 7 additions & 9 deletions examples/distance_task/cosem_example.ipynb
@@ -9,8 +9,7 @@
"# First we need to create a config store to store our configurations\n",
"from dacapo.store.create_store import create_config_store\n",
"\n",
"config_store = create_config_store()\n",
"\n"
"config_store = create_config_store()"
]
},
{
@@ -42,7 +41,7 @@
"\n",
"datasplit = datasplit_config.datasplit_type(datasplit_config)\n",
"viewer = datasplit._neuroglancer()\n",
"config_store.store_datasplit_config(datasplit_config)\n"
"config_store.store_datasplit_config(datasplit_config)"
]
},
{
@@ -71,7 +70,7 @@
" tol_distance=40.0,\n",
" scale_factor=80.0,\n",
")\n",
"config_store.store_task_config(task_config)\n"
"config_store.store_task_config(task_config)"
]
},
{
@@ -103,7 +102,7 @@
" constant_upsample=True,\n",
" upsample_factors=[(2, 2, 2)],\n",
")\n",
"config_store.store_architecture_config(architecture_config)\n"
"config_store.store_architecture_config(architecture_config)"
]
},
{
@@ -150,7 +149,7 @@
" min_masked=0.05,\n",
" clip_raw=True,\n",
")\n",
"config_store.store_trainer_config(trainer_config)\n"
"config_store.store_trainer_config(trainer_config)"
]
},
{
@@ -207,15 +206,14 @@
" )\n",
"\n",
" print(run_config.name)\n",
" config_store.store_run_config(run_config)\n"
" config_store.store_run_config(run_config)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" ## Train\n",
" To train one of the runs, you can either do it by first creating a **Run** directly from the run config\n",
" NOTE: The run stats are stored in the `runs_base_dir/stats` directory. You can delete this directory to remove all stored stats if you want to re-run training. Otherwise, the stats will be appended to the existing files, and the run won't start from scratch. This may cause errors"
]
},
@@ -232,7 +230,7 @@
"config_store = create_config_store()\n",
"\n",
"run = Run(config_store.retrieve_run_config(\"cosem_distance_run_4nm\"))\n",
"train_run(run)\n"
"train_run(run)"
]
}
],
1 change: 0 additions & 1 deletion examples/distance_task/cosem_example.py
@@ -214,7 +214,6 @@
# %% [markdown]
# ## Train

# To train one of the runs, you can either do it by first creating a **Run** directly from the run config
# NOTE: The run stats are stored in the `runs_base_dir/stats` directory. You can delete this directory to remove all stored stats if you want to re-run training. Otherwise, the stats will be appended to the existing files, and the run won't start from scratch. This may cause errors
# %%
from dacapo.train import train_run
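
The NOTE kept in this example explains that stored stats make a rerun resume (and possibly error) rather than start fresh. A hedged sketch of clearing them; the runs_base_dir path is an assumption and should match your DaCapo configuration:

```python
import shutil
from pathlib import Path

runs_base_dir = Path("~/dacapo/runs").expanduser()  # assumption: your configured runs_base_dir
stats_dir = runs_base_dir / "stats"
if stats_dir.exists():
    shutil.rmtree(stats_dir)  # removes all stored stats so training restarts from scratch
```
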
3 changes: 1 addition & 2 deletions examples/distance_task/cosem_example_fill_in_the_blank.ipynb
@@ -100,7 +100,7 @@
" constant_upsample=True,\n",
" upsample_factors=[(2, 2, 2)],\n",
")\n",
"config_store.store_architecture_config(architecture_config)\n"
"config_store.store_architecture_config(architecture_config)"
]
},
{
@@ -193,7 +193,6 @@
"metadata": {},
"source": [
" ## Train\n",
" To train one of the runs, you can either do it by first creating a **Run** directly from the run config\n",
" NOTE: The run stats are stored in the `runs_base_dir/stats` directory. You can delete this directory to remove all stored stats if you want to re-run training. Otherwise, the stats will be appended to the existing files, and the run won't start from scratch. This may cause errors"
]
},