Commit
Dev/main (#207)
mzouink authored Mar 21, 2024
2 parents b0df46e + 6f156ec commit f292439
Showing 14 changed files with 2,926 additions and 490 deletions.
6 changes: 4 additions & 2 deletions dacapo/blockwise/predict_worker.py
@@ -82,7 +82,9 @@ def start_worker(
weights_store = create_weights_store()

# load weights
weights_store.retrieve_weights(run_name, iteration)
run.model.load_state_dict(
weights_store.retrieve_weights(run_name, iteration).model
)

# get arrays
input_array_identifier = LocalArrayIdentifier(Path(input_container), input_dataset)
@@ -124,7 +126,7 @@ def start_worker(
pipeline += gp.Normalize(raw)

# predict
model.eval()
# model.eval()
pipeline += gp_torch.Predict(
model=model,
inputs={"x": raw},
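
The worker now loads retrieved weights into the model explicitly instead of expecting the weights store to apply them. A minimal sketch of the new pattern, assuming (as the diff suggests) that retrieve_weights returns a checkpoint object whose .model attribute is a PyTorch state dict; the run name and iteration here are hypothetical:

```python
from dacapo.experiments.run import Run
from dacapo.store.create_store import create_config_store, create_weights_store

config_store = create_config_store()
run = Run(config_store.retrieve_run_config("my_run"))  # hypothetical run name

weights_store = create_weights_store()
# retrieve_weights appears to return a checkpoint whose .model field holds
# the model's state dict; load it into the run's model explicitly.
weights = weights_store.retrieve_weights(run.name, 5000)  # hypothetical iteration
run.model.load_state_dict(weights.model)
```

The same hunk comments out model.eval() before gp_torch.Predict, so prediction now runs with the model in whatever mode it was loaded in.
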
175 changes: 91 additions & 84 deletions dacapo/utils/view.py
@@ -1,17 +1,13 @@
from typing import Optional
import neuroglancer
from IPython.display import IFrame
import numpy as np
import gunpowder as gp
from funlib.persistence import Array
from dacapo.experiments.datasplits.datasets.arrays import ZarrArray
from funlib.persistence import open_ds
from threading import Thread
import threading
import neuroglancer
from neuroglancer.viewer_state import ViewerState
import os
from dacapo.experiments.run import Run
from dacapo.store.create_store import create_array_store
from dacapo.store.create_store import create_array_store, create_stats_store
from IPython.display import IFrame
import time
import copy
@@ -86,25 +82,74 @@ def add_scalar_layer(state, name, data, voxel_size):
)


class NeuroglancerRunViewer:
class BestScore:
def __init__(self, run: Run):
self.run: Run = run
self.most_recent_iteration = 0
self.prediction = None
self.score: float = -1
self.iteration: int = 0
self.parameter: str = None
self.validation_parameters = run.validation_scores.parameters

self.array_store = create_array_store()
self.stats_store = create_stats_store()

def get_ds(self, iteration, validation_dataset):
prediction_array_identifier = self.array_store.validation_prediction_array(
self.run.name, iteration, validation_dataset.name
)
container = str(prediction_array_identifier.container)
dataset = str(
os.path.join(
str(iteration), validation_dataset.name, "output", str(self.parameter)
)
)
self.ds = open_ds(container, dataset)

def does_new_best_exist(self):
new_best_exists = False
self.validation_scores = self.stats_store.retrieve_validation_iteration_scores(
self.run.name
)

for validation_idx, validation_dataset in enumerate(
self.run.datasplit.validate
):
for iteration_scores in self.validation_scores:
iteration = iteration_scores.iteration
for parameter_idx, parameter in enumerate(self.validation_parameters):
# hardcoded for f1_score
current_score = iteration_scores.scores[validation_idx][
parameter_idx
][20]
if current_score > self.score:
self.iteration = iteration
self.score = current_score
self.parameter = parameter
self.get_ds(iteration, validation_dataset)
new_best_exists = True
return new_best_exists


class NeuroglancerRunViewer:

def __init__(self, run: Run, embedded=False):
self.run: Run = run
self.best_score = BestScore(run)
self.embedded = embedded

def updated_neuroglancer_layer(self, layer_name, ds):
source = neuroglancer.LocalVolume(
data=ds.data,
dimensions=neuroglancer.CoordinateSpace(
names=["c", "z", "y", "x"],
units=["", "nm", "nm", "nm"],
scales=[1] + list(ds.voxel_size),
names=["z", "y", "x"],
units=["nm", "nm", "nm"],
scales=list(ds.voxel_size),
),
voxel_offset=[0] + list(ds.roi.offset),
voxel_offset=list(ds.roi.offset),
)
new_state = copy.deepcopy(self.viewer.state)
if len(new_state.layers) == 1:
new_state.layers[layer_name] = neuroglancer.ImageLayer(source=source)
new_state.layers[layer_name] = neuroglancer.SegmentationLayer(source=source)
else:
# replace name everywhere to preserve state, like what is selected
new_state_str = json.dumps(new_state.to_json())
@@ -113,7 +158,6 @@ def updated_neuroglancer_layer(self, layer_name, ds):
new_state.layers[layer_name].source = source

self.viewer.set_state(new_state)
print(self.viewer.state)

def deprecated_start_neuroglancer(self):
neuroglancer.set_server_bind_address("0.0.0.0")
@@ -122,6 +166,7 @@ def deprecated_start_neuroglancer(self):
def start_neuroglancer(self):
neuroglancer.set_server_bind_address("0.0.0.0")
self.viewer = neuroglancer.Viewer()
print(f"Neuroglancer viewer: {self.viewer}")
with self.viewer.txn() as state:
state.showSlices = False

@@ -136,9 +181,11 @@ def start_neuroglancer(self):
voxel_offset=self.raw.roi.offset,
),
)
return IFrame(src=self.viewer, width=1800, height=900)
if self.embedded:
return IFrame(src=self.viewer, width=1800, height=900)

def start(self):
self.run_thread = True
self.array_store = create_array_store()
self.get_datasets()
self.new_validation_checker()
@@ -152,85 +199,45 @@ def open_from_array_identitifier(self, array_identifier):

def get_datasets(self):
for validation_dataset in self.run.datasplit.validate:
(
input_raw_array_identifier,
input_gt_array_identifier,
) = self.array_store.validation_input_arrays(
self.run.name, validation_dataset.name
)
raw = validation_dataset.raw._source_array
gt = validation_dataset.gt._source_array
self.raw = open_ds(str(raw.file_name), raw.dataset)
self.gt = open_ds(str(gt.file_name), gt.dataset)

self.raw = self.open_from_array_identitifier(input_raw_array_identifier)
self.gt = self.open_from_array_identitifier(input_gt_array_identifier)
print(self.raw)
def update_best_info(self):
self.segmentation = self.best_score.ds
self.most_recent_iteration = self.best_score.iteration

def update_best_info(self, iteration, validation_dataset_name):
prediction_array_identifier = self.array_store.validation_prediction_array(
self.run.name,
iteration,
validation_dataset_name,
)
self.prediction = self.open_from_array_identitifier(prediction_array_identifier)
self.most_recent_iteration = iteration

def update_neuroglancer(self, iteration):
def update_neuroglancer(self):
self.updated_neuroglancer_layer(
f"prediction at iteration {iteration}", self.prediction
f"prediction at iteration {self.best_score.iteration}, f1 score {self.best_score.score}",
self.segmentation,
)
return None

def update_best(self, iteration, validation_dataset_name):
self.update_best_info(iteration, validation_dataset_name)
self.update_neuroglancer(iteration)
def update_best_layer(self):
self.update_best_info()
self.update_neuroglancer()

def new_validation_checker(self):
self.process = Thread(target=self.update_with_new_validation_if_possible)
self.process.daemon = True
self.process.start()
self.thread = threading.Thread(
target=self.update_with_new_validation_if_possible, daemon=True
)
self.thread.run_thread = True
self.thread.start()

def update_with_new_validation_if_possible(self):
thread = threading.currentThread()
# Here we are assuming that we are checking the directory .../validation_config/prediction
# Ideally we will only have to check for the current best validation
while True:
time.sleep(3)
for validation_dataset in self.run.datasplit.validate:
most_recent_iteration_previous = self.most_recent_iteration
prediction_array_identifier = (
self.array_store.validation_prediction_array(
self.run.name,
self.most_recent_iteration,
validation_dataset.name,
)
while getattr(thread, "run_thread", True):
time.sleep(10)
new_best_exists = self.best_score.does_new_best_exist()
if new_best_exists:
print(
f"New best f1 score of {self.best_score.score} at iteration {self.best_score.iteration} and parameter {self.best_score.parameter}"
)
self.update_best_layer()

container = prediction_array_identifier.container
if os.path.exists(container):
iteration_dirs = [
name
for name in os.listdir(container)
if os.path.isdir(os.path.join(container, name))
and name.isnumeric()
]

for iteration_dir in iteration_dirs:
if int(iteration_dir) > self.most_recent_iteration:
inference_dir = os.path.join(
container,
iteration_dir,
"validation_config",
"prediction",
)
if os.path.exists(inference_dir):
# Ignore basic zarr and n5 files
inference_dir_contents = [
f
for f in os.listdir(inference_dir)
if not f.startswith(".") and not f.endswith(".json")
]
if inference_dir_contents:
# then it should have at least a chunk written out, assume it has all of it written out
self.most_recent_iteration = int(iteration_dir)
if most_recent_iteration_previous != self.most_recent_iteration:
self.update_best(
self.most_recent_iteration,
validation_dataset.name,
)
def stop(self):
self.thread.run_thread = False
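
Taken together, these changes replace the old directory-polling loop with a stats-store query for the best f1 score and give the viewer an explicit stop. A hedged usage sketch (the run name is hypothetical; embedded=True is only needed to get an IFrame back for notebook display):

```python
from dacapo.experiments.run import Run
from dacapo.store.create_store import create_config_store
from dacapo.utils.view import NeuroglancerRunViewer

config_store = create_config_store()
run = Run(config_store.retrieve_run_config("my_run"))  # hypothetical run name

viewer = NeuroglancerRunViewer(run, embedded=False)
viewer.start()  # starts neuroglancer and the background best-score checker
# ... training/validation proceeds elsewhere; the checker polls every 10 s ...
viewer.stop()   # clears thread.run_thread so the polling loop exits
```
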
2 changes: 1 addition & 1 deletion dacapo/validate.py
@@ -53,7 +53,7 @@ def validate(
def validate_run(
run: Run,
iteration: int,
num_workers: int = 30,
num_workers: int = 1,
output_dtype: str = "uint8",
overwrite: bool = True,
):
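
With the default num_workers dropped from 30 to 1, validation no longer fans out by default; callers that want the old parallelism must ask for it. A sketch under the same assumptions as the earlier examples (run name and iteration are hypothetical):

```python
from dacapo.experiments.run import Run
from dacapo.store.create_store import create_config_store
from dacapo.validate import validate_run

config_store = create_config_store()
run = Run(config_store.retrieve_run_config("my_run"))  # hypothetical run name

# num_workers now defaults to 1; pass a larger value to restore the old parallelism.
validate_run(run, iteration=5000, num_workers=30)  # hypothetical iteration
```
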
3 changes: 0 additions & 3 deletions examples/__init__.py
@@ -1,3 +0,0 @@
from .random_source_pipeline import random_source_pipeline
from .synthetic_source_worker import generate_synthetic_dataset
from .utils import get_viewer
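
Removing these re-exports means the helpers are no longer importable from the examples package itself; a sketch of the adjusted imports, assuming the module layout implied by the deleted lines:

```python
# Before (worked via the package-level re-exports, now removed):
# from examples import random_source_pipeline, generate_synthetic_dataset, get_viewer

# After: import each helper from its submodule directly.
from examples.random_source_pipeline import random_source_pipeline
from examples.synthetic_source_worker import generate_synthetic_dataset
from examples.utils import get_viewer
```
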
2 changes: 2 additions & 0 deletions examples/cosem/run_cosem.py
@@ -1,5 +1,6 @@
# %%
import dacapo

# %%
from dacapo.store.create_store import create_config_store

@@ -54,6 +55,7 @@
IntensityScaleShiftAugmentConfig,
)
from dacapo.experiments.trainers import GunpowderTrainerConfig

trainer_config = GunpowderTrainerConfig(
name="cosem_finetune2",
batch_size=1,
2 changes: 2 additions & 0 deletions examples/cosem/train_hela.py
@@ -5,6 +5,8 @@
run_config = config_store.retrieve_run_config("cosem_distance_run_4nm_finetune3")

from dacapo.experiments.run import Run

run = Run(run_config)
from dacapo.train import train_run

train_run(run)
16 changes: 7 additions & 9 deletions examples/distance_task/cosem_example.ipynb
@@ -9,8 +9,7 @@
"# First we need to create a config store to store our configurations\n",
"from dacapo.store.create_store import create_config_store\n",
"\n",
"config_store = create_config_store()\n",
"\n"
"config_store = create_config_store()"
]
},
{
@@ -42,7 +41,7 @@
"\n",
"datasplit = datasplit_config.datasplit_type(datasplit_config)\n",
"viewer = datasplit._neuroglancer()\n",
"config_store.store_datasplit_config(datasplit_config)\n"
"config_store.store_datasplit_config(datasplit_config)"
]
},
{
@@ -71,7 +70,7 @@
" tol_distance=40.0,\n",
" scale_factor=80.0,\n",
")\n",
"config_store.store_task_config(task_config)\n"
"config_store.store_task_config(task_config)"
]
},
{
@@ -103,7 +102,7 @@
" constant_upsample=True,\n",
" upsample_factors=[(2, 2, 2)],\n",
")\n",
"config_store.store_architecture_config(architecture_config)\n"
"config_store.store_architecture_config(architecture_config)"
]
},
{
@@ -150,7 +149,7 @@
" min_masked=0.05,\n",
" clip_raw=True,\n",
")\n",
"config_store.store_trainer_config(trainer_config)\n"
"config_store.store_trainer_config(trainer_config)"
]
},
{
@@ -207,15 +206,14 @@
" )\n",
"\n",
" print(run_config.name)\n",
" config_store.store_run_config(run_config)\n"
" config_store.store_run_config(run_config)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
" ## Train\n",
" To train one of the runs, you can either do it by first creating a **Run** directly from the run config\n",
" NOTE: The run stats are stored in the `runs_base_dir/stats` directory. You can delete this directory to remove all stored stats if you want to re-run training. Otherwise, the stats will be appended to the existing files, and the run won't start from scratch. This may cause errors"
]
},
@@ -232,7 +230,7 @@
"config_store = create_config_store()\n",
"\n",
"run = Run(config_store.retrieve_run_config(\"cosem_distance_run_4nm\"))\n",
"train_run(run)\n"
"train_run(run)"
]
}
],
1 change: 0 additions & 1 deletion examples/distance_task/cosem_example.py
@@ -214,7 +214,6 @@
# %% [markdown]
# ## Train

# To train one of the runs, you can either do it by first creating a **Run** directly from the run config
# NOTE: The run stats are stored in the `runs_base_dir/stats` directory. You can delete this directory to remove all stored stats if you want to re-run training. Otherwise, the stats will be appended to the existing files, and the run won't start from scratch. This may cause errors
# %%
from dacapo.train import train_run
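
The NOTE kept in this example explains that stored stats make a rerun resume (and possibly error) rather than start fresh. A hedged sketch of clearing them; the runs_base_dir path is an assumption and should match your DaCapo configuration:

```python
import shutil
from pathlib import Path

runs_base_dir = Path("~/dacapo/runs").expanduser()  # assumption: your configured runs_base_dir
stats_dir = runs_base_dir / "stats"
if stats_dir.exists():
    shutil.rmtree(stats_dir)  # removes all stored stats so training restarts from scratch
```
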
3 changes: 1 addition & 2 deletions examples/distance_task/cosem_example_fill_in_the_blank.ipynb
@@ -100,7 +100,7 @@
" constant_upsample=True,\n",
" upsample_factors=[(2, 2, 2)],\n",
")\n",
"config_store.store_architecture_config(architecture_config)\n"
"config_store.store_architecture_config(architecture_config)"
]
},
{
@@ -193,7 +193,6 @@
"metadata": {},
"source": [
" ## Train\n",
" To train one of the runs, you can either do it by first creating a **Run** directly from the run config\n",
" NOTE: The run stats are stored in the `runs_base_dir/stats` directory. You can delete this directory to remove all stored stats if you want to re-run training. Otherwise, the stats will be appended to the existing files, and the run won't start from scratch. This may cause errors"
]
},