✨♻️ refactor preprocessing #64

Open · wants to merge 4 commits into base: main
1 change: 1 addition & 0 deletions pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
"monai",
"nibabel",
"pytorch-lightning",
"scikit-image",
"scikit-learn",
"SimpleITK",
"sitk-cli",
58 changes: 57 additions & 1 deletion scripts/make_datalist.py
@@ -5,9 +5,13 @@
import typer

from segmantic.image.labels import load_tissue_list
from segmantic.seg.dataset import PairedDataSet
from segmantic.utils.file_iterators import find_matching_files

app = typer.Typer()


@app.command()
def make_datalist(
data_dir: Path = typer.Option(
...,
@@ -73,8 +77,60 @@ def make_datalist(
return datalist_path.write_text(json.dumps(data_config, indent=2))


@app.command()
def extend_datalist(
data_dir: Path = typer.Option(
...,
help="root data directory. Paths in datalist will be relative to this directory",
),
image_dir: Path = typer.Option(..., help="Directory containing images"),
labels_dir: Path = typer.Option(None, help="Directory containing labels"),
datalist_path: Path = typer.Option(..., help="Filename of input datalist"),
output_path: Path = typer.Option(..., help="Filename of output datalist"),
image_glob: str = "*.nii.gz",
labels_glob: str = "*.nii.gz",
):
ds = PairedDataSet.load_from_json(datalist_path)
images = [d["image"].name.lower() for d in ds.training_files()]
images += [d["image"].name.lower() for d in ds.validation_files()]
images += [d["image"].name.lower() for d in ds.test_files()]

if image_dir.is_absolute():
image_dir = image_dir.relative_to(data_dir)
if labels_dir.is_absolute():
labels_dir = labels_dir.relative_to(data_dir)

matches = find_matching_files(
[data_dir / image_dir / image_glob, data_dir / labels_dir / labels_glob]
)

training_data = list(ds.training_files())
for p in matches:
image_name = p[0].name.lower()
if image_name not in images:
training_data.append({"image": p[0], "label": p[1]})

def make_relative(d: dict[str, Path]):
return {key: str(d[key].relative_to(data_dir)) for key in d}

data_config = json.loads(datalist_path.read_text())

data_config["training"] = [make_relative(v) for v in training_data]
data_config["validation"] = [make_relative(v) for v in ds.validation_files()]
# data_config["test"] = (make_relative(v)["image"] for v in ds.test_files())
return output_path.write_text(json.dumps(data_config, indent=2))


@app.command()
def print_stats(datalist: Path):
ds = PairedDataSet.load_from_json(datalist)
print(f"Training cases: {len(ds.training_files())}")
print(f"Validation cases: {len(ds.validation_files())}")
print(f"Test cases: {len(ds.test_files())}")


def main():
-    typer.run(make_datalist)
+    app()


if __name__ == "__main__":
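A hypothetical usage sketch for the new `extend-datalist` command; the data layout and file names below are placeholders, not from this PR.

```python
# Hypothetical sketch: extend an existing datalist with newly delivered cases.
# Equivalent CLI call (paths are placeholders):
#   python scripts/make_datalist.py extend-datalist --data-dir /data \
#       --image-dir images_new --labels-dir labels_new \
#       --datalist-path /data/datalist.json --output-path /data/datalist_ext.json
from pathlib import Path

from make_datalist import extend_datalist  # assumes scripts/ is on sys.path

extend_datalist(
    data_dir=Path("/data"),                     # datalist entries are stored relative to this
    image_dir=Path("images_new"),               # absolute dirs are first made relative to data_dir
    labels_dir=Path("labels_new"),
    datalist_path=Path("/data/datalist.json"),  # existing datalist; its cases are kept
    output_path=Path("/data/datalist_ext.json"),
)
```

New image/label pairs whose image file names (lower-cased) are not already in the datalist are appended to the "training" section, while the "validation" split is carried over unchanged.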
47 changes: 47 additions & 0 deletions scripts/preprocess.py
@@ -0,0 +1,47 @@
from pathlib import Path

import typer
from monai.transforms import (
Compose,
CropForegroundd,
ForegroundMaskd,
LoadImaged,
NormalizeIntensityd,
SaveImaged,
)

from segmantic.seg.transforms import SelectChanneld


def pre_process(input_file: Path, output_dir: Path, margin: int = 0, channel: int = 0):

transforms = Compose(
[
LoadImaged(
keys="img",
reader="ITKReader",
image_only=False,
ensure_channel_first=True,
),
SelectChanneld(keys="img", channel=channel, new_key_postfix="_0"),
ForegroundMaskd(keys="img_0", invert=True, new_key_prefix="mask"),
CropForegroundd(
keys="img", source_key="maskimg_0", allow_smaller=False, margin=margin
),
NormalizeIntensityd(keys="img"),
SaveImaged(
keys="img",
writer="ITKWriter",
output_dir=output_dir,
output_postfix="",
resample=False,
separate_folder=False,
print_log=True,
),
]
)
transforms({"img": input_file})


if __name__ == "__main__":
typer.run(pre_process)
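A hypothetical invocation of the new preprocessing script; the file names and parameter values are placeholders.

```python
# Hypothetical sketch: crop an image to its foreground and normalize its intensity.
# Equivalent CLI call (arguments are placeholders):
#   python scripts/preprocess.py case01_t1.nii.gz out --margin 8 --channel 0
from pathlib import Path

from preprocess import pre_process  # assumes scripts/ is on sys.path

pre_process(
    input_file=Path("case01_t1.nii.gz"),  # image loaded under the "img" key
    output_dir=Path("out"),               # cropped, normalized image is written here
    margin=8,    # extra voxels kept around the detected foreground bounding box
    channel=0,   # channel passed to SelectChanneld to build the foreground mask
)
```

The pipeline masks the selected channel with `ForegroundMaskd`, crops `img` to that mask via `CropForegroundd` (note `source_key="maskimg_0"`, i.e. the `mask` prefix applied to `img_0`), and normalizes intensity before saving.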
2 changes: 1 addition & 1 deletion src/segmantic/__init__.py
@@ -1,4 +1,4 @@
""" ML-based segmentation for medical images
"""

__version__ = "0.4.0"
__version__ = "0.5.0"
2 changes: 1 addition & 1 deletion src/segmantic/commands/monai_unet_cli.py
@@ -174,7 +174,7 @@ def predict(
None, "--tissue-list", "-t", help="label descriptors in iSEG format"
),
results_dir: Path = typer.Option(
None, "--results-dir", "-r", help="output directory"
..., "--results-dir", "-r", help="output directory"
),
spacing: list[float] = typer.Option(
[], "--spacing", help="if specified, the image is first resampled"
87 changes: 87 additions & 0 deletions src/segmantic/seg/ensemble.py
@@ -0,0 +1,87 @@
from collections.abc import Sequence
from typing import Optional, Union

import torch
from monai.config import KeysCollection
from monai.config.type_definitions import NdarrayOrTensor
from monai.networks.utils import one_hot
from monai.transforms.post.array import Ensemble
from monai.transforms.post.dictionary import Ensembled
from monai.transforms.transform import Transform
from monai.utils import TransformBackends


class SelectBestEnsemble(Ensemble, Transform):
"""
Execute select best ensemble on the input data.
The input data can be a list or tuple of PyTorch Tensor with shape: [C[, H, W, D]],
Or a single PyTorch Tensor with shape: [E[, C, H, W, D]], the `E` dimension represents
the output data from different models.
Typically, the input data is model output of segmentation task or classification task.

Note:
This select-best transform expects the input data to be discrete, single-channel label maps.
For each tissue, it selects the prediction of the model that performed best in a generalization analysis.
This per-tissue mapping is provided via `label_model_dict`.
The output data has the same shape as every item of the input data.

Args:
label_model_dict: dictionary mapping each tissue label to the index of the
model that predicts this tissue best.
"""

backend = [TransformBackends.TORCH]

def __init__(self, label_model_dict: dict[int, int]) -> None:
self.label_model_dict = label_model_dict

def __call__(
self, img: Union[Sequence[NdarrayOrTensor], NdarrayOrTensor]
) -> NdarrayOrTensor:
img_ = self.get_stacked_torch(img)

has_ch_dim = False
if img_.ndimension() > 1 and img_.shape[1] > 1:
# convert multi-channel (One-Hot) images to argmax
img_ = torch.argmax(img_, dim=1, keepdim=True)
has_ch_dim = True

# combining the tissues from the best performing models
# initialize with background (0) so voxels not claimed by any tissue are well-defined
out_pt = torch.zeros(img_.size()[1:])
for tissue_id, model_id in self.label_model_dict.items():
temp_best_tissue = img_[model_id, ...]
out_pt[temp_best_tissue == tissue_id] = tissue_id

if has_ch_dim:
# convert back to multi-channel (One-Hot)
num_classes = max(self.label_model_dict.keys()) + 1
out_pt = one_hot(out_pt, num_classes, dim=0)

return self.post_convert(out_pt, img)


class SelectBestEnsembled(Ensembled):
"""
Dictionary-based wrapper of :py:class:`monai.transforms.SelectBestEnsemble`.
"""

backend = SelectBestEnsemble.backend

def __init__(
self,
label_model_dict: dict[int, int],
keys: KeysCollection,
output_key: Optional[str] = None,
) -> None:
"""
Args:
keys: keys of the corresponding items to be stacked and used to execute the ensemble.
If only one key is provided, it is assumed to be a PyTorch Tensor with the data stacked on dimension `E`.
output_key: the key to store the ensemble result in the dictionary.
If only one key is provided in `keys`, `output_key` can be None and `keys` is used by default.

"""
ensemble = SelectBestEnsemble(
label_model_dict=label_model_dict,
)
super().__init__(keys, ensemble, output_key)
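A minimal usage sketch for the new transform, with an illustrative label-to-model mapping (not from this PR):

```python
# Minimal sketch: merge three discrete single-channel predictions, taking
# label 1 from model 0 and label 2 from model 2 (mapping is illustrative).
import torch

from segmantic.seg.ensemble import SelectBestEnsemble

preds = [torch.randint(0, 3, (1, 8, 8, 8)) for _ in range(3)]  # E=3 items of [C=1, H, W, D]
ensemble = SelectBestEnsemble(label_model_dict={1: 0, 2: 2})
merged = ensemble(preds)  # same shape as each input item
```

The dictionary variant would be used inside a MONAI `Compose`, e.g. `SelectBestEnsembled(keys=["pred0", "pred1", "pred2"], output_key="pred", label_model_dict=...)`.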
110 changes: 110 additions & 0 deletions src/segmantic/seg/losses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from __future__ import annotations

import torch
from monai.networks import one_hot
from monai.utils import LossReduction
from torch.nn.modules.loss import _Loss


class AsymmetricUnifiedFocalLoss(_Loss):
"""
AsymmetricUnifiedFocalLoss is a variant of Focal Loss.

Implementation of the Asymmetric Unified Focal Loss described in:

- "Unified Focal Loss: Generalising Dice and Cross Entropy-based Losses to Handle Class Imbalanced Medical Image Segmentation",
Michael Yeung, Computerized Medical Imaging and Graphics
- https://github.com/mlyg/unified-focal-loss/issues/8
"""

def __init__(
self,
to_onehot_y: bool = False,
num_classes: int = 2,
gamma: float = 0.75,
[inline review comment by @dyollb (owner, author)]: gamma=2.0

delta: float = 0.7,
reduction: LossReduction | str = LossReduction.MEAN,
):
"""
Args:
to_onehot_y : whether to convert `y` into the one-hot format. Defaults to False.
num_classes : number of classes. Defaults to 2.
delta : weight of the foreground classes; the background term is weighted by (1 - delta). Defaults to 0.7.
gamma : exponent of the focal modulation applied to the background term. Defaults to 0.75.

Example:
>>> import torch
>>> from segmantic.seg.losses import AsymmetricUnifiedFocalLoss
>>> pred = torch.ones((1,1,32,32), dtype=torch.float32)
>>> grnd = torch.ones((1,1,32,32), dtype=torch.int64)
>>> fl = AsymmetricUnifiedFocalLoss(to_onehot_y=True)
>>> fl(pred, grnd)
"""
super().__init__(reduction=LossReduction(reduction).value)
self.to_onehot_y = to_onehot_y
self.num_classes = num_classes
self.gamma = gamma
self.delta = delta
self.epsilon = torch.tensor(1e-7)

def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
"""
Args:
y_pred : the shape should be BNH[WD], where N is the number of classes.
It only supports binary segmentation.
The input should be the original logits since it will be transformed by
a sigmoid in the forward function.
y_true : the shape should be BNH[WD], where N is the number of classes.
It only supports binary segmentation.

Raises:
ValueError: When input and target have different shapes.
ValueError: When len(y_pred.shape) is neither 4 nor 5.
ValueError: When the maximum label value in y_true does not equal num_classes - 1.
"""
if y_pred.shape != y_true.shape:
raise ValueError(
f"ground truth has different shape ({y_true.shape}) from input ({y_pred.shape})"
)

if len(y_pred.shape) != 4 and len(y_pred.shape) != 5:
raise ValueError(f"input shape must be 4 or 5, but got {y_pred.shape}")

if y_pred.shape[1] == 1:
y_pred = one_hot(y_pred, num_classes=self.num_classes)
y_true = one_hot(y_true, num_classes=self.num_classes)

if torch.max(y_true) != self.num_classes - 1:
raise ValueError(
f"Please make sure the number of classes is {self.num_classes-1}"
)

# use pytorch CrossEntropyLoss ?
# https://github.com/Project-MONAI/MONAI/blob/2d463a7d19166cff6a83a313f339228bc812912d/monai/losses/dice.py#L741
epsilon = torch.tensor(self.epsilon, device=y_pred.device)
y_pred = torch.clip(y_pred, epsilon, 1.0 - epsilon)
cross_entropy = -y_true * torch.log(y_pred)

# calculate losses separately for each class, only enhancing foreground class
back_ce = (
torch.pow(1 - y_pred[:, 0, ...], self.gamma) * cross_entropy[:, 0, ...]
)
back_ce = (1 - self.delta) * back_ce

losses = [back_ce]
for i in range(1, self.num_classes):
i_ce = cross_entropy[:, i, ...]
i_ce = self.delta * i_ce
losses.append(i_ce)

loss = torch.stack(losses)
if self.reduction == LossReduction.SUM.value:
return torch.sum(loss) # sum over the batch and channel dims
if self.reduction == LossReduction.NONE.value:
return loss  # returns the stacked per-class loss maps
if self.reduction == LossReduction.MEAN.value:
return torch.mean(loss)
raise ValueError(
f'Unsupported reduction: {self.reduction}, available options are ["mean", "sum", "none"].'
)
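A quick smoke-test sketch for the new loss, mirroring the shapes from the docstring example; the random binary tensors are placeholders.

```python
# Smoke-test sketch for AsymmetricUnifiedFocalLoss; shapes follow the docstring
# example, and the random binary tensors are placeholders.
import torch

from segmantic.seg.losses import AsymmetricUnifiedFocalLoss

pred = (torch.rand((1, 1, 32, 32)) > 0.5).float()  # discrete single-channel prediction
grnd = (torch.rand((1, 1, 32, 32)) > 0.5).float()  # binary ground truth labels
loss_fn = AsymmetricUnifiedFocalLoss(to_onehot_y=True, num_classes=2)
print(loss_fn(pred, grnd))  # scalar tensor with the default "mean" reduction
```

Both tensors are expanded to one-hot internally (the `y_pred.shape[1] == 1` branch); the background channel is modulated by the focal term `(1 - p)**gamma` and down-weighted by `1 - delta`, while foreground channels are weighted by `delta`.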