Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🩹 BUG: Fix Parsing Missing Omero Version NGFF Metadata #568

Merged
merged 16 commits into from
Mar 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 172 additions & 2 deletions tests/test_wsireader.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""Tests for reading whole-slide images."""

import copy
import json
import logging
import os
import pathlib
import random
import re
import shutil
from copy import deepcopy
from pathlib import Path
from time import time

# When no longer supporting Python <3.9 this should be collections.abc.Iterable
Expand All @@ -17,6 +20,7 @@
import pytest
import zarr
from click.testing import CliRunner
from packaging.version import Version
from skimage.filters import threshold_otsu
from skimage.metrics import peak_signal_noise_ratio, structural_similarity
from skimage.morphology import binary_dilation, disk, remove_small_objects
Expand Down Expand Up @@ -2034,11 +2038,11 @@ def test_ngff_empty_datasets_mpp(tmp_path):
assert wsi.info.mpp is None


def test_nff_no_scale_transforms_mpp(tmp_path):
def test_ngff_no_scale_transforms_mpp(tmp_path):
"""Test that mpp is None if no scale transforms are present."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample with no axes
sample_copy = tmp_path / "ngff-1"
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
Expand All @@ -2051,6 +2055,172 @@ def test_nff_no_scale_transforms_mpp(tmp_path):
assert wsi.info.mpp is None


def test_ngff_missing_omero_version(tmp_path):
"""Test that the reader can handle missing omero version."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Remove the omero version
del zattrs["omero"]["version"]
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
wsireader.WSIReader.open(sample_copy)


def test_ngff_missing_multiscales_returns_false(tmp_path):
"""Test that missing multiscales key returns False for is_ngff."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Remove the multiscales key
del zattrs["multiscales"]
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
assert not wsireader.is_ngff(sample_copy)


def test_ngff_wrong_format_metadata(tmp_path, caplog):
"""Test that is_ngff is False and logs a warning if metadata is wrong."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Change the format to something else
zattrs["multiscales"] = "foo"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
with caplog.at_level(logging.WARNING):
assert not wsireader.is_ngff(sample_copy)
assert "must be present and of the correct type" in caplog.text


def test_ngff_omero_below_min_version(tmp_path):
"""Test for FileNotSupported when omero version is below minimum."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Change the format to something else
zattrs["omero"]["version"] = "0.0"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
with pytest.raises(FileNotSupported):
wsireader.WSIReader.open(sample_copy)


def test_ngff_omero_above_max_version(tmp_path):
"""Test for FileNotSupported when omero version is above maximum."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Change the format to something else
zattrs["omero"]["version"] = "10.0"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
with pytest.raises(FileNotSupported):
wsireader.WSIReader.open(sample_copy)


def test_ngff_multiscales_below_min_version(tmp_path):
"""Test for FileNotSupported when multiscales version is below minimum."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Change the format to something else
zattrs["multiscales"][0]["version"] = "0.0"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
with pytest.raises(FileNotSupported):
wsireader.WSIReader.open(sample_copy)


def test_ngff_multiscales_above_max_version(tmp_path):
"""Test for FileNotSupported when multiscales version is above maximum."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Change the format to something else
zattrs["multiscales"][0]["version"] = "10.0"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
with pytest.raises(FileNotSupported):
wsireader.WSIReader.open(sample_copy)


def test_ngff_non_numeric_version(tmp_path, monkeypatch):
"""Test that the reader can handle non-numeric omero versions."""

# Patch the is_ngff function to change the min/max version
if_ngff = wsireader.is_ngff # noqa: F841
min_version = Version("0.4")
max_version = Version("0.5")

def patched_is_ngff(
path: Path,
min_version: Version = min_version,
max_version: Version = max_version,
) -> bool:
"""Patched is_ngff function with new min/max version."""
return is_ngff(path, min_version, max_version)

monkeypatch.setattr(wsireader, "is_ngff", patched_is_ngff)

sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Set the omero version to a non-numeric string
zattrs["omero"]["version"] = "0.5-dev"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
wsireader.WSIReader.open(sample_copy)


def test_ngff_inconsistent_multiscales_versions(tmp_path, caplog):
"""Test that the reader logs a warning inconsistent multiscales versions."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Set the versions to be inconsistent
multiscales = zattrs["multiscales"]
# Needs at least 2 multiscales to be inconsistent
if len(multiscales) < 2:
multiscales.append(copy.deepcopy(multiscales[0]))
for i, _ in enumerate(multiscales):
multiscales[i]["version"] = f"0.{i}-dev"
zattrs["omero"]["multiscales"] = multiscales
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
# Capture logger output to check for warning
with caplog.at_level(logging.WARNING), pytest.raises(FileNotSupported):
wsireader.WSIReader.open(sample_copy)
assert "multiple versions" in caplog.text


class TestReader:
scenarios = [
(
Expand Down
64 changes: 57 additions & 7 deletions tiatoolbox/wsicore/wsireader.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import tifffile
import zarr
from defusedxml import ElementTree
from packaging.version import Version
from PIL import Image

from tiatoolbox import logger, utils
Expand All @@ -36,6 +37,8 @@
Bounds = Tuple[Number, Number, Number, Number]
IntBounds = Tuple[int, int, int, int]
Resolution = Union[Number, Tuple[Number, Number], np.ndarray]
MIN_NGFF_VERSION = Version("0.4")
MAX_NGFF_VERSION = Version("0.4")
shaneahmed marked this conversation as resolved.
Show resolved Hide resolved


def is_dicom(path: pathlib.Path) -> bool:
Expand Down Expand Up @@ -97,7 +100,11 @@ def is_zarr(path: pathlib.Path) -> bool:
return False


def is_ngff(path: pathlib.Path, min_version: Tuple[int, ...] = (0, 4)) -> bool:
def is_ngff(
path: pathlib.Path,
min_version: Version = MIN_NGFF_VERSION,
max_version: Version = MAX_NGFF_VERSION,
) -> bool:
"""Check if the input is a NGFF file.

Args:
Expand Down Expand Up @@ -129,18 +136,61 @@ def is_ngff(path: pathlib.Path, min_version: Tuple[int, ...] = (0, 4)) -> bool:
all(isinstance(m, dict) for m in multiscales),
]
):
logger.warning(
"The NGFF file is not valid. "
"The multiscales, _ARRAY_DIMENSIONS and omero attributes "
"must be present and of the correct type."
)
return False
except KeyError:
return False
multiscales_versions = tuple(
tuple(int(part) for part in scale.get("version", "").split("."))
for scale in multiscales
)
omero_version = tuple(int(part) for part in omero.get("version", "").split("."))
multiscales_versions = {
Version(scale["version"]) for scale in multiscales if "version" in scale
}
omero_version: Optional[str] = omero.get("version")
if omero_version:
John-P marked this conversation as resolved.
Show resolved Hide resolved
omero_version: Version = Version(omero_version)
if omero_version < min_version:
logger.warning(
"The minimum supported version of the NGFF file is %s. "
"But the versions of the multiscales in the file are %s.",
min_version,
multiscales_versions,
)
return False
if omero_version > max_version:
logger.warning(
"The maximum supported version of the NGFF file is %s. "
"But the versions of the multiscales in the file are %s.",
max_version,
multiscales_versions,
)
return False

if len(multiscales_versions) > 1:
logger.warning(
"Found multiple versions for NGFF multiscales: %s",
multiscales_versions,
)

if any(version < min_version for version in multiscales_versions):
logger.warning(
"The minimum supported version of the NGFF file is %s. "
"But the versions of the multiscales in the file are %s.",
min_version,
multiscales_versions,
)
return False
if omero_version < min_version:

if any(version > max_version for version in multiscales_versions):
logger.warning(
"The maximum supported version of the NGFF file is %s. "
"But the versions of the multiscales in the file are %s.",
max_version,
multiscales_versions,
)
return False

return is_zarr(path)


Expand Down
3 changes: 3 additions & 0 deletions whitelist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Nx4
NxM
NxN
OME
Omero
Omnyx
OpenCV
Otsu's
Expand Down Expand Up @@ -128,8 +129,10 @@ macports
memray
mpp
mse
multiscales
natively
ndarray
ngff
nn
noinspection
normalizer
Expand Down