Skip to content

Commit

Permalink
🩹 BUG: Fix Parsing Missing Omero Version NGFF Metadata (#568)
Browse files Browse the repository at this point in the history
- [x] Handle missing version keys and null values.
- [x] Add test for missing value
- [x] Add test for non-numeric values e.g. "0.5-dev" [see current spec doc](https://ngff.openmicroscopy.org/latest/#omero-md)
- [x] Warn if version < or > 0.4
- [x] Warn for inconsistent versions

---------

Co-authored-by: Shan E Ahmed Raza <13048456+shaneahmed@users.noreply.github.com>
  • Loading branch information
John-P and shaneahmed authored Mar 27, 2023
1 parent 09bd197 commit 6ec1083
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 9 deletions.
174 changes: 172 additions & 2 deletions tests/test_wsireader.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""Tests for reading whole-slide images."""

import copy
import json
import logging
import os
import pathlib
import random
import re
import shutil
from copy import deepcopy
from pathlib import Path
from time import time

# When no longer supporting Python <3.9 this should be collections.abc.Iterable
Expand All @@ -17,6 +20,7 @@
import pytest
import zarr
from click.testing import CliRunner
from packaging.version import Version
from skimage.filters import threshold_otsu
from skimage.metrics import peak_signal_noise_ratio, structural_similarity
from skimage.morphology import binary_dilation, disk, remove_small_objects
Expand Down Expand Up @@ -2034,11 +2038,11 @@ def test_ngff_empty_datasets_mpp(tmp_path):
assert wsi.info.mpp is None


def test_nff_no_scale_transforms_mpp(tmp_path):
def test_ngff_no_scale_transforms_mpp(tmp_path):
"""Test that mpp is None if no scale transforms are present."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample with no axes
sample_copy = tmp_path / "ngff-1"
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
Expand All @@ -2051,6 +2055,172 @@ def test_nff_no_scale_transforms_mpp(tmp_path):
assert wsi.info.mpp is None


def test_ngff_missing_omero_version(tmp_path):
"""Test that the reader can handle missing omero version."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Remove the omero version
del zattrs["omero"]["version"]
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
wsireader.WSIReader.open(sample_copy)


def test_ngff_missing_multiscales_returns_false(tmp_path):
"""Test that missing multiscales key returns False for is_ngff."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Remove the multiscales key
del zattrs["multiscales"]
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
assert not wsireader.is_ngff(sample_copy)


def test_ngff_wrong_format_metadata(tmp_path, caplog):
"""Test that is_ngff is False and logs a warning if metadata is wrong."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Change the format to something else
zattrs["multiscales"] = "foo"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
with caplog.at_level(logging.WARNING):
assert not wsireader.is_ngff(sample_copy)
assert "must be present and of the correct type" in caplog.text


def test_ngff_omero_below_min_version(tmp_path):
"""Test for FileNotSupported when omero version is below minimum."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Change the format to something else
zattrs["omero"]["version"] = "0.0"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
with pytest.raises(FileNotSupported):
wsireader.WSIReader.open(sample_copy)


def test_ngff_omero_above_max_version(tmp_path):
"""Test for FileNotSupported when omero version is above maximum."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Change the format to something else
zattrs["omero"]["version"] = "10.0"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
with pytest.raises(FileNotSupported):
wsireader.WSIReader.open(sample_copy)


def test_ngff_multiscales_below_min_version(tmp_path):
"""Test for FileNotSupported when multiscales version is below minimum."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Change the format to something else
zattrs["multiscales"][0]["version"] = "0.0"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
with pytest.raises(FileNotSupported):
wsireader.WSIReader.open(sample_copy)


def test_ngff_multiscales_above_max_version(tmp_path):
"""Test for FileNotSupported when multiscales version is above maximum."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Change the format to something else
zattrs["multiscales"][0]["version"] = "10.0"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
with pytest.raises(FileNotSupported):
wsireader.WSIReader.open(sample_copy)


def test_ngff_non_numeric_version(tmp_path, monkeypatch):
"""Test that the reader can handle non-numeric omero versions."""

# Patch the is_ngff function to change the min/max version
if_ngff = wsireader.is_ngff # noqa: F841
min_version = Version("0.4")
max_version = Version("0.5")

def patched_is_ngff(
path: Path,
min_version: Version = min_version,
max_version: Version = max_version,
) -> bool:
"""Patched is_ngff function with new min/max version."""
return is_ngff(path, min_version, max_version)

monkeypatch.setattr(wsireader, "is_ngff", patched_is_ngff)

sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Set the omero version to a non-numeric string
zattrs["omero"]["version"] = "0.5-dev"
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
wsireader.WSIReader.open(sample_copy)


def test_ngff_inconsistent_multiscales_versions(tmp_path, caplog):
"""Test that the reader logs a warning inconsistent multiscales versions."""
sample = _fetch_remote_sample("ngff-1")
# Create a copy of the sample
sample_copy = tmp_path / "ngff-1.zarr"
shutil.copytree(sample, sample_copy)
with open(sample_copy / ".zattrs", "r") as fh:
zattrs = json.load(fh)
# Set the versions to be inconsistent
multiscales = zattrs["multiscales"]
# Needs at least 2 multiscales to be inconsistent
if len(multiscales) < 2:
multiscales.append(copy.deepcopy(multiscales[0]))
for i, _ in enumerate(multiscales):
multiscales[i]["version"] = f"0.{i}-dev"
zattrs["omero"]["multiscales"] = multiscales
with open(sample_copy / ".zattrs", "w") as fh:
json.dump(zattrs, fh, indent=2)
# Capture logger output to check for warning
with caplog.at_level(logging.WARNING), pytest.raises(FileNotSupported):
wsireader.WSIReader.open(sample_copy)
assert "multiple versions" in caplog.text


class TestReader:
scenarios = [
(
Expand Down
64 changes: 57 additions & 7 deletions tiatoolbox/wsicore/wsireader.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import tifffile
import zarr
from defusedxml import ElementTree
from packaging.version import Version
from PIL import Image

from tiatoolbox import logger, utils
Expand All @@ -36,6 +37,8 @@
Bounds = Tuple[Number, Number, Number, Number]
IntBounds = Tuple[int, int, int, int]
Resolution = Union[Number, Tuple[Number, Number], np.ndarray]
MIN_NGFF_VERSION = Version("0.4")
MAX_NGFF_VERSION = Version("0.4")


def is_dicom(path: pathlib.Path) -> bool:
Expand Down Expand Up @@ -97,7 +100,11 @@ def is_zarr(path: pathlib.Path) -> bool:
return False


def is_ngff(path: pathlib.Path, min_version: Tuple[int, ...] = (0, 4)) -> bool:
def is_ngff(
path: pathlib.Path,
min_version: Version = MIN_NGFF_VERSION,
max_version: Version = MAX_NGFF_VERSION,
) -> bool:
"""Check if the input is a NGFF file.
Args:
Expand Down Expand Up @@ -129,18 +136,61 @@ def is_ngff(path: pathlib.Path, min_version: Tuple[int, ...] = (0, 4)) -> bool:
all(isinstance(m, dict) for m in multiscales),
]
):
logger.warning(
"The NGFF file is not valid. "
"The multiscales, _ARRAY_DIMENSIONS and omero attributes "
"must be present and of the correct type."
)
return False
except KeyError:
return False
multiscales_versions = tuple(
tuple(int(part) for part in scale.get("version", "").split("."))
for scale in multiscales
)
omero_version = tuple(int(part) for part in omero.get("version", "").split("."))
multiscales_versions = {
Version(scale["version"]) for scale in multiscales if "version" in scale
}
omero_version: Optional[str] = omero.get("version")
if omero_version:
omero_version: Version = Version(omero_version)
if omero_version < min_version:
logger.warning(
"The minimum supported version of the NGFF file is %s. "
"But the versions of the multiscales in the file are %s.",
min_version,
multiscales_versions,
)
return False
if omero_version > max_version:
logger.warning(
"The maximum supported version of the NGFF file is %s. "
"But the versions of the multiscales in the file are %s.",
max_version,
multiscales_versions,
)
return False

if len(multiscales_versions) > 1:
logger.warning(
"Found multiple versions for NGFF multiscales: %s",
multiscales_versions,
)

if any(version < min_version for version in multiscales_versions):
logger.warning(
"The minimum supported version of the NGFF file is %s. "
"But the versions of the multiscales in the file are %s.",
min_version,
multiscales_versions,
)
return False
if omero_version < min_version:

if any(version > max_version for version in multiscales_versions):
logger.warning(
"The maximum supported version of the NGFF file is %s. "
"But the versions of the multiscales in the file are %s.",
max_version,
multiscales_versions,
)
return False

return is_zarr(path)


Expand Down
3 changes: 3 additions & 0 deletions whitelist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Nx4
NxM
NxN
OME
Omero
Omnyx
OpenCV
Otsu's
Expand Down Expand Up @@ -128,8 +129,10 @@ macports
memray
mpp
mse
multiscales
natively
ndarray
ngff
nn
noinspection
normalizer
Expand Down

0 comments on commit 6ec1083

Please sign in to comment.