Skip to content

Commit

Permalink
Pushing VFS down to converters (#123)
Browse files Browse the repository at this point in the history
* Pushing VFS down for Tiff Reader and TileDBOpenslide

* Test fixes

* Enabling config and ctx in OMEZarr Converter

* Enabling ctx and config in openslide using cached file

* PR comments
  • Loading branch information
ktsitsi authored Jun 20, 2024
1 parent c4851af commit 83a9166
Show file tree
Hide file tree
Showing 11 changed files with 201 additions and 98 deletions.
10 changes: 3 additions & 7 deletions tests/integration/converters/test_ome_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,11 @@
from tiledb.cc import WebpInputFormat


@pytest.mark.parametrize("open_fileobj", [False, True])
def test_ome_tiff_converter(tmp_path, open_fileobj):
def test_ome_tiff_converter(tmp_path):
input_path = str(get_path("CMU-1-Small-Region.ome.tiff"))
output_path = str(tmp_path)
if open_fileobj:
with open(input_path, "rb") as f:
OMETiffConverter.to_tiledb(f, output_path)
else:
OMETiffConverter.to_tiledb(input_path, output_path)

OMETiffConverter.to_tiledb(input_path, output_path)

with TileDBOpenSlide(output_path) as t:
assert len(tiledb.Group(output_path)) == t.level_count == 2
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/converters/test_openslide.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
],
)
def test_openslide_converter(tmp_path, preserve_axes, chunked, max_workers, compressor):
input_path = get_path("CMU-1-Small-Region.svs")
input_path = str(get_path("CMU-1-Small-Region.svs"))
output_path = str(tmp_path)
OpenSlideConverter.to_tiledb(
input_path,
Expand Down Expand Up @@ -80,7 +80,7 @@ def test_openslide_converter(tmp_path, preserve_axes, chunked, max_workers, comp

@pytest.mark.parametrize("preserve_axes", [False, True])
def test_openslide_converter_group_metadata(tmp_path, preserve_axes):
input_path = get_path("CMU-1-Small-Region.svs")
input_path = str(get_path("CMU-1-Small-Region.svs"))
output_path = str(tmp_path)
OpenSlideConverter.to_tiledb(input_path, output_path, preserve_axes=preserve_axes)

Expand Down
34 changes: 16 additions & 18 deletions tests/integration/converters/test_scaler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,23 @@ def test_scaler(tmp_path, scale_factors, chunked, max_workers, progressive):
ground_path = str(tmp_path / "ground")
test_path = str(tmp_path / "test")

with open(input_path, "rb") as f:
OMETiffConverter.to_tiledb(
f,
ground_path,
pyramid_kwargs={"scale_factors": scale_factors},
)
OMETiffConverter.to_tiledb(
input_path,
ground_path,
pyramid_kwargs={"scale_factors": scale_factors},
)

with open(input_path, "rb") as f:
OMETiffConverter.to_tiledb(
f,
test_path,
pyramid_kwargs={
"scale_factors": scale_factors,
"chunked": chunked,
"progressive": progressive,
"order": 1,
"max_workers": max_workers,
},
)
OMETiffConverter.to_tiledb(
input_path,
test_path,
pyramid_kwargs={
"scale_factors": scale_factors,
"chunked": chunked,
"progressive": progressive,
"order": 1,
"max_workers": max_workers,
},
)

with TileDBOpenSlide(ground_path) as ground, TileDBOpenSlide(test_path) as test:
assert ground.level_count == test.level_count
Expand Down
6 changes: 4 additions & 2 deletions tests/integration/test_wrappers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

import pytest

from tests import get_path
Expand All @@ -19,7 +21,7 @@
],
)
def test_from_bioimg_wrapper(tmp_path, converter, file_path):
input_path = get_path(file_path)
input_path = str(get_path(file_path))
output_path = str(tmp_path)
output_path_round = str(tmp_path) + "/roundtrip"
if converter == Converters.OMETIFF:
Expand All @@ -32,7 +34,7 @@ def test_from_bioimg_wrapper(tmp_path, converter, file_path):
with pytest.raises(NotImplementedError):
to_bioimg(output_path, output_path_round, converter=converter)
else:
input_path = input_path / str(0)
input_path = os.path.join(input_path, str(0))
rfromtype = from_bioimg(input_path, output_path, converter=converter)
rtotype = to_bioimg(output_path, output_path_round, converter=converter)
assert rfromtype == rtotype == OMEZarrConverter
2 changes: 1 addition & 1 deletion tiledb/bioimg/converters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
FMT_VERSION = 2
DATASET_TYPE = "bioimg"

DEFAULT_SCRATCH_SPACE = "/dev/shm"
# Used on Windows only
WIN_OPENSLIDE_PATH = r"D:\openslide-win64\bin"
89 changes: 49 additions & 40 deletions tiledb/bioimg/converters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,15 @@

class ImageReader(ABC):
@abstractmethod
def __init__(self, input_path: str, logger: logging.Logger, **kwargs: Any):
def __init__(
self,
input_path: str,
*,
logger: Optional[logging.Logger],
config: Optional[tiledb.Config] = None,
ctx: Optional[tiledb.Ctx] = None,
**kwargs: Any,
):
"""Initialize this ImageReader"""

def __enter__(self) -> ImageReader:
Expand Down Expand Up @@ -304,44 +312,45 @@ def to_tiledb(
pyramid_kwargs: Optional[Mapping[str, Any]] = None,
) -> Type[ImageConverter]:
"""
Convert an image to a TileDB Group of Arrays, one per level.
:param source: path to the input image or ImageReader object
:param output_path: path to the TileDB group of arrays
:param level_min: minimum level of the image to be converted. By default set to 0
to convert all levels.
:param tiles: A mapping from dimension name (one of 'T', 'C', 'Z', 'Y', 'X') to
the (maximum) tile for this dimension.
:param tile_scale: The scaling factor applied to each tile during I/O.
Larger scale factors will result in less I/O operations.
:param preserve_axes: If true, preserve the axes order of the original image.
:param chunked: If true, convert one tile at a time instead of the whole image.
**Note**: The OpenSlideConverter may not be 100% lossless with chunked=True
for levels>0, even though the converted images look visually identical to the
original ones.
:param max_workers: Maximum number of threads that can be used for conversion.
Applicable only if chunked=True.
:param exclude_metadata: If true, drop original metadata of the images and exclude them from being ingested.
:param compressor: TileDB compression filter mapping for each level
:param log: verbose logging, defaults to None. Allows passing custom logging.Logger or boolean.
If None or bool=False it initiates an INFO level logging. If bool=True then a logger is instantiated in
DEBUG logging level.
:param reader_kwargs: Keyword arguments passed to the _ImageReaderType constructor.
:param pyramid_kwargs: Keyword arguments passed to the scaler constructor for
generating downsampled versions of the base level. Valid keyword arguments are:
scale_factors (Required): The downsampling factor for each level
scale_axes (Optional): Default "XY". The axes which will be downsampled
chunked (Optional): Default False. If true the image is split into chunks and
each one is independently downsampled. If false the entire image is
downsampled at once, but it requires more memory.
progressive (Optional): Default False. If true each downsampled image is
generated using the previous level. If false for every downsampled image
the level_min is used, but it requires more memory.
order (Optional): Default 1. The order of the spline interpolation. The order
has to be in the range 0-5. See `skimage.transform.warp` for detail.
max_workers (Optional): Default None. The maximum number of workers for
chunked downsampling. If None, it will default to the number of processors
on the machine, multiplied by 5.
Convert an image to a TileDB Group of Arrays, one per level.
:param source: path to the input image or ImageReader object
:param output_path: path to the TileDB group of arrays
:param level_min: minimum level of the image to be converted. By default set to 0
to convert all levels.
:param tiles: A mapping from dimension name (one of 'T', 'C', 'Z', 'Y', 'X') to
the (maximum) tile for this dimension.
:param tile_scale: The scaling factor applied to each tile during I/O.
Larger scale factors will result in less I/O operations.
:param preserve_axes: If true, preserve the axes order of the original image.
:param chunked: If true, convert one tile at a time instead of the whole image.
**Note**: The OpenSlideConverter may not be 100% lossless with chunked=True
for levels>0, even though the converted images look visually identical to the
original ones.
:param max_workers: Maximum number of threads that can be used for conversion.
Applicable only if chunked=True.
:param exclude_metadata: If true, drop original metadata of the images and exclude them from being ingested.
:param compressor: TileDB compression filter mapping for each level
:param log: verbose logging, defaults to None. Accepts a custom logging.Logger or a boolean.
If None or False, a logger is instantiated at the INFO logging level. If True, a logger is instantiated at the
DEBUG logging level.
:param reader_kwargs: Keyword arguments passed to the _ImageReaderType constructor. Allows passing configuration
parameters such as tiledb.Config and/or tiledb.Ctx.
:param pyramid_kwargs: Keyword arguments passed to the scaler constructor for
generating downsampled versions of the base level. Valid keyword arguments are:
scale_factors (Required): The downsampling factor for each level
scale_axes (Optional): Default "XY". The axes which will be downsampled
chunked (Optional): Default False. If true the image is split into chunks and
each one is independently downsampled. If false the entire image is
downsampled at once, but it requires more memory.
progressive (Optional): Default False. If true each downsampled image is
generated using the previous level. If false for every downsampled image
the level_min is used, but it requires more memory.
order (Optional): Default 1. The order of the spline interpolation. The order
has to be in the range 0-5. See `skimage.transform.warp` for detail.
max_workers (Optional): Default None. The maximum number of workers for
chunked downsampling. If None, it will default to the number of processors
on the machine, multiplied by 5.
"""

if log:
Expand All @@ -358,7 +367,7 @@ def to_tiledb(
reader = source
elif cls._ImageReaderType is not None:
reader = cls._ImageReaderType(
source, logger, **reader_kwargs if reader_kwargs else {}
source, logger=logger, **reader_kwargs if reader_kwargs else {}
)
else:
raise NotImplementedError(f"{cls} does not support importing")
Expand Down
15 changes: 14 additions & 1 deletion tiledb/bioimg/converters/ome_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import numpy as np
import tifffile

from tiledb import VFS, Config, Ctx
from tiledb.cc import WebpInputFormat
from tiledb.highlevel import _get_ctx

from .. import ATTR_NAME, EXPORT_TILE_SIZE, WHITE_RGBA
from ..helpers import get_decimal_from_rgba, get_logger_wrapper, get_rgba, iter_color
Expand All @@ -20,7 +22,10 @@ class OMETiffReader(ImageReader):
def __init__(
self,
input_path: str,
*,
logger: Optional[logging.Logger] = None,
config: Optional[Config] = None,
ctx: Optional[Ctx] = None,
extra_tags: Sequence[Union[str, int]] = (),
):
"""
Expand All @@ -31,14 +36,22 @@ def __init__(
"""
self._logger = get_logger_wrapper(False) if not logger else logger
self._extra_tags = extra_tags
self._tiff = tifffile.TiffFile(input_path)

# Use VFS to read the input image for all paths, whether local or remote
self._input_path = input_path
self._ctx = _get_ctx(ctx, config)
self._cfg = self._ctx.config()
self._vfs = VFS(config=self._cfg, ctx=self._ctx)
self._vfs_fh = self._vfs.open(input_path, mode="rb")
self._tiff = tifffile.TiffFile(self._vfs_fh)
# XXX ignore all but the first series
self._series = self._tiff.series[0]
omexml = self._tiff.ome_metadata
self._metadata = tifffile.xml2dict(omexml) if omexml else {}

def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
self._tiff.close()
self._vfs.close(file=self._vfs_fh)

@property
def logger(self) -> Optional[logging.Logger]:
Expand Down
16 changes: 13 additions & 3 deletions tiledb/bioimg/converters/ome_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@
from ome_zarr.reader import OMERO, Multiscales, Reader, ZarrLocation
from ome_zarr.writer import write_multiscale

from tiledb import Config, Ctx
from tiledb.cc import WebpInputFormat
from tiledb.highlevel import _get_ctx

from .. import WHITE_RGB
from ..helpers import get_logger_wrapper, get_rgba
from ..helpers import get_logger_wrapper, get_rgba, translate_config_to_s3fs
from .axes import Axes
from .base import ImageConverter, ImageReader, ImageWriter

Expand All @@ -23,15 +25,23 @@ class OMEZarrReader(ImageReader):
def __init__(
self,
input_path: str,
*,
logger: Optional[logging.Logger] = None,
config: Optional[Config] = None,
ctx: Optional[Ctx] = None,
):
"""
OME-Zarr image reader
:param input_path: The path to the Zarr image
"""
self._logger = get_logger_wrapper(False) if not logger else logger
self._root_node = next(Reader(ZarrLocation(input_path))())
self._ctx = _get_ctx(ctx, config)
self._cfg = self._ctx.config()
storage_options = translate_config_to_s3fs(self._cfg)
input_fh = zarr.storage.FSStore(
input_path, check=True, create=True, **storage_options
)
self._root_node = next(Reader(ZarrLocation(input_fh))())
self._multiscales = cast(Multiscales, self._root_node.load(Multiscales))
self._omero = cast(Optional[OMERO], self._root_node.load(OMERO))

Expand Down
26 changes: 22 additions & 4 deletions tiledb/bioimg/converters/openslide.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,41 @@
else:
import openslide as osd

from tiledb import Config, Ctx
from tiledb.cc import WebpInputFormat
from tiledb.highlevel import _get_ctx

from ..helpers import get_logger_wrapper, iter_color
from ..helpers import cache_filepath, get_logger_wrapper, is_remote_protocol, iter_color
from . import DEFAULT_SCRATCH_SPACE
from .axes import Axes
from .base import ImageConverter, ImageReader


class OpenSlideReader(ImageReader):
def __init__(self, input_path: str, logger: Optional[logging.Logger] = None):
def __init__(
self,
input_path: str,
*,
logger: Optional[logging.Logger] = None,
config: Optional[Config] = None,
ctx: Optional[Ctx] = None,
scratch_space: str = DEFAULT_SCRATCH_SPACE,
):
"""
OpenSlide image reader
:param input_path: The path to the OpenSlide image
"""
self._ctx = _get_ctx(ctx, config)
self._cfg = self._ctx.config()
self._logger = get_logger_wrapper(False) if not logger else logger
self._osd = osd.OpenSlide(input_path)
if is_remote_protocol(input_path):
resolved_path = cache_filepath(
input_path, config, ctx, self._logger, scratch_space
)
else:
resolved_path = input_path
self._osd = osd.OpenSlide(resolved_path)

def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
self._osd.close()
Expand Down
Loading

0 comments on commit 83a9166

Please sign in to comment.