Skip to content

Commit

Permalink
Enabling config and ctx in OMEZarr Converter
Browse files Browse the repository at this point in the history
  • Loading branch information
ktsitsi committed Jun 14, 2024
1 parent 8969a1c commit b877256
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 56 deletions.
78 changes: 39 additions & 39 deletions tiledb/bioimg/converters/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,45 +304,45 @@ def to_tiledb(
pyramid_kwargs: Optional[Mapping[str, Any]] = None,
) -> Type[ImageConverter]:
"""
Convert an image to a TileDB Group of Arrays, one per level.
:param source: path to the input image or ImageReader object
:param output_path: path to the TileDB group of arrays
:param level_min: minimum level of the image to be converted. By default set to 0
to convert all levels.
:param tiles: A mapping from dimension name (one of 'T', 'C', 'Z', 'Y', 'X') to
the (maximum) tile for this dimension.
:param tile_scale: The scaling factor applied to each tile during I/O.
Larger scale factors will result in less I/O operations.
:param preserve_axes: If true, preserve the axes order of the original image.
:param chunked: If true, convert one tile at a time instead of the whole image.
**Note**: The OpenSlideConverter may not be 100% lossless with chunked=True
for levels>0, even though the converted images look visually identical to the
original ones.
:param max_workers: Maximum number of threads that can be used for conversion.
Applicable only if chunked=True.
:param exclude_metadata: If true, drop original metadata of the images and exclude them from being ingested.
:param compressor: TileDB compression filter mapping for each level
:param log: verbose logging, defaults to None. Allows passing custom logging.Logger or boolean.
If None or bool=False it initiates an INFO level logging. If bool=True then a logger is instantiated in
DEBUG logging level.
:param reader_kwargs: Keyword arguments passed to the _ImageReaderType constructor. Allows passing configuration
parameters like tiledb.Config or/and tiledb.Ctx.
:param pyramid_kwargs: Keyword arguments passed to the scaler constructor for
generating downsampled versions of the base level. Valid keyword arguments are:
scale_factors (Required): The downsampling factor for each level
scale_axes (Optional): Default "XY". The axes which will be downsampled
chunked (Optional): Default False. If true the image is split into chunks and
each one is independently downsampled. If false the entire image is
downsampled at once, but it requires more memory.
progressive (Optional): Default False. If true each downsampled image is
generated using the previous level. If false for every downsampled image
the level_min is used, but it requires more memory.
order (Optional): Default 1. The order of the spline interpolation. The order
has to be in the range 0-5. See `skimage.transform.warp` for detail.
max_workers (Optional): Default None. The maximum number of workers for
chunked downsampling. If None, it will default to the number of processors
on the machine, multiplied by 5.
Convert an image to a TileDB Group of Arrays, one per level.
:param source: path to the input image or ImageReader object
:param output_path: path to the TileDB group of arrays
:param level_min: minimum level of the image to be converted. By default set to 0
to convert all levels.
:param tiles: A mapping from dimension name (one of 'T', 'C', 'Z', 'Y', 'X') to
the (maximum) tile for this dimension.
:param tile_scale: The scaling factor applied to each tile during I/O.
Larger scale factors will result in fewer I/O operations.
:param preserve_axes: If true, preserve the axes order of the original image.
:param chunked: If true, convert one tile at a time instead of the whole image.
**Note**: The OpenSlideConverter may not be 100% lossless with chunked=True
for levels>0, even though the converted images look visually identical to the
original ones.
:param max_workers: Maximum number of threads that can be used for conversion.
Applicable only if chunked=True.
:param exclude_metadata: If true, drop original metadata of the images and exclude them from being ingested.
:param compressor: TileDB compression filter mapping for each level
:param log: verbose logging, defaults to None. Allows passing custom logging.Logger or boolean.
If None or bool=False it initiates an INFO level logging. If bool=True then a logger is instantiated in
DEBUG logging level.
:param reader_kwargs: Keyword arguments passed to the _ImageReaderType constructor. Allows passing configuration
parameters like tiledb.Config and/or tiledb.Ctx.
:param pyramid_kwargs: Keyword arguments passed to the scaler constructor for
generating downsampled versions of the base level. Valid keyword arguments are:
scale_factors (Required): The downsampling factor for each level
scale_axes (Optional): Default "XY". The axes which will be downsampled
chunked (Optional): Default False. If true the image is split into chunks and
each one is independently downsampled. If false the entire image is
downsampled at once, but it requires more memory.
progressive (Optional): Default False. If true each downsampled image is
generated using the previous level. If false for every downsampled image
the level_min is used, but it requires more memory.
order (Optional): Default 1. The order of the spline interpolation. The order
has to be in the range 0-5. See `skimage.transform.warp` for detail.
max_workers (Optional): Default None. The maximum number of workers for
chunked downsampling. If None, it will default to the number of processors
on the machine, multiplied by 5.
"""

if log:
Expand Down
5 changes: 3 additions & 2 deletions tiledb/bioimg/converters/ome_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
import numpy as np
import tifffile

from tiledb.cc import WebpInputFormat
from tiledb import VFS, Config, Ctx
from tiledb.cc import WebpInputFormat

from .. import ATTR_NAME, EXPORT_TILE_SIZE, WHITE_RGBA
from ..helpers import get_decimal_from_rgba, get_logger_wrapper, get_rgba, iter_color
from .axes import Axes
Expand Down Expand Up @@ -37,7 +38,7 @@ def __init__(
# Use VFS for all paths local or remote for reading the input image
self._input_path = input_path
self._vfs = VFS(config=config, ctx=ctx)
self._vfs_fh = self._vfs.open(input_path, mode='rb')
self._vfs_fh = self._vfs.open(input_path, mode="rb")
self._tiff = tifffile.TiffFile(self._vfs_fh)
# XXX ignore all but the first series
self._series = self._tiff.series[0]
Expand Down
16 changes: 8 additions & 8 deletions tiledb/bioimg/converters/ome_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
from ome_zarr.reader import OMERO, Multiscales, Reader, ZarrLocation
from ome_zarr.writer import write_multiscale

from tiledb.cc import WebpInputFormat
from tiledb import Config, Ctx
from tiledb.cc import WebpInputFormat

from .. import WHITE_RGB
from ..helpers import get_logger_wrapper, get_rgba
from ..helpers import get_logger_wrapper, get_rgba, translate_config_to_s3fs
from .axes import Axes
from .base import ImageConverter, ImageReader, ImageWriter

Expand All @@ -24,20 +24,20 @@ class OMEZarrReader(ImageReader):
def __init__(
self,
input_path: str,
logger: Optional[logging.Logger] = None,
config: Optional[Config] = None,
ctx: Optional[Ctx] = None,
logger: Optional[logging.Logger] = None,
):
"""
OME-Zarr image reader
:param input_path: The path to the Zarr image
"""
self._logger = get_logger_wrapper(False) if not logger else logger
storage_options = {'key': config.get('vfs.s3.access_aws_access_key_id', None),
'secret': config.get('vfs.s3.aws_secret_access_key', None)}
store = zarr.storage.FSStore(input_path, check=True, create=True, **storage_options)
self._root_node = next(Reader(ZarrLocation(input_path))())
storage_options = translate_config_to_s3fs(config, ctx)
input_fh = zarr.storage.FSStore(
input_path, check=True, create=True, **storage_options
)
self._root_node = next(Reader(ZarrLocation(input_fh))())
self._multiscales = cast(Multiscales, self._root_node.load(Multiscales))
self._omero = cast(Optional[OMERO], self._root_node.load(OMERO))

Expand Down
40 changes: 37 additions & 3 deletions tiledb/bioimg/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,24 @@
import os
import sys
from pathlib import Path
from typing import Any, Dict, Iterator, Mapping, MutableMapping, Sequence, Tuple, Optional
from typing import (
Any,
Dict,
Iterator,
Mapping,
MutableMapping,
Optional,
Sequence,
Tuple,
)
from urllib.parse import urlparse

import numpy as np

import tiledb
from tiledb import Config, Ctx
from tiledb.cc import WebpInputFormat
from tiledb.highlevel import _get_ctx

from . import ATTR_NAME
from .converters.axes import Axes, AxesMapper
Expand Down Expand Up @@ -79,7 +89,11 @@ def get_or_create(self, name: str, schema: tiledb.ArraySchema) -> Tuple[str, boo


def open_bioimg(
uri: str, mode: str = "r", attr: str = ATTR_NAME, config: Config = None, ctx: Optional[Ctx] = None
uri: str,
mode: str = "r",
attr: str = ATTR_NAME,
config: Config = None,
ctx: Optional[Ctx] = None,
) -> tiledb.Array:
return tiledb.open(
uri, mode=mode, attr=attr if mode == "r" else None, config=config, ctx=ctx
Expand Down Expand Up @@ -141,7 +155,9 @@ def get_axes_mapper(
return axes_mapper, dim_names, tiles


def iter_levels_meta(group: tiledb.Group, config: Config = None, ctx: Optional[Ctx] = None) -> Iterator[Mapping[str, Any]]:
def iter_levels_meta(
group: tiledb.Group, config: Config = None, ctx: Optional[Ctx] = None
) -> Iterator[Mapping[str, Any]]:
for o in group:
with open_bioimg(o.uri, config=config, ctx=ctx) as array:
try:
Expand Down Expand Up @@ -326,3 +342,21 @@ def get_logger_wrapper(
)

return logger


def translate_config_to_s3fs(
    config: Optional[tiledb.Config], ctx: Optional[tiledb.Ctx]
) -> Mapping[str, Any]:
    """
    Build a mapping of S3 storage options from TileDB configuration values.

    The effective configuration is read from the context returned by
    ``_get_ctx(ctx, config)``.
    NOTE(review): presumably ``_get_ctx`` falls back to a default context when
    both arguments are None -- confirm against tiledb.highlevel.

    :param config: TileDB configuration, or None
    :param ctx: TileDB context, or None
    :return: A mapping with the keys "key", "secret", "token", "endpoint_url",
        "max_concurrency" and "client_kwargs" (holding "region_name"). Within this
        package it is expanded as keyword arguments to zarr.storage.FSStore.
    """
    cfg = _get_ctx(ctx, config).config()

    # The second argument of the outer get() is the fallback used when
    # "vfs.s3.max_parallel_ops" is not set.
    max_concurrency = int(
        cfg.get("vfs.s3.max_parallel_ops", cfg.get("sm.io_concurrency_level"))
    )

    # `or None` normalizes empty-string (falsy) settings to None.
    storage_options = {
        "key": cfg.get("vfs.s3.aws_access_key_id", None) or None,
        "secret": cfg.get("vfs.s3.aws_secret_access_key", None) or None,
        "token": cfg.get("vfs.s3.aws_session_token", None) or None,
        "endpoint_url": cfg.get("vfs.s3.endpoint_override", "") or None,
        "max_concurrency": max_concurrency,
        "client_kwargs": {"region_name": cfg.get("vfs.s3.region", "") or None},
    }
    return storage_options
16 changes: 12 additions & 4 deletions tiledb/bioimg/openslide.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import warnings
from operator import attrgetter
from typing import Any, Mapping, MutableMapping, Sequence, Tuple, Union, Optional
from typing import Any, Mapping, MutableMapping, Optional, Sequence, Tuple, Union

import numpy as np

Expand Down Expand Up @@ -31,8 +31,14 @@ def from_group_uri(cls, uri: str, attr: str = ATTR_NAME) -> TileDBOpenSlide:
)
return cls(uri, attr=attr)

def __init__(self, uri: str, *, attr: str = ATTR_NAME, config: Config = None, ctx: Optional[Ctx] = None,
):
def __init__(
self,
uri: str,
*,
attr: str = ATTR_NAME,
config: Config = None,
ctx: Optional[Ctx] = None,
):
"""Open this TileDBOpenSlide.
:param uri: uri of a tiledb.Group containing the image
Expand All @@ -42,7 +48,9 @@ def __init__(self, uri: str, *, attr: str = ATTR_NAME, config: Config = None, ct
pixel_depth = dict(json.loads(pixel_depth)) if pixel_depth else {}
self._levels = sorted(
(
TileDBOpenSlideLevel(o.uri, pixel_depth, attr=attr, config=config, ctx=ctx)
TileDBOpenSlideLevel(
o.uri, pixel_depth, attr=attr, config=config, ctx=ctx
)
for o in self._group
),
key=attrgetter("level"),
Expand Down

0 comments on commit b877256

Please sign in to comment.