Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python] Re-introduce tiledb_ctx for SOMATileDBContext #3335

Merged
merged 7 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 139 additions & 34 deletions apis/python/src/tiledbsoma/options/_soma_tiledb_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, Literal, Mapping, Optional, Union
from typing import Any, Dict, Literal, Mapping

from somacore import ContextBase
from typing_extensions import Self
Expand All @@ -19,16 +19,36 @@
from .._types import OpenTimestamp
from .._util import ms_to_datetime, to_timestamp_ms

try:
from tiledb import Ctx as TileDBCtx

TILEDB_EXISTS = True

Check warning on line 25 in apis/python/src/tiledbsoma/options/_soma_tiledb_context.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/options/_soma_tiledb_context.py#L25

Added line #L25 was not covered by tests
except ModuleNotFoundError:
# If we set this to None, then the type hint TileDBCtx | None in the code
# below will error out with:
# TypeError: unsupported operand type(s) for |: 'NoneType' and 'NoneType'
# We prevent this by using Any as a catch-all type. This allows the type hint
# to remain compatible even when TileDB is not installed, without causing any
# issues at runtime or with type checking tools
TileDBCtx = Any
TILEDB_EXISTS = False


def _check_tiledb_ctx() -> None:
if not TILEDB_EXISTS:
raise ModuleNotFoundError(
"The 'tiledb' module is required to access 'tiledb_ctx' but is "
"not installed."
)


def _default_config(
override: Mapping[str, Union[str, float]]
) -> Dict[str, Union[str, float]]:
def _default_config(override: Mapping[str, str | float]) -> Dict[str, str | float]:
"""Returns a fresh dictionary with TileDB config values.

These should be reasonable defaults that can be used out-of-the-box.
``override`` does exactly what it says: overrides default entries.
"""
cfg: Dict[str, Union[str, float]] = {
cfg: Dict[str, str | float] = {
"sm.mem.reader.sparse_global_order.ratio_array_data": 0.3
}
cfg.update(override)
Expand All @@ -41,7 +61,7 @@
return clib.SOMAContext({k: str(v) for k, v in _default_config({}).items()})


def _maybe_timestamp_ms(input: Optional[OpenTimestamp]) -> Optional[int]:
def _maybe_timestamp_ms(input: OpenTimestamp | None) -> int | None:
if input is None:
return None
return to_timestamp_ms(input)
Expand All @@ -66,26 +86,35 @@

def __init__(
self,
tiledb_config: Optional[Dict[str, Union[str, float]]] = None,
timestamp: Optional[OpenTimestamp] = None,
threadpool: Optional[ThreadPoolExecutor] = None,
tiledb_config: Dict[str, str | float] | None = None,
tiledb_ctx: TileDBCtx | None = None,
timestamp: OpenTimestamp | None = None,
threadpool: ThreadPoolExecutor | None = None,
) -> None:
"""Initializes a new SOMATileDBContext.

Either ``tiledb_ctx`` or ``tiledb_config`` may be provided, or both may
be left at their default. If neither are provided, this will use a
single shared :class:`tiledb.Ctx` instantiated upon first use.
If ``tiledb_ctx`` is provided, that exact :class:`tiledb.Ctx` is used.
Either ``tiledb_config`` or ``tiledb_ctx`` may be provided, or both may
be left at their default.

If a ``tiledb_config`` is provided (in the form of a ``dict``),
it is used to construct a new ``Ctx``.
it is used to construct a new ``SOMAContext``.

Args:
tiledb_ctx: An existing TileDB Context for use as the TileDB Context
in this configuration.
If `a `tiledb_ctx`` is provided, then it uses the configuration options
to construct a new ``SOMAContext``. Note that ``SOMAContext`` will create
a new ``Context`` object. The ``tiledb_ctx`` option is only available if
the `tiledb` module is installed; otherwise, it will throw a ````ModuleNotFoundError``.

If neither are provided, this will use a single shared :class:`SOMAContext`
instantiated upon first use.

Args:
tiledb_config: A set of TileDB configuration options to use,
overriding the default configuration.

tiledb_ctx: A TileDB Context where the set of TileDB
configuration options are used to override the default
configuration.

timestamp: The default timestamp for operations on SOMA objects,
provided either as a ``datetime.datetime`` or a number of
milliseconds since the Unix epoch.
Expand Down Expand Up @@ -117,30 +146,50 @@
provided, a new ThreadPoolExecutor will be created with
default settings.
"""
if tiledb_ctx is not None and tiledb_config is not None:
raise ValueError(

Check warning on line 150 in apis/python/src/tiledbsoma/options/_soma_tiledb_context.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/options/_soma_tiledb_context.py#L150

Added line #L150 was not covered by tests
"only one of tiledb_config or tiledb_ctx"
" may be set when constructing a SOMATileDBContext"
)

# A TileDB Context may only be passed if tiledb is installed
if tiledb_ctx is not None:
_check_tiledb_ctx()

self._lock = threading.Lock()
"""A lock to ensure single initialization of ``_tiledb_ctx``."""
self._initial_config: Optional[Dict[str, Union[str, float]]] = (

self._initial_config = (
None if tiledb_config is None else _default_config(tiledb_config)
)
"""A dictionary of options to override the default TileDB config.

This includes both the user-provided options and the default options
that we provide to TileDB. If this is unset, then either we were
provided with a TileDB Ctx, or we need to use The Default Global Ctx.
"""

self._tiledb_ctx = tiledb_ctx
"""The TileDB context passed in by the user. None if not provided."""

"""The TileDB context to use, either provided or lazily constructed."""
self._timestamp_ms = _maybe_timestamp_ms(timestamp)

self.threadpool = threadpool or ThreadPoolExecutor()
"""User specified threadpool. If None, we'll instantiate one ourselves."""
self._native_context: Optional[clib.SOMAContext] = None

self._native_context: clib.SOMAContext | None = None
"""Lazily construct clib.SOMAContext."""

@property
def timestamp_ms(self) -> Optional[int]:
def timestamp_ms(self) -> int | None:
"""
The default timestamp for SOMA operations, as milliseconds since
the Unix epoch, or ``None`` if not provided.
"""
return self._timestamp_ms

@property
def timestamp(self) -> Optional[datetime.datetime]:
def timestamp(self) -> datetime.datetime | None:
"""
The default timestamp for SOMA operations, or ``None`` if not provided.
"""
Expand All @@ -153,17 +202,53 @@
"""The C++ SOMAContext for this SOMA context."""
with self._lock:
if self._native_context is None:
if self._initial_config is None:
self._native_context = _default_global_native_context()
else:
# SOMAContext needs to be lazily created
if self._initial_config is not None:
# The user passed in a tiledb_config
cfg = self._internal_tiledb_config()
self._native_context = clib.SOMAContext(
{k: str(v) for k, v in cfg.items()}
)
return self._native_context
elif self._tiledb_ctx is not None:
# The user passed in a tiledb_ctx; if tiledb is not installed
# it should be impossible to enter into this block because
# we already check that in the constructor
assert TileDBCtx is not None
cfg = self._tiledb_ctx.config().dict()
self._native_context = clib.SOMAContext(

Check warning on line 218 in apis/python/src/tiledbsoma/options/_soma_tiledb_context.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/options/_soma_tiledb_context.py#L216-L218

Added lines #L216 - L218 were not covered by tests
{k: str(v) for k, v in cfg.items()}
)
else:
# The user did not provide settings so create a default
self._native_context = _default_global_native_context()

# else SOMAContext already exists
return self._native_context

@property
def tiledb_config(self) -> Dict[str, Union[str, float]]:
def tiledb_ctx(self) -> TileDBCtx | None:
"""The TileDB-Py Context passed in to create the ``SOMATileDBContext``.

This accessor is only available if tiledb is installed. If
``SOMATileDBContext`` was constructed with a ``tiledb_ctx``, this returns
the ``tiledb.Ctx`` that was passed in. If it was not constructed with
a ``tiledb_ctx`` (meaning, either using `tiledb_config` or with default
settings), then this will return ``None``.

Note that when constructing ``SOMATileDBContext`` with ``tiledb_ctx``,
internally it uses ``tiledb_ctx.config().dict()`` and constructs a new
``tiledb::Context`` object from the configuration options. Meaning, the
``Context`` passed-in as ``tiledb_ctx`` is NOT the same ``Context`` held
by ``SOMATileDBContext``.

If `tiledb` is not installed, this accessor throws a ``ModuleNotFoundError``
error.
"""
_check_tiledb_ctx()
return self._tiledb_ctx

@property
def tiledb_config(self) -> Dict[str, str | float]:
"""The TileDB configuration dictionary for this SOMA context.

If this ``SOMATileDBContext`` already has a ``tiledb_ctx``, this will
Expand All @@ -177,7 +262,7 @@
with self._lock:
return self._internal_tiledb_config()

def _internal_tiledb_config(self) -> Dict[str, Union[str, float]]:
def _internal_tiledb_config(self) -> Dict[str, str | float]:
"""Internal function for getting the TileDB Config.

Returns a new dict with the contents. Caller must hold ``_lock``.
Expand All @@ -186,6 +271,11 @@
if self._native_context is not None:
return dict(self._native_context.config())

# The user passed in a TileDB Context. Return its actual config.
if self._tiledb_ctx is not None:
_check_tiledb_ctx()
return dict(self._tiledb_ctx.config())

Check warning on line 277 in apis/python/src/tiledbsoma/options/_soma_tiledb_context.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/options/_soma_tiledb_context.py#L276-L277

Added lines #L276 - L277 were not covered by tests

# Our context has not yet been built.
# We return what will be passed into the context.
return (
Expand All @@ -197,19 +287,20 @@
def replace(
self,
*,
tiledb_config: Optional[Dict[str, Any]] = None,
timestamp: Union[None, OpenTimestamp, _Unset] = _UNSET,
threadpool: Union[None, ThreadPoolExecutor, _Unset] = _UNSET,
tiledb_config: Dict[str, Any] | None = None,
tiledb_ctx: TileDBCtx | None = None,
timestamp: OpenTimestamp | _Unset | None = _UNSET,
threadpool: ThreadPoolExecutor | _Unset | None = _UNSET,
) -> Self:
"""Create a copy of the context, merging changes.

Args:
tiledb_config:
A dictionary of parameters for `tiledb.Config() <https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/tiledb-py/en/stable/python-api.html#config>`_.
A dictionary of parameters representing `tiledb.Config() <https://tiledb-inc-tiledb.readthedocs-hosted.com/projects/tiledb-py/en/stable/python-api.html#config>`_.
To remove a parameter from the existing config, provide ``None``
as the value.
tiledb_ctx:
A TileDB Context to replace the current context with.
A TileDB Context to pull the configuration options from.
timestamp:
A timestamp to replace the current timestamp with.
Explicitly passing ``None`` will remove the timestamp.
Expand All @@ -230,10 +321,18 @@
"""
with self._lock:
if tiledb_config is not None:
if tiledb_ctx:
raise ValueError(

Check warning on line 325 in apis/python/src/tiledbsoma/options/_soma_tiledb_context.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/options/_soma_tiledb_context.py#L325

Added line #L325 was not covered by tests
"Either tiledb_config or tiledb_ctx may be provided"
" to replace(), but not both."
)
new_config = self._internal_tiledb_config()
new_config.update(tiledb_config)
tiledb_config = {k: v for (k, v) in new_config.items() if v is not None}

if tiledb_ctx is not None:
_check_tiledb_ctx()

if timestamp == _UNSET:
# Keep the existing timestamp if not overridden.
timestamp = self._timestamp_ms
Expand All @@ -244,11 +343,12 @@
assert timestamp is None or isinstance(timestamp, (datetime.datetime, int))
return type(self)(
tiledb_config=tiledb_config,
tiledb_ctx=tiledb_ctx,
timestamp=timestamp,
threadpool=threadpool,
)

def _open_timestamp_ms(self, in_timestamp: Optional[OpenTimestamp]) -> int:
def _open_timestamp_ms(self, in_timestamp: OpenTimestamp | None) -> int:
"""Returns the real timestamp that should be used to open an object."""
if in_timestamp is not None:
return to_timestamp_ms(in_timestamp)
Expand All @@ -268,6 +368,11 @@
if context is None:
return SOMATileDBContext()

if TILEDB_EXISTS and isinstance(context, TileDBCtx):
raise TypeError(

Check warning on line 372 in apis/python/src/tiledbsoma/options/_soma_tiledb_context.py

View check run for this annotation

Codecov / codecov/patch

apis/python/src/tiledbsoma/options/_soma_tiledb_context.py#L372

Added line #L372 was not covered by tests
"context is a tiledb.Ctx, not a SOMATileDBContext -- please wrap it in tiledbsoma.SOMATileDBContext(...)"
)

if not isinstance(context, SOMATileDBContext):
raise TypeError("context is not a SOMATileDBContext")

Expand Down
3 changes: 2 additions & 1 deletion apis/python/src/tiledbsoma/soma_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ void load_soma_context(py::module& m) {
py::class_<SOMAContext, std::shared_ptr<SOMAContext>>(m, "SOMAContext")
.def(py::init<>())
.def(py::init<std::map<std::string, std::string>>())
.def("config", &SOMAContext::tiledb_config);
.def("config", &SOMAContext::tiledb_config)
.def("tiledb_ctx", &SOMAContext::tiledb_ctx);
};
} // namespace libtiledbsomacpp
38 changes: 38 additions & 0 deletions apis/python/tests/test_context.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import importlib
import time
from unittest import mock

Expand Down Expand Up @@ -122,3 +123,40 @@ def test_malformed_concurrency_config_value():
tiledbsoma.IntIndexer(np.arange(100, dtype=np.int64), context=ctx).get_indexer(
np.array([0, 1])
)


@pytest.mark.skipif(
importlib.util.find_spec("tiledb") is not None, reason="TileDB-Py is installed"
)
def test_tiledb_ctx_without_tiledb():
# Test that tiledb_ctx errors out as expected without tiledb-py

with pytest.raises(ModuleNotFoundError):
tiledbsoma.SOMATileDBContext(tiledb_ctx="junk")

sctx = tiledbsoma.SOMATileDBContext()
with pytest.raises(ModuleNotFoundError):
sctx.tiledb_ctx

with pytest.raises(ModuleNotFoundError):
sctx.replace(tiledb_ctx="junk")


@pytest.mark.skipif(
importlib.util.find_spec("tiledb") is None, reason="TileDB-Py is not installed"
)
def test_tiledb_ctx_with_tiledb():
# If tiledb-py is installed, test that tiledb_ctx works to handle tiledb.Ctx
import tiledb

# Default
sctx = tiledbsoma.SOMATileDBContext(tiledb_ctx=tiledb.Ctx())
assert sctx.tiledb_ctx.config() == tiledb.Ctx().config()

# Pass config
sctx = tiledbsoma.SOMATileDBContext(tiledb_ctx=tiledb.Ctx({"foo": "bar"}))
assert sctx.tiledb_ctx.config()["foo"] == "bar"

# Replace config
sctx = sctx.replace(tiledb_ctx=tiledb.Ctx({"foo": "baz"}))
assert sctx.tiledb_ctx.config()["foo"] == "baz"