-
Notifications
You must be signed in to change notification settings - Fork 10
/
options.py
180 lines (135 loc) · 5.21 KB
/
options.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
"""Enums and other types used as options across methods of many types.
These types are *concrete* and should be used as-is as inputs to the various
SOMA types that require them, not reimplemented by the implementing package.
"""
import enum
from typing import Any, Dict, Mapping, Optional, Sequence, TypeVar, Union
import attrs
import numpy as np
import numpy.typing as npt
import pyarrow as pa
from typing_extensions import Final, Literal
from . import types
# ``Final`` marks this name as a constant that must not be rebound.
SOMA_JOINID: Final = "soma_joinid"
"""Global constant for the SOMA join ID."""
# ``Literal`` restricts the alias to exactly the strings "r" and "w".
OpenMode = Literal["r", "w"]
"""How to open a SOMA object: read or write."""
class ReadPartitions:
    """Sentinel base class shared by the read-partition option types.

    It declares no fields and no behavior of its own.

    Lifecycle:
        Experimental
    """

    # Empty slots: instances of this base carry no per-instance ``__dict__``.
    __slots__ = ()
@attrs.define(frozen=True)
class IOfN(ReadPartitions):
    """Requests partition ``i`` of a read that is split into ``n`` partitions.

    When a read operation is divided into ``n`` partitions of approximately
    equal size, this selects the ``i``th one (zero-indexed) to be returned.

    Raises:
        ValueError: if ``i`` does not lie in the half-open range ``[0, n)``.

    Lifecycle:
        Experimental
    """

    i: int = attrs.field()
    """Zero-based index of the partition to return."""
    n: int = attrs.field()
    """Total number of partitions."""

    @i.validator
    def _validate(self, _attribute, _value):
        # The attribute/value arguments passed by attrs are not needed: the
        # check involves both ``i`` and ``n``, so it reads them off ``self``.
        if 0 <= self.i < self.n:
            return
        raise ValueError(
            f"Partition index {self.i} must be in the range [0, {self.n})"
        )
@attrs.define(frozen=True)
class BatchSize:
    """Controls how large each batch returned from a read should be.

    Reads on foundational types yield an iterator over "batches" of data so
    that larger-than-core datasets can be processed; this class lets the
    caller choose the size of those batches.

    When neither option is set, a "reasonable" batch size is determined
    automatically.

    For example::

        BatchSize(count=100)
        # Will return batches of 100 elements.

        BatchSize(bytes=1024 ** 2)
        # Will return batches of up to 1 MB.

        BatchSize()
        # Will return automatically-sized batches.

    Raises:
        ValueError: if ``count`` or ``bytes`` is negative, or if both are set
            to nonzero values.

    Lifecycle:
        Experimental
    """

    count: Optional[int] = attrs.field(default=None)
    """Number of rows in each returned ``arrow.Table``."""
    bytes: Optional[int] = attrs.field(default=None)
    """Upper bound, in bytes, on the data returned per batch."""

    @count.validator
    @bytes.validator
    def _validate(self, attribute: attrs.Attribute, val):
        # Falsy values (None, or 0 which is treated the same way) are always
        # accepted, so only truthy values are inspected.
        if val:
            if val < 0:
                raise ValueError(f"If set, '{attribute.name}' must be positive")
            # The two options are mutually exclusive.
            if self.count and self.bytes:
                raise ValueError("Either 'count' or 'bytes' may be set, not both")
# NOTE: because ``object`` is one of the union members, every value satisfies
# this alias under static type checking; the dict form documents the
# expected shape rather than enforcing it.
PlatformConfig = Union[Dict[str, Mapping[str, Any]], object]
"""Type alias for the ``platform_config`` parameter.
``platform_config`` allows platform-specific configuration data to be passed
to individual calls. This is either a ``dict``, or an implementation-specific
configuration object:
- If a dictionary, the keys to the dictionary are the name of a SOMA
implementation, and the value of the dictionary is configuration specific to
that implementation.
- If an implementation-specific object, that implementation will use that object
for configuration data; it will be ignored by others.
See the "Per-call configuration" section of the main SOMA specification.
"""
class ResultOrder(enum.Enum):
    """Orderings in which the results of a read may be returned.

    Lifecycle:
        Experimental
    """

    # Each member's value is the string spelling of that ordering.
    AUTO = "auto"
    ROW_MAJOR = "row-major"
    COLUMN_MAJOR = "column-major"
# The ``Literal`` strings match the values of the ``ResultOrder`` members.
ResultOrderStr = Union[ResultOrder, Literal["auto", "row-major", "column-major"]]
"""A ResultOrder, or the str representing it."""
# ``Slice`` here comes from the package-local ``types`` module.
DenseCoord = Union[None, int, types.Slice[int]]
"""A single coordinate range for reading dense data.
``None`` indicates the entire domain of a dimension; values of this type are
not ``Optional``, but may be ``None``.
"""
# Each element is a ``DenseCoord``; presumably one entry per dimension --
# confirm against callers.
DenseNDCoords = Sequence[DenseCoord]
"""A sequence of ranges to read dense data."""
# Unconstrained type variable that parameterizes ``ValSliceOrSequence``.
_T = TypeVar("_T")
# ``Slice`` and ``Sequence`` are taken from the package-local ``types``
# module here, not from ``typing``.
ValSliceOrSequence = Union[_T, types.Slice[_T], types.Sequence[_T]]
"""A value of a type, a Slice of that type, or a Sequence of that type."""
# NOTE: Keep this in sync with the types accepted in `_canonicalize_coord`
# in ./query/axis.py.
# Each ``ValSliceOrSequence[X]`` member below stands for ``X`` itself, a
# ``types.Slice[X]``, or a ``types.Sequence[X]``.
SparseDFCoord = Union[
    None,
    ValSliceOrSequence[bytes],
    ValSliceOrSequence[float],
    ValSliceOrSequence[int],
    ValSliceOrSequence[slice],
    ValSliceOrSequence[str],
    ValSliceOrSequence[np.datetime64],
    ValSliceOrSequence[pa.TimestampType],
    pa.Array,
    pa.ChunkedArray,
    npt.NDArray[np.integer],
    npt.NDArray[np.datetime64],
]
"""A single coordinate range for one dimension of a sparse dataframe."""
SparseDFCoords = Sequence[SparseDFCoord]
"""A sequence of coordinate ranges for reading sparse dataframes."""
# Accepts ``None``, an int / slice of ints / sequence of ints (via
# ``ValSliceOrSequence[int]``), a NumPy integer array, or a PyArrow
# ``IntegerArray``.
SparseNDCoord = Union[
    None,
    ValSliceOrSequence[int],
    npt.NDArray[np.integer],
    pa.IntegerArray,
]
"""A single coordinate range for one dimension of a sparse nd-array."""
# Each element is a ``SparseNDCoord``.
SparseNDCoords = Sequence[SparseNDCoord]
"""A sequence of coordinate ranges for reading sparse ndarrays."""