From e4a3bd9079b9b165db8ea74932ce02b159cd6dd7 Mon Sep 17 00:00:00 2001
From: Freddie Witherden
Date: Sat, 7 Dec 2024 09:27:49 -0800
Subject: [PATCH] Add support for array-based bulk insertion.

---
Review notes (ignored by git-am, not part of the commit message):
 - IndexArrayHandle now looks up Index_CreateWithArray with a None
   fallback, so importing rtree.index keeps working with libspatialindex
   builds that do not export the symbol (core.py already guards for this).
 - Index.__init__ tests that fallback explicitly instead of catching
   AttributeError, which also hid unrelated attribute errors raised while
   preparing the arrays.
 - _create_idx_from_array validates its inputs with ValueError rather than
   assert (asserts vanish under -O), casts to explicit 64-bit dtypes, and
   compares strides after the cast.
 - The blob hash on the rtree/index.py "index" line predates these edits.

 rtree/core.py  | 17 ++++++++++++
 rtree/index.py | 73 +++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/rtree/core.py b/rtree/core.py
index 1bd2b80d..e5648f7a 100644
--- a/rtree/core.py
+++ b/rtree/core.py
@@ -125,6 +125,23 @@ def free_error_msg_ptr(result, func, cargs):
 rt.Index_CreateWithStream.restype = ctypes.c_void_p
 rt.Index_CreateWithStream.errcheck = check_void  # type: ignore
 
+try:
+    rt.Index_CreateWithArray.argtypes = [
+        ctypes.c_void_p,
+        ctypes.c_uint64,
+        ctypes.c_uint32,
+        ctypes.c_uint64,
+        ctypes.c_uint64,
+        ctypes.c_uint64,
+        ctypes.c_void_p,
+        ctypes.c_void_p,
+        ctypes.c_void_p
+    ]
+    rt.Index_CreateWithArray.restype = ctypes.c_void_p
+    rt.Index_CreateWithArray.errcheck = check_void  # type: ignore
+except AttributeError:
+    pass
+
 rt.Index_Destroy.argtypes = [ctypes.c_void_p]
 rt.Index_Destroy.restype = None
 rt.Index_Destroy.errcheck = check_void_done  # type: ignore
diff --git a/rtree/index.py b/rtree/index.py
index 90e53a54..1905cab9 100644
--- a/rtree/index.py
+++ b/rtree/index.py
@@ -207,20 +207,26 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         self.interleaved = bool(kwargs.get("interleaved", True))
 
         stream = None
+        arrays = None
         basename = None
         storage = None
         if args:
             if isinstance(args[0], str) or isinstance(args[0], bytes):
                 # they sent in a filename
                 basename = args[0]
-                # they sent in a filename, stream
+                # they sent in a filename, stream or filename, buffers
                 if len(args) > 1:
-                    stream = args[1]
+                    if isinstance(args[1], tuple):
+                        arrays = args[1]
+                    else:
+                        stream = args[1]
             elif isinstance(args[0], ICustomStorage):
                 storage = args[0]
                 # they sent in a storage, stream
                 if len(args) > 1:
                     stream = args[1]
+            elif isinstance(args[0], tuple):
+                arrays = args[0]
             else:
                 stream = args[0]
 
@@ -272,6 +278,17 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
         if stream and self.properties.type == RT_RTree:
             self._exception = None
             self.handle = self._create_idx_from_stream(stream)
+            if self._exception:
+                raise self._exception
+        elif arrays and self.properties.type == RT_RTree:
+            if IndexArrayHandle._create is None:
+                raise NotImplementedError(
+                    "libspatialindex >= 2.1 needed for bulk insert"
+                )
+
+            self._exception = None
+            self.handle = self._create_idx_from_array(*arrays)
+
             if self._exception:
                 raise self._exception
         else:
@@ -279,6 +296,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
             if stream:  # Bulk insert not supported, so add one by one
                 for item in stream:
                     self.insert(*item)
+            elif arrays:
+                raise NotImplementedError("Bulk insert only supported for RTrees")
 
     def get_size(self) -> int:
         warnings.warn(
@@ -1250,6 +1269,51 @@ def py_next_item(p_id, p_mins, p_maxs, p_dimension, p_data, p_length):
         stream = core.NEXTFUNC(py_next_item)
         return IndexStreamHandle(self.properties.handle, stream)
 
+    def _create_idx_from_array(self, ibuf, minbuf, maxbuf):
+        """Bulk-load a new index from NumPy arrays of ids and bounds.
+
+        :param ibuf: 1-D array holding one integer id per item.
+        :param minbuf: 2-D ``(n, d)`` array of per-item minimum coordinates.
+        :param maxbuf: 2-D ``(n, d)`` array of per-item maximum coordinates.
+        :returns: the :class:`IndexArrayHandle` wrapping the new index.
+        :raises ValueError: if the arrays are inconsistent with each other.
+        """
+        if minbuf.ndim != 2 or minbuf.shape != maxbuf.shape:
+            raise ValueError("minbuf and maxbuf must be 2-D arrays of equal shape")
+        if ibuf.ndim != 1 or len(ibuf) != len(minbuf):
+            raise ValueError("ibuf must be 1-D with one id per row of minbuf")
+
+        # Cast to fixed 64-bit types: the strides below are expressed in
+        # 8-byte elements, whereas plain int/float are platform dependent
+        ibuf = ibuf.astype("int64")
+        minbuf = minbuf.astype("float64")
+        maxbuf = maxbuf.astype("float64")
+
+        # A single pair of strides is passed for both bounds arrays, so the
+        # layouts must agree after the cast (astype may have copied them)
+        if minbuf.strides != maxbuf.strides:
+            raise ValueError("minbuf and maxbuf must share the same strides")
+
+        # Extract counts
+        n, d = minbuf.shape
+
+        # Compute strides in elements rather than bytes
+        i_stri = ibuf.strides[0] // ibuf.itemsize
+        d_i_stri = minbuf.strides[0] // minbuf.itemsize
+        d_j_stri = minbuf.strides[1] // minbuf.itemsize
+
+        return IndexArrayHandle(
+            self.properties.handle,
+            n,
+            d,
+            i_stri,
+            d_i_stri,
+            d_j_stri,
+            ibuf.ctypes.data,
+            minbuf.ctypes.data,
+            maxbuf.ctypes.data,
+        )
+
     def leaves(self):
         leaf_node_count = ctypes.c_uint32()
         p_leafsizes = ctypes.pointer(ctypes.c_uint32())
@@ -1431,6 +1495,11 @@ class IndexStreamHandle(IndexHandle):
     _create = core.rt.Index_CreateWithStream
 
 
+class IndexArrayHandle(IndexHandle):
+    # Only exported by libspatialindex >= 2.1; None when unavailable
+    _create = getattr(core.rt, "Index_CreateWithArray", None)
+
+
 class PropertyHandle(Handle):
     _create = core.rt.IndexProperty_Create
     _destroy = core.rt.IndexProperty_Destroy