Fix memory leak #151

Merged: 8 commits, May 17, 2019

Changes from all commits
azure-pipelines.yml: 2 changes (1 addition, 1 deletion)
@@ -27,7 +27,7 @@ steps:
architecture: 'x64'

- script: |
- python -m pip install --upgrade pip setuptools wheel numpy tox setuptools-scm cython
+ python -m pip install --upgrade pip setuptools wheel numpy tox setuptools-scm cython psutil
displayName: 'Install dependencies'

- script: |
requirements.txt: 1 change (1 addition, 0 deletions)
@@ -4,3 +4,4 @@ numpy>=1.7.2
setuptools>=18.0.1
setuptools-scm>=1.5.4
wheel>=0.30.0
+ psutil
requirements_dev.txt: 1 change (1 addition, 0 deletions)
@@ -6,3 +6,4 @@ setuptools==40.8.0
setuptools-scm==1.5.4
wheel==0.30.0
tox==3.0.0
+ psutil
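psutil is added to CI and to both requirements files. The diff itself does not show how it is used, but it presumably backs a memory-usage regression test for this leak. A minimal sketch of such a check is below; the function names, array type, iteration count, and growth threshold are illustrative assumptions, not code from this PR.

# Hypothetical leak check using psutil; names and thresholds are illustrative only.
import os

import psutil
import tiledb


def rss_mb():
    # Resident set size of the current process, in MiB.
    return psutil.Process(os.getpid()).memory_info().rss / (1024 * 1024)


def assert_read_does_not_leak(uri, iterations=100, allowed_growth_mb=50):
    before = rss_mb()
    for _ in range(iterations):
        # Repeatedly open and read; the read buffers must be freed each time.
        with tiledb.DenseArray(uri, mode='r') as A:
            A[:]
    growth = rss_mb() - before
    assert growth < allowed_growth_mb, \
        "RSS grew by {:.1f} MiB over {} reads".format(growth, iterations)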
setup.py: 2 changes (1 addition, 1 deletion)
@@ -36,7 +36,7 @@ def get_zipfile(url):
from sys import version_info as ver

# Target branch
- TILEDB_VERSION = "dev"
+ TILEDB_VERSION = "1.5.1"

# Use `setup.py [] --debug` for a debug build of libtiledb
TILEDB_DEBUG_BUILD = False
tiledb/libtiledb.pyx: 84 changes (50 additions, 34 deletions)
@@ -11,9 +11,6 @@ from cpython.bytes cimport (PyBytes_GET_SIZE,
PyBytes_FromString,
PyBytes_FromStringAndSize)

- from cpython.mem cimport (PyMem_Malloc,
- PyMem_Realloc,
- PyMem_Free)

from cpython.ref cimport (Py_INCREF, Py_DECREF, PyTypeObject)

@@ -52,6 +49,10 @@ cdef extern from "numpy/arrayobject.h":
object obj)
# Steals a reference to dtype, need to incref the dtype
object PyArray_Scalar(void* ptr, np.dtype descr, object itemsize)
+ void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)
+ void* PyDataMem_NEW(size_t nbytes)
+ void* PyDataMem_RENEW(void* data, size_t nbytes)
+ void PyDataMem_FREE(void* data)

import sys
from os.path import abspath
@@ -72,7 +73,8 @@ _MB = 1024 * _KB
# The native int type for this platform
IntType = np.dtype(np.int_)

- # Numpy initialization code
+ # Numpy initialization code (critical)
+ # https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.import_array
np.import_array()

def version():
@@ -2375,7 +2377,7 @@ cdef class KV(object):

@staticmethod
def create(uri, KVSchema schema, key=None, Ctx ctx=default_ctx()):
"""Creates a persistent KV at the given URI, returns a KV class instance
"""Creates a persistent KV at the given URI
"""
cdef tiledb_ctx_t* ctx_ptr = ctx.ptr
cdef bytes buri = unicode_path(uri)
@@ -2402,7 +2404,7 @@ cdef class KV(object):
if rc != TILEDB_OK:
_raise_ctx_err(ctx_ptr, rc)

- return KV(uri, key=key, ctx=ctx)
+ return

def __init__(self, uri, mode='r', key=None, timestamp=None, Ctx ctx=default_ctx()):
cdef tiledb_ctx_t* ctx_ptr = ctx.ptr
@@ -3403,6 +3405,7 @@ cdef class Array(object):
cdef uint64_t _timestamp = 0
if timestamp is not None:
_timestamp = <uint64_t> timestamp

# allocate and then open the array
cdef tiledb_array_t* array_ptr = NULL
cdef int rc = TILEDB_OK
@@ -3429,9 +3432,12 @@ cdef class Array(object):

# view on a single attribute
if attr and not any(attr == schema.attr(i).name for i in range(schema.nattr)):
+ tiledb_array_close(ctx_ptr, array_ptr)
+ tiledb_array_free(&array_ptr)
raise KeyError("No attribute matching '{}'".format(attr))
else:
self.view_attr = unicode(attr) if (attr is not None) else None

self.ctx = ctx
self.uri = unicode(uri)
self.mode = unicode(mode)
@@ -3817,11 +3823,11 @@ cdef class Array(object):
# note: must not divide by itemsize for a string, because it may be zero (e.g 'S0')
dims[0] = el_bytelen / el_dtype.base.itemsize
newobj = \
- PyArray_NewFromDescr(
+ np.copy(PyArray_NewFromDescr(
<PyTypeObject*> np.ndarray,
el_dtype.base, 1, dims, NULL,
el_ptr,
- np.NPY_ENSURECOPY, <object> NULL)
+ 0, <object> NULL))

# set the output object
out_flat[el] = newobj
@@ -3840,7 +3846,6 @@ cdef class ReadQuery(object):
@property
def _offsets(self): return self._offsets


def __init__(self, Array array, np.ndarray subarray, list attr_names, tiledb_layout_t layout):
self._buffers = dict()
self._offsets = dict()
@@ -3854,8 +3859,10 @@
cdef:
vector [void*] buffer_ptrs
vector [uint64_t*] offsets_ptrs
+ void* tmp_ptr = NULL
void* subarray_ptr = NULL
np.npy_intp dims[1]
+ np.ndarray tmparray
bytes battr_name
Py_ssize_t nattr = len(attr_names)

@@ -3880,11 +3887,13 @@
tiledb_query_free(&query_ptr)
_raise_ctx_err(ctx_ptr, rc)

- cdef uint64_t* buffer_sizes_ptr = <uint64_t*> PyMem_Malloc(nattr * sizeof(uint64_t))
+ # lifetime: free in finally clause
+ cdef uint64_t* buffer_sizes_ptr = <uint64_t*> PyDataMem_NEW(nattr * sizeof(uint64_t))
if buffer_sizes_ptr == NULL:
tiledb_query_free(&query_ptr)
raise MemoryError()
- cdef uint64_t* offsets_sizes_ptr = <uint64_t*> PyMem_Malloc(nattr * sizeof(uint64_t))
+ # lifetime: free in finally clause
+ cdef uint64_t* offsets_sizes_ptr = <uint64_t*> PyDataMem_NEW(nattr * sizeof(uint64_t))
if offsets_sizes_ptr == NULL:
tiledb_query_free(&query_ptr)
raise MemoryError()
@@ -3911,19 +3920,31 @@

# allocate buffer to hold offsets for var-length attribute
# NOTE offsets_sizes is in BYTES
- offsets_ptrs.push_back(<uint64_t*> PyMem_Malloc(<size_t>(offsets_sizes_ptr[i])))
- #self._offsets[name] = np.empty(offsets_sizes_ptr[i], dtype=np.uint8)

+ # lifetime:
+ # - free on exception
+ # - otherwise, ownership transferred to NumPy
+ tmp_ptr = PyDataMem_NEW(<size_t>(offsets_sizes_ptr[i]))
+ if tmp_ptr == NULL:
+ raise MemoryError()
+ offsets_ptrs.push_back(<uint64_t*> tmp_ptr)
+ tmp_ptr = NULL
else:

rc = tiledb_array_max_buffer_size(ctx_ptr, array_ptr, battr_name,
subarray_ptr, &(buffer_sizes_ptr[i]))

if rc != TILEDB_OK:
_raise_ctx_err(ctx_ptr, rc)
offsets_ptrs.push_back(NULL)

- buffer_ptrs.push_back(<void*> PyMem_Malloc(<size_t>(buffer_sizes_ptr[i])))
- #self._buffers[name] = np.empty(buffer_sizes_ptr[i], dtype=np.uint8)
+ # lifetime:
+ # - free on exception
+ # - otherwise, ownership transferred to NumPy
+ tmp_ptr = PyDataMem_NEW(<size_t>(buffer_sizes_ptr[i]))
+ if tmp_ptr == NULL:
+ raise MemoryError()
+ buffer_ptrs.push_back(tmp_ptr)
+ tmp_ptr = NULL

# set the query buffers
for i in range(nattr):
@@ -3956,39 +3977,34 @@
for i in range(nattr):
name = attr_names[i]

- dtype = np.dtype('uint8')

+ # Note: we don't know the actual read size until *after* the query executes
+ # so the realloc below is very important as consumers of this buffer
+ # rely on the size corresponding to actual bytes read.
if name != "coords" and schema.attr(name).isvar:
dims[0] = offsets_sizes_ptr[i]
- Py_INCREF(dtype)
+ tmp_ptr = PyDataMem_RENEW(offsets_ptrs[i], <size_t>(offsets_sizes_ptr[i]))
self._offsets[name] = \
- PyArray_NewFromDescr(
- <PyTypeObject*> np.ndarray,
- dtype, 1, dims, NULL,
- PyMem_Realloc(offsets_ptrs[i], <size_t>(offsets_sizes_ptr[i])),
- np.NPY_OWNDATA, <object> NULL)
+ np.PyArray_SimpleNewFromData(1, dims, np.NPY_UINT8, tmp_ptr)
+ PyArray_ENABLEFLAGS(self._offsets[name], np.NPY_OWNDATA)

dims[0] = buffer_sizes_ptr[i]
- Py_INCREF(dtype)
+ tmp_ptr = PyDataMem_RENEW(buffer_ptrs[i], <size_t>(buffer_sizes_ptr[i]))
self._buffers[name] = \
- PyArray_NewFromDescr(
- <PyTypeObject*> np.ndarray,
- dtype, 1, dims, NULL,
- PyMem_Realloc(buffer_ptrs[i], <size_t>(buffer_sizes_ptr[i])),
- np.NPY_OWNDATA, <object> NULL)
+ np.PyArray_SimpleNewFromData(1, dims, np.NPY_UINT8, tmp_ptr)
+ PyArray_ENABLEFLAGS(self._buffers[name], np.NPY_OWNDATA)

except:
+ # we only free the PyDataMem_NEW'd buffers on exception,
+ # otherwise NumPy manages them
for i in range(nattr):
if buffer_ptrs[i] != NULL:
- PyMem_Free(buffer_ptrs[i])
+ PyDataMem_FREE(buffer_ptrs[i])
if offsets_ptrs[i] != NULL:
- PyMem_Free(offsets_ptrs[i])
+ PyDataMem_FREE(offsets_ptrs[i])
raise
finally:
- PyMem_Free(buffer_sizes_ptr)
- PyMem_Free(offsets_sizes_ptr)
+ PyDataMem_FREE(buffer_sizes_ptr)
+ PyDataMem_FREE(offsets_sizes_ptr)
tiledb_query_free(&query_ptr)
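For readers unfamiliar with the NumPy C-API calls above: the fix allocates read buffers with NumPy's own allocator (PyDataMem_NEW / PyDataMem_RENEW), wraps them with PyArray_SimpleNewFromData, and then enables NPY_OWNDATA so NumPy frees each buffer when the wrapping array is garbage collected; the previous PyMem_Malloc path did not reliably hand ownership to NumPy, so the buffers could leak. A standalone Cython sketch of the same pattern follows; the module and function names are illustrative and not part of this PR.

# ownership_sketch.pyx -- illustrative only, assuming a NumPy/Cython toolchain like the one above
cimport numpy as np
import numpy as np

cdef extern from "numpy/arrayobject.h":
    # Mirrors the declarations added in this PR (not in older Cython numpy.pxd).
    void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)
    void* PyDataMem_NEW(size_t nbytes)
    void PyDataMem_FREE(void* data)

np.import_array()  # must run before any NumPy C-API call

def make_owned_uint8_array(size_t nbytes):
    cdef np.npy_intp dims[1]
    cdef np.ndarray arr
    cdef void* buf = PyDataMem_NEW(nbytes)
    if buf == NULL:
        raise MemoryError()
    dims[0] = nbytes
    try:
        # Wrap the raw allocation in a 1-D uint8 ndarray without copying.
        arr = np.PyArray_SimpleNewFromData(1, dims, np.NPY_UINT8, buf)
    except:
        # If wrapping fails we still own the allocation and must free it.
        PyDataMem_FREE(buf)
        raise
    # Transfer ownership: NumPy will release the buffer when arr is collected.
    PyArray_ENABLEFLAGS(arr, np.NPY_OWNDATA)
    return arr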


tiledb/tests/common.py: 12 changes (10 additions, 2 deletions)
@@ -4,9 +4,11 @@
import os
import shutil
import tempfile
+ import traceback
from unittest import TestCase

class DiskTestCase(TestCase):
+ pathmap = dict()

def setUp(self):
prefix = 'tiledb-' + self.__class__.__name__
@@ -20,8 +22,14 @@ def tearDown(self):
except OSError as exc:
print("test '{}' error deleting '{}'".format(self.__class__.__name__,
dirpath))
- raise
+ print("registered paths and originating functions:")
+ for path,frame in self.pathmap.items():
+ print("  '{}' <- '{}'".format(path,frame))
+ raise exc

def path(self, path):
- return os.path.abspath(os.path.join(self.rootdir, path))
+ out = os.path.abspath(os.path.join(self.rootdir, path))
+ frame = traceback.extract_stack(limit=2)[-2][2]
+ self.pathmap[out] = frame
+ return out
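The traceback.extract_stack(limit=2)[-2][2] expression records the name of the test method that requested each path, which is what tearDown prints when cleanup fails. A tiny standalone illustration of that stdlib idiom (the function names here are made up, not from this PR):

import traceback

def record_caller():
    # extract_stack(limit=2) keeps only the two innermost frames;
    # [-2] is the caller's frame summary and index 2 is its function name.
    return traceback.extract_stack(limit=2)[-2][2]

def test_dense_array():
    assert record_caller() == 'test_dense_array'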
