From e74bf610b8dcaa72824189473dc6cd77546433f0 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 14 Nov 2018 00:00:37 +1100 Subject: [PATCH] PyArray conversion speedups and PyArrayFromBuffer (#487) * PyArray conversion speedups and PyArrayFromBuffer * PyArray_Info parameterised T,N and immutable * PyArray_Info size and stride changed to Tuples (from Vectors) * PyArray_Info data changed from Ptr{Cvoid} to Ptr{T} * `PyArrayInfoFromBuffer(o::PyObject)` faster way to get PyArray_Info from numpy than numpy's __array_interface__ * `PyArrayFromBuffer(o::PyObject)` 4x faster PyArray conversion * ArrayFromBuffer no copy conversion to Julia Array * moved PyBuffer tests to separate file * Add isbuftype and use in pysequence_query * Increase robustness in parsing buffer format string * ArrayFromBuffer Improvements * ArgumentError for non-native endian and typestrs_native * Move PyArray related things to their own file rename ArrayFromBuffer to NoCopyArray, and make indexing of NoCopyArray match py indexing for row major arrays too * Void to Nothing/Cvoid * 0.7 deprecations * put my thang down flip it and reverse it * Improve lifecycle management of Pybuffers PyArray no holds a ref to the PyBuffer object. This should ensure that the buffer isn't released until the PyArray is no longer needed setdata! now explicitly releases the buffer it used to point to * Avoid tests when NumPy not available, and fix a test * Clarify pydecref(o::PyBuffer) docstring and change pass a PyPtr_NULL instead of a C_NULL for Py_buffer.obj when creating a new PyBuffer * Remove exports: setdata!, NoCopyArray, isbuftype * Test setdata!, improve NoCopyArray docstring * Add tests for PyArray getindex and fix similar for PyArray indexing into a PyArray was throwing a ambiguous method error for similar(::PyArray, T, dims::Dims) * Add GC rooting note --- benchmarks/arrayperf.jl | 57 +++++++ src/PyCall.jl | 4 +- src/conversions.jl | 6 +- src/numpy.jl | 323 +------------------------------------- src/pyarray.jl | 338 ++++++++++++++++++++++++++++++++++++++++ src/pybuffer.jl | 106 ++++++++++++- test/runtests.jl | 9 +- test/testpybuffer.jl | 114 ++++++++++++++ 8 files changed, 630 insertions(+), 327 deletions(-) create mode 100644 benchmarks/arrayperf.jl create mode 100644 src/pyarray.jl create mode 100644 test/testpybuffer.jl diff --git a/benchmarks/arrayperf.jl b/benchmarks/arrayperf.jl new file mode 100644 index 00000000..b5a46f06 --- /dev/null +++ b/benchmarks/arrayperf.jl @@ -0,0 +1,57 @@ +using PyCall, BenchmarkTools, DataStructures +using PyCall: PyArray_Info + +results = OrderedDict{String,Any}() + +let + np = pyimport("numpy") + nprand = np["random"]["rand"] + # nparray_pyo(x) = pycall(np["array"], PyObject, x) + # pytestarray(sz::Int...) = pycall(np["reshape"], PyObject, nparray_pyo(1:prod(sz)), sz) + + # no convert baseline + nprand_pyo(sz...) = pycall(nprand, PyObject, sz...) + + for arr_size in [(2,2), (100,100)] + pyo_arr = nprand_pyo(arr_size...) + results["nprand_pyo$arr_size"] = @benchmark $nprand_pyo($arr_size...) + println("nprand_pyo $arr_size:\n"); display(results["nprand_pyo$arr_size"]) + println("--------------------------------------------------") + + results["convert_pyarr$arr_size"] = @benchmark $convert(PyArray, $pyo_arr) + println("convert_pyarr $arr_size:\n"); display(results["convert_pyarr$arr_size"]) + println("--------------------------------------------------") + + results["PyArray_Info$arr_size"] = @benchmark $PyArray_Info($pyo_arr) + println("PyArray_Info $arr_size:\n"); display(results["PyArray_Info$arr_size"]) + println("--------------------------------------------------") + + results["convert_pyarrbuf$arr_size"] = @benchmark $PyArray($pyo_arr) + println("convert_pyarrbuf $arr_size:\n"); display(results["convert_pyarrbuf$arr_size"]) + println("--------------------------------------------------") + + results["convert_arr$arr_size"] = @benchmark convert(Array, $pyo_arr) + println("convert_arr $arr_size:\n"); display(results["convert_arr$arr_size"]) + println("--------------------------------------------------") + + results["convert_arrbuf$arr_size"] = @benchmark $NoCopyArray($pyo_arr) + println("convert_arrbuf $arr_size:\n"); display(results["convert_arrbuf$arr_size"]) + println("--------------------------------------------------") + + pyarr = convert(PyArray, pyo_arr) + results["setdata!$arr_size"] = @benchmark $setdata!($pyarr, $pyo_arr) + println("setdata!:\n"); display(results["setdata!$arr_size"]) + println("--------------------------------------------------") + + pyarr = convert(PyArray, pyo_arr) + pybuf=PyBuffer() + results["setdata! bufprealloc$arr_size"] = + @benchmark $setdata!($pyarr, $pyo_arr, $pybuf) + println("setdata! bufprealloc:\n"); display(results["setdata! bufprealloc$arr_size"]) + println("--------------------------------------------------") + end +end +println() +println("Mean times") +println("----------") +foreach((r)->println(rpad(r[1],27), ": ", mean(r[2])), results) diff --git a/src/PyCall.jl b/src/PyCall.jl index 46ee4643..6f4fcf04 100644 --- a/src/PyCall.jl +++ b/src/PyCall.jl @@ -5,7 +5,8 @@ module PyCall using Compat, VersionParsing export pycall, pycall!, pyimport, pyimport_e, pybuiltin, PyObject, PyReverseDims, - PyPtr, pyincref, pydecref, pyversion, PyArray, PyArray_Info, + PyPtr, pyincref, pydecref, pyversion, + PyArray, PyArray_Info, PyBuffer, pyerr_check, pyerr_clear, pytype_query, PyAny, @pyimport, PyDict, pyisinstance, pywrap, pytypeof, pyeval, PyVector, pystring, pystr, pyrepr, pyraise, pytype_mapping, pygui, pygui_start, pygui_stop, @@ -177,6 +178,7 @@ pytypeof(o::PyObject) = ispynull(o) ? throw(ArgumentError("NULL PyObjects have n const TypeTuple = Union{Type,NTuple{N, Type}} where {N} include("pybuffer.jl") +include("pyarray.jl") include("conversions.jl") include("pytype.jl") include("pyiterator.jl") diff --git a/src/conversions.jl b/src/conversions.jl index ab681ae5..ec3c6af5 100644 --- a/src/conversions.jl +++ b/src/conversions.jl @@ -768,13 +768,11 @@ function pysequence_query(o::PyObject) return AbstractRange elseif ispybytearray(o) return Vector{UInt8} - elseif !haskey(o, "__array_interface__") + elseif !isbuftype(o) # only handle PyList for now return pyisinstance(o, @pyglobalobj :PyList_Type) ? Array : Union{} else - otypestr = get(o["__array_interface__"], PyObject, "typestr") - typestr = convert(AbstractString, otypestr) # Could this just be String now? - T = npy_typestrs[typestr[2:end]] + T, native_byteorder = array_format(o) if T == PyPtr T = PyObject end diff --git a/src/numpy.jl b/src/numpy.jl index 542b8b9b..e41fd1a2 100644 --- a/src/numpy.jl +++ b/src/numpy.jl @@ -102,7 +102,7 @@ end # the values of these seem to have been stable for some time, and # the NumPy developers seem to have some awareness of binary compatibility -# NPY_TYPES: +# NumPy Types: const NPY_BOOL = Int32(0) const NPY_BYTE = Int32(1) @@ -127,26 +127,8 @@ const NPY_UNICODE = Int32(19) const NPY_VOID = Int32(20) const NPY_HALF = Int32(23) -# NPY_ORDER: -const NPY_ANYORDER = Int32(-1) -const NPY_CORDER = Int32(0) -const NPY_FORTRANORDER = Int32(1) - -# flags: -const NPY_ARRAY_C_CONTIGUOUS = Int32(1) -const NPY_ARRAY_F_CONTIGUOUS = Int32(2) -const NPY_ARRAY_ALIGNED = Int32(0x0100) -const NPY_ARRAY_WRITEABLE = Int32(0x0400) -const NPY_ARRAY_OWNDATA = Int32(0x0004) -const NPY_ARRAY_ENSURECOPY = Int32(0x0020) -const NPY_ARRAY_ENSUREARRAY = Int32(0x0040) -const NPY_ARRAY_FORCECAST = Int32(0x0010) -const NPY_ARRAY_UPDATEIFCOPY = Int32(0x1000) -const NPY_ARRAY_NOTSWAPPED = Int32(0x0200) -const NPY_ARRAY_ELEMENTSTRIDES = Int32(0x0080) - ######################################################################### -# conversion from Julia types to NPY_TYPES constant +# conversion from Julia types to NumPy types npy_type(::Type{Bool}) = NPY_BOOL npy_type(::Type{Int8}) = NPY_BYTE @@ -164,18 +146,9 @@ npy_type(::Type{ComplexF32}) = NPY_CFLOAT npy_type(::Type{ComplexF64}) = NPY_CDOUBLE npy_type(::Type{PyPtr}) = NPY_OBJECT -const NPY_TYPES = Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64,ComplexF32,ComplexF64,PyPtr} - -# conversions from __array_interface__ type strings to supported Julia types -const npy_typestrs = Dict( "b1"=>Bool, - "i1"=>Int8, "u1"=>UInt8, - "i2"=>Int16, "u2"=>UInt16, - "i4"=>Int32, "u4"=>UInt32, - "i8"=>Int64, "u8"=>UInt64, - "f2"=>Float16, "f4"=>Float32, - "f8"=>Float64, "c8"=>ComplexF32, - "c16"=>ComplexF64, "O"=>PyPtr, - "O$(div(Sys.WORD_SIZE,8))"=>PyPtr) +# flags: +const NPY_ARRAY_ALIGNED = Int32(0x0100) +const NPY_ARRAY_WRITEABLE = Int32(0x0400) ######################################################################### # no-copy conversion of Julia arrays to NumPy arrays. @@ -185,7 +158,7 @@ const npy_typestrs = Dict( "b1"=>Bool, # dimensions. For example, although NumPy works with both row-major and # column-major data, some Python libraries like OpenCV seem to require # row-major data (the default in NumPy). In such cases, use PyReverseDims(array) -function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:NPY_TYPES +function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:PYARR_TYPES @npyinitialize size_a = revdims ? reverse(size(a)) : size(a) strides_a = revdims ? reverse(strides(a)) : strides(a) @@ -199,7 +172,7 @@ function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:NPY_TYPES return PyObject(p, a) end -function PyObject(a::StridedArray{T}) where T<:NPY_TYPES +function PyObject(a::StridedArray{T}) where T<:PYARR_TYPES try return NpyArray(a, false) catch @@ -207,7 +180,7 @@ function PyObject(a::StridedArray{T}) where T<:NPY_TYPES end end -PyReverseDims(a::StridedArray{T}) where {T<:NPY_TYPES} = NpyArray(a, true) +PyReverseDims(a::StridedArray{T}) where {T<:PYARR_TYPES} = NpyArray(a, true) PyReverseDims(a::BitArray) = PyReverseDims(Array(a)) """ @@ -222,283 +195,3 @@ libraries that expect row-major data. PyReverseDims(a::AbstractArray) ######################################################################### -# Extract shape and other information about a NumPy array. We need -# to call the Python interface to do this, since the equivalent information -# in NumPy's C API is only available via macros (or parsing structs). -# [ Hopefully, this will be improved in a future NumPy version. ] - -mutable struct PyArray_Info - T::Type - native::Bool # native byte order? - sz::Vector{Int} - st::Vector{Int} # strides, in multiples of bytes! - data::Ptr{Cvoid} - readonly::Bool - - function PyArray_Info(a::PyObject) - ai = PyDict{AbstractString,PyObject}(a["__array_interface__"]) - typestr = convert(AbstractString, ai["typestr"]) - T = npy_typestrs[typestr[2:end]] - datatuple = convert(Tuple{Int,Bool}, ai["data"]) - sz = convert(Vector{Int}, ai["shape"]) - local st - try - st = isempty(sz) ? Int[] : convert(Vector{Int}, ai["strides"]) - catch - # default is C-order contiguous - st = similar(sz) - st[end] = sizeof(T) - for i = length(sz)-1:-1:1 - st[i] = st[i+1]*sz[i+1] - end - end - return new(T, - (ENDIAN_BOM == 0x04030201 && typestr[1] == '<') - || (ENDIAN_BOM == 0x01020304 && typestr[1] == '>') - || typestr[1] == '|', - sz, st, - convert(Ptr{Cvoid}, datatuple[1]), - datatuple[2]) - end -end - -aligned(i::PyArray_Info) = # FIXME: also check pointer alignment? - all(m -> m == 0, mod.(i.st, sizeof(i.T))) # strides divisible by elsize - -# whether a contiguous array in column-major (Fortran, Julia) order -function f_contiguous(T::Type, sz::Vector{Int}, st::Vector{Int}) - if prod(sz) == 1 - return true - end - if st[1] != sizeof(T) - return false - end - for j = 2:length(st) - if st[j] != st[j-1] * sz[j-1] - return false - end - end - return true -end - -f_contiguous(i::PyArray_Info) = f_contiguous(i.T, i.sz, i.st) -@static if VERSION >= v"0.7.0-DEV.4534" # julia#26369 - c_contiguous(i::PyArray_Info) = f_contiguous(i.T, reverse(i.sz,dims=1), reverse(i.st,dims=1)) -else - c_contiguous(i::PyArray_Info) = f_contiguous(i.T, flipdim(i.sz,1), flipdim(i.st,1)) -end - -######################################################################### -# PyArray: no-copy wrapper around NumPy ndarray -# -# Hopefully, in the future this can be a subclass of StridedArray (see -# Julia issue #2345), which will allow it to be used with most Julia -# functions, but that is not possible at the moment. So, to use this -# with Julia linalg functions etcetera a copy is still required. - -""" - PyArray(o::PyObject) - -This converts an `ndarray` object `o` to a PyArray. - -This implements a nocopy wrapper to a NumPy array (currently of only numeric types only). - -If you are using `pycall` and the function returns an `ndarray`, you can use `PyArray` as the return type to directly receive a `PyArray`. -""" -mutable struct PyArray{T,N} <: AbstractArray{T,N} - o::PyObject - info::PyArray_Info - dims::Dims - st::Vector{Int} - f_contig::Bool - c_contig::Bool - data::Ptr{T} - - function PyArray{T,N}(o::PyObject, info::PyArray_Info) where {T,N} - if !aligned(info) - throw(ArgumentError("only NPY_ARRAY_ALIGNED arrays are supported")) - elseif !info.native - throw(ArgumentError("only native byte-order arrays are supported")) - elseif info.T != T - throw(ArgumentError("inconsistent type in PyArray constructor")) - elseif length(info.sz) != N || length(info.st) != N - throw(ArgumentError("inconsistent ndims in PyArray constructor")) - end - return new{T,N}(o, info, tuple(info.sz...), div.(info.st, sizeof(T)), - f_contiguous(info), c_contiguous(info), - convert(Ptr{T}, info.data)) - end -end - -function PyArray(o::PyObject) - info = PyArray_Info(o) - return PyArray{info.T, length(info.sz)}(o, info) -end - -size(a::PyArray) = a.dims -ndims(a::PyArray{T,N}) where {T,N} = N - -similar(a::PyArray, T, dims::Dims) = Array{T}(undef, dims) - -function copy(a::PyArray{T,N}) where {T,N} - if N > 1 && a.c_contig # equivalent to f_contig with reversed dims - B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N)) - return permutedims(B, (N:-1:1)) - end - A = Array{T}(undef, a.dims) - if a.f_contig - ccall(:memcpy, Cvoid, (Ptr{T}, Ptr{T}, Int), A, a, sizeof(T)*length(a)) - return A - else - return copyto!(A, a) - end -end - -# TODO: need to do bounds-checking of these indices! - -getindex(a::PyArray{T,0}) where {T} = unsafe_load(a.data) -getindex(a::PyArray{T,1}, i::Integer) where {T} = unsafe_load(a.data, 1 + (i-1)*a.st[1]) - -getindex(a::PyArray{T,2}, i::Integer, j::Integer) where {T} = - unsafe_load(a.data, 1 + (i-1)*a.st[1] + (j-1)*a.st[2]) - -function getindex(a::PyArray, i::Integer) - if a.f_contig - return unsafe_load(a.data, i) - else - return a[ind2sub(a.dims, i)...] - end -end - -function getindex(a::PyArray, is::Integer...) - index = 1 - n = min(length(is),length(a.st)) - for i = 1:n - index += (is[i]-1)*a.st[i] - end - for i = n+1:length(is) - if is[i] != 1 - throw(BoundsError()) - end - end - unsafe_load(a.data, index) -end - -function writeok_assign(a::PyArray, v, i::Integer) - if a.info.readonly - throw(ArgumentError("read-only PyArray")) - else - unsafe_store!(a.data, v, i) - end - return a -end - -setindex!(a::PyArray{T,0}, v) where {T} = writeok_assign(a, v, 1) -setindex!(a::PyArray{T,1}, v, i::Integer) where {T} = writeok_assign(a, v, 1 + (i-1)*a.st[1]) - -setindex!(a::PyArray{T,2}, v, i::Integer, j::Integer) where {T} = - writeok_assign(a, v, 1 + (i-1)*a.st[1] + (j-1)*a.st[2]) - -function setindex!(a::PyArray, v, i::Integer) - if a.f_contig - return writeok_assign(a, v, i) - else - return setindex!(a, v, ind2sub(a.dims, i)...) - end -end - -function setindex!(a::PyArray, v, is::Integer...) - index = 1 - n = min(length(is),length(a.st)) - for i = 1:n - index += (is[i]-1)*a.st[i] - end - for i = n+1:length(is) - if is[i] != 1 - throw(BoundsError()) - end - end - writeok_assign(a, v, index) -end - -stride(a::PyArray, i::Integer) = a.st[i] - -Base.unsafe_convert(::Type{Ptr{T}}, a::PyArray{T}) where {T} = a.data - -pointer(a::PyArray, i::Int) = pointer(a, ind2sub(a.dims, i)) - -function pointer(a::PyArray{T}, is::Tuple{Vararg{Int}}) where T - offset = 0 - for i = 1:length(is) - offset += (is[i]-1)*a.st[i] - end - return a.data + offset*sizeof(T) -end - -summary(a::PyArray{T}) where {T} = string(Base.dims2string(size(a)), " ", - string(T), " PyArray") - -######################################################################### -# PyArray <-> PyObject conversions - -PyObject(a::PyArray) = a.o - -convert(::Type{PyArray}, o::PyObject) = PyArray(o) - -function convert(::Type{Array{T, 1}}, o::PyObject) where T<:NPY_TYPES - try - copy(PyArray{T, 1}(o, PyArray_Info(o))) # will check T and N vs. info - catch - len = @pycheckz ccall((@pysym :PySequence_Size), Int, (PyPtr,), o) - A = Array{pyany_toany(T)}(undef, len) - py2array(T, A, o, 1, 1) - end -end - -function convert(::Type{Array{T}}, o::PyObject) where T<:NPY_TYPES - try - info = PyArray_Info(o) - try - copy(PyArray{T, length(info.sz)}(o, info)) # will check T == info.T - catch - return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1) - end - catch - py2array(T, o) - end -end - -function convert(::Type{Array{T,N}}, o::PyObject) where {T<:NPY_TYPES,N} - try - info = PyArray_Info(o) - try - copy(PyArray{T,N}(o, info)) # will check T == info.T and N == length(info.sz) - catch - nd = length(info.sz) - if nd != N - throw(ArgumentError("cannot convert $(nd)d array to $(N)d")) - end - return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1) - end - catch - A = py2array(T, o) - if ndims(A) != N - throw(ArgumentError("cannot convert $(ndims(A))d array to $(N)d")) - end - A - end -end - -function convert(::Type{Array{PyObject}}, o::PyObject) - map(pyincref, convert(Array{PyPtr}, o)) -end - -function convert(::Type{Array{PyObject,1}}, o::PyObject) - map(pyincref, convert(Array{PyPtr, 1}, o)) -end - -function convert(::Type{Array{PyObject,N}}, o::PyObject) where N - map(pyincref, convert(Array{PyPtr, N}, o)) -end - -######################################################################### diff --git a/src/pyarray.jl b/src/pyarray.jl new file mode 100644 index 00000000..fb8559a6 --- /dev/null +++ b/src/pyarray.jl @@ -0,0 +1,338 @@ +######################################################################### +# Extract shape and other information about arrays that support Python's +# Buffer Interface/Protocol (PEP 3118) +######################################################################### +struct PyArray_Info{T,N} + native::Bool # native byte order? + sz::NTuple{N,Int} + st::NTuple{N,Int} # strides, in multiples of bytes! + data::Ptr{T} + readonly::Bool + pybuf::PyBuffer +end + +function PyArray_Info(o::PyObject) + # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer + pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + sz = size(pybuf) + strd = strides(pybuf) + length(strd) == 0 && (sz = ()) + N = length(sz) + isreadonly = pybuf.buf.readonly==1 + return PyArray_Info{T,N}(native_byteorder, sz, strd, pybuf.buf.buf, isreadonly, pybuf) +end + +aligned(i::PyArray_Info{T,N}) where {T,N} = # FIXME: also check pointer alignment? + all(m -> m == 0, mod.(i.st, sizeof(T))) # strides divisible by elsize + +eltype(i::PyArray_Info{T,N}) where {T,N} = T +ndims(i::PyArray_Info{T,N}) where {T,N} = N + +function default_stride(sz::NTuple{N, Int}, ::Type{T}) where {T,N} + stv = Vector{Int}(N) + stv[end] = sizeof(T) + for i = N-1:-1:1 + stv[i] = stv[i+1]*sz[i+1] + end + ntuple(i->stv[i], N) +end + +# whether a contiguous array in column-major (Fortran, Julia) order +function f_contiguous(T::Type, sz::NTuple{N,Int}, st::NTuple{N,Int}) where N + if prod(sz) == 1 || length(sz) == 1 + # 0 or 1-dim arrays should default to f-contiguous in julia + return true + end + if st[1] != sizeof(T) + return false + end + for j = 2:N + if st[j] != st[j-1] * sz[j-1] + return false + end + end + return true +end + +f_contiguous(T::Type, sz::NTuple{N1,Int}, st::NTuple{N2,Int}) where {N1,N2} = + error("stride and size are different lengths, size: $sz, strides: $sz") + +f_contiguous(i::PyArray_Info{T,N}) where {T,N} = f_contiguous(T, i.sz, i.st) +c_contiguous(i::PyArray_Info{T,N}) where {T,N} = + f_contiguous(T, reverse(i.sz), reverse(i.st)) + + +######################################################################### +# PyArray: no-copy wrapper around NumPy ndarray +# +# Hopefully, in the future this can be a subclass of StridedArray (see +# Julia issue #2345), which will allow it to be used with most Julia +# functions, but that is not possible at the moment. So, to use this +# with Julia linalg functions etcetera a copy is still required. + +""" + PyArray(o::PyObject) + +This converts an `ndarray` object `o` to a PyArray. + +This implements a nocopy wrapper to a NumPy array (currently of only numeric types only). + +If you are using `pycall` and the function returns an `ndarray`, you can use `PyArray` as the return type to directly receive a `PyArray`. +""" +mutable struct PyArray{T,N} <: AbstractArray{T,N} + o::PyObject + info::PyArray_Info + dims::Dims + st::NTuple{N,Int} + f_contig::Bool + c_contig::Bool + data::Ptr{T} + + function PyArray{T,N}(o::PyObject, info::PyArray_Info) where {T,N} + if !aligned(info) + throw(ArgumentError("only NPY_ARRAY_ALIGNED arrays are supported")) + elseif !info.native + throw(ArgumentError("only native byte-order arrays are supported")) + elseif eltype(info) != T + throw(ArgumentError("inconsistent type in PyArray constructor")) + elseif length(info.sz) != N || length(info.st) != N + throw(ArgumentError("inconsistent ndims in PyArray constructor")) + end + return new{T,N}(o, info, tuple(info.sz...), div.(info.st, sizeof(T)), + f_contiguous(info), c_contiguous(info), + convert(Ptr{T}, info.data)) + end +end + +function PyArray(o::PyObject) + info = PyArray_Info(o) + return PyArray{eltype(info), length(info.sz)}(o, info) +end + +size(a::PyArray) = a.dims +ndims(a::PyArray{T,N}) where {T,N} = N + +similar(a::PyArray, ::Type{T}, dims::Dims) where {T} = Array{T}(undef, dims) + +""" +Update the data ptr of the `a` to point to the buffer exposed by `o` through +the Python buffer interface +""" +function setdata!(a::PyArray{T,N}, o::PyObject) where {T,N} + pybufinfo = a.info.pybuf + PyBuffer!(pybufinfo, o, PyBUF_ND_CONTIGUOUS) + dataptr = pybufinfo.buf.buf + a.data = reinterpret(Ptr{T}, dataptr) + a +end + +function copy(a::PyArray{T,N}) where {T,N} + if N > 1 && a.c_contig # equivalent to f_contig with reversed dims + B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N)) + return permutedims(B, (N:-1:1)) + end + A = Array{T}(undef, a.dims) + if a.f_contig + ccall(:memcpy, Cvoid, (Ptr{T}, Ptr{T}, Int), A, a, sizeof(T)*length(a)) + return A + else + return copyto!(A, a) + end +end + +# TODO: need to do bounds-checking of these indices! +# TODO: need to GC root these `a`s to guard against the PyArray getting gc'd, +# e.g. if it's a temporary in a function: +# `two_rands() = pycall(np.rand, PyArray, 10)[1:2]` + + +getindex(a::PyArray{T,0}) where {T} = unsafe_load(a.data) +getindex(a::PyArray{T,1}, i::Integer) where {T} = unsafe_load(a.data, 1 + (i-1)*a.st[1]) + +getindex(a::PyArray{T,2}, i::Integer, j::Integer) where {T} = + unsafe_load(a.data, 1 + (i-1)*a.st[1] + (j-1)*a.st[2]) + +function getindex(a::PyArray, i::Integer) + if a.f_contig + return unsafe_load(a.data, i) + else + return a[ind2sub(a.dims, i)...] + end +end + +function getindex(a::PyArray, is::Integer...) + index = 1 + n = min(length(is),length(a.st)) + for i = 1:n + index += (is[i]-1)*a.st[i] + end + for i = n+1:length(is) + if is[i] != 1 + throw(BoundsError()) + end + end + unsafe_load(a.data, index) +end + +function writeok_assign(a::PyArray, v, i::Integer) + if a.info.readonly + throw(ArgumentError("read-only PyArray")) + else + unsafe_store!(a.data, v, i) + end + return a +end + +setindex!(a::PyArray{T,0}, v) where {T} = writeok_assign(a, v, 1) +setindex!(a::PyArray{T,1}, v, i::Integer) where {T} = writeok_assign(a, v, 1 + (i-1)*a.st[1]) + +setindex!(a::PyArray{T,2}, v, i::Integer, j::Integer) where {T} = + writeok_assign(a, v, 1 + (i-1)*a.st[1] + (j-1)*a.st[2]) + +function setindex!(a::PyArray, v, i::Integer) + if a.f_contig + return writeok_assign(a, v, i) + else + return setindex!(a, v, ind2sub(a.dims, i)...) + end +end + +function setindex!(a::PyArray, v, is::Integer...) + index = 1 + n = min(length(is),length(a.st)) + for i = 1:n + index += (is[i]-1)*a.st[i] + end + for i = n+1:length(is) + if is[i] != 1 + throw(BoundsError()) + end + end + writeok_assign(a, v, index) +end + +stride(a::PyArray, i::Integer) = a.st[i] + +Base.unsafe_convert(::Type{Ptr{T}}, a::PyArray{T}) where {T} = a.data + +pointer(a::PyArray, i::Int) = pointer(a, ind2sub(a.dims, i)) + +function pointer(a::PyArray{T}, is::Tuple{Vararg{Int}}) where T + offset = 0 + for i = 1:length(is) + offset += (is[i]-1)*a.st[i] + end + return a.data + offset*sizeof(T) +end + +summary(a::PyArray{T}) where {T} = string(Base.dims2string(size(a)), " ", + string(T), " PyArray") + +######################################################################### +# PyArray <-> PyObject conversions + +const PYARR_TYPES = Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64,ComplexF32,ComplexF64,PyPtr} + +PyObject(a::PyArray) = a.o + +convert(::Type{PyArray}, o::PyObject) = PyArray(o) + +function convert(::Type{Array{T, 1}}, o::PyObject) where T<:PYARR_TYPES + try + copy(PyArray{T, 1}(o, PyArray_Info(o))) # will check T and N vs. info + catch + len = @pycheckz ccall((@pysym :PySequence_Size), Int, (PyPtr,), o) + A = Array{pyany_toany(T)}(undef, len) + py2array(T, A, o, 1, 1) + end +end + +function convert(::Type{Array{T}}, o::PyObject) where T<:PYARR_TYPES + try + info = PyArray_Info(o) + try + copy(PyArray{T, length(info.sz)}(o, info)) # will check T == eltype(info) + catch + return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1) + end + catch + py2array(T, o) + end +end + +function convert(::Type{Array{T,N}}, o::PyObject) where {T<:PYARR_TYPES,N} + try + info = PyArray_Info(o) + try + copy(PyArray{T,N}(o, info)) # will check T,N == eltype(info),ndims(info) + catch + nd = length(info.sz) + if nd != N + throw(ArgumentError("cannot convert $(nd)d array to $(N)d")) + end + return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1) + end + catch + A = py2array(T, o) + if ndims(A) != N + throw(ArgumentError("cannot convert $(ndims(A))d array to $(N)d")) + end + A + end +end + +function convert(::Type{Array{PyObject}}, o::PyObject) + map(pyincref, convert(Array{PyPtr}, o)) +end + +function convert(::Type{Array{PyObject,1}}, o::PyObject) + map(pyincref, convert(Array{PyPtr, 1}, o)) +end + +function convert(::Type{Array{PyObject,N}}, o::PyObject) where N + map(pyincref, convert(Array{PyPtr, N}, o)) +end + +array_format(o::PyObject) = array_format(PyBuffer(o, PyBUF_ND_CONTIGUOUS)) + +""" +``` +NoCopyArray(o::PyObject) +``` +Convert a Python array-like object, to a Julia `Array` or `PermutedDimsArray` +without making a copy of the data. + +If the data is stored in row-major format +(the default in Python/NumPy), then the returned array `nca` will be a +`PermutedDimsArray` such that the arrays are indexed the same way in Julia and +Python. i.e. `nca[idxs...] == o[idxs...]` + +If the data is stored in column-major format then a regular Julia `Array` will +be returned. + +Warning: This function is only lightly tested, and should be considered +experimental - it may cause segmentation faults on conversion or subsequent +array access, or be subtly broken in other ways. Only dense/contiguous, native +endian arrays that support the Python Buffer protocol are likely be converted +correctly. +""" +function NoCopyArray(o::PyObject) + # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer + pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + !native_byteorder && throw(ArgumentError( + "Only native endian format supported, format string: '$(get_format_str(pybuf))'")) + T == Nothing && throw(ArgumentError( + "Array datatype '$(get_format_str(pybuf))' not supported")) + # TODO more checks on strides etc + sz = size(pybuf) + @static if VERSION >= v"0.7.0-DEV.3526" # julia#25647 + arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), sz, own=false) + else + arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), sz, false) + end + !f_contiguous(T, sz, strides(pybuf)) && + (arr = PermutedDimsArray(reshape(arr, reverse(sz)), (pybuf.buf.ndim:-1:1))) + return arr +end diff --git a/src/pybuffer.jl b/src/pybuffer.jl index 06295261..ea076e1f 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -29,7 +29,7 @@ end mutable struct PyBuffer buf::Py_buffer PyBuffer() = begin - b = new(Py_buffer(C_NULL, C_NULL, 0, 0, + b = new(Py_buffer(C_NULL, PyPtr_NULL, 0, 0, 0, 0, C_NULL, C_NULL, C_NULL, C_NULL, C_NULL, C_NULL, C_NULL)) @compat finalizer(pydecref, b) @@ -37,6 +37,13 @@ mutable struct PyBuffer end end +""" +`pydecref(o::PyBuffer)` +Release the reference to buffer `o` +N.b. As per https://docs.python.org/3/c-api/buffer.html#c.PyBuffer_Release, +It is an error to call this function on a PyBuffer that was not obtained via +the python c-api function `PyObject_GetBuffer(), unless o.obj is a PyPtr(C_NULL)` +""" function pydecref(o::PyBuffer) # note that PyBuffer_Release sets o.obj to NULL, and # is a no-op if o.obj is already NULL @@ -86,6 +93,9 @@ function Base.stride(b::PyBuffer, d::Integer) return Int(unsafe_load(b.buf.strides, d)) end +# Strides in bytes +Base.strides(b::PyBuffer) = ((stride(b,i) for i in 1:b.buf.ndim)...,) + # TODO change to `Ref{PyBuffer}` when 0.6 is dropped. iscontiguous(b::PyBuffer) = 1 == ccall((@pysym :PyBuffer_IsContiguous), Cint, @@ -93,7 +103,6 @@ iscontiguous(b::PyBuffer) = ############################################################################# # pybuffer constant values from Include/object.h -const PyBUF_MAX_NDIM = convert(Cint, 64) const PyBUF_SIMPLE = convert(Cint, 0) const PyBUF_WRITABLE = convert(Cint, 0x0001) const PyBUF_FORMAT = convert(Cint, 0x0004) @@ -103,16 +112,42 @@ const PyBUF_C_CONTIGUOUS = convert(Cint, 0x0020) | PyBUF_STRIDES const PyBUF_F_CONTIGUOUS = convert(Cint, 0x0040) | PyBUF_STRIDES const PyBUF_ANY_CONTIGUOUS = convert(Cint, 0x0080) | PyBUF_STRIDES const PyBUF_INDIRECT = convert(Cint, 0x0100) | PyBUF_STRIDES +const PyBUF_ND_CONTIGUOUS = Cint(PyBUF_WRITABLE | PyBUF_FORMAT | PyBUF_ND | PyBUF_STRIDES | PyBUF_ANY_CONTIGUOUS) # construct a PyBuffer from a PyObject, if possible function PyBuffer(o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE) - b = PyBuffer() + return PyBuffer!(PyBuffer(), o, flags) +end + +function PyBuffer!(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE) # TODO change to `Ref{PyBuffer}` when 0.6 is dropped. + pydecref(b) # ensure b is properly released @pycheckz ccall((@pysym :PyObject_GetBuffer), Cint, (PyPtr, Any, Cint), o, b, flags) return b end +""" +`isbuftype(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_ND_CONTIGUOUS)` +Returns true if the python object `o` supports the buffer protocol. False if not. +""" +function isbuftype(o::Union{PyObject,PyPtr}) + # PyObject_CheckBuffer is defined in a header file here: https://github.com/python/cpython/blob/ef5ce884a41c8553a7eff66ebace908c1dcc1f89/Include/abstract.h#L510 + # so we can't access it easily. It basically just checks if PyObject_GetBuffer exists + # So we'll just try call PyObject_GetBuffer and check for success/failure + b = PyBuffer() + ret = ccall((@pysym :PyObject_GetBuffer), Cint, + (PyPtr, Any, Cint), o, b, PyBUF_ND_CONTIGUOUS) + if ret != 0 + pyerr_clear() + else + # handle pointer types + T, native_byteorder = array_format(b) + T <: Ptr && (ret = 1) + end + return ret == 0 +end + ############################################################################# # recursive function to write buffer dimension by dimension, starting at @@ -154,4 +189,69 @@ function Base.write(io::IO, b::PyBuffer) end end +# ref: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966 + +const standard_typestrs = Dict{String,DataType}( + "?"=>Bool, "P"=>Ptr{Cvoid}, + "b"=>Int8, "B"=>UInt8, + "h"=>Int16, "H"=>UInt16, + "i"=>Int32, "I"=>UInt32, + "l"=>Int32, "L"=>UInt32, + "q"=>Int64, "Q"=>UInt64, + "e"=>Float16, "f"=>Float32, + "d"=>Float64, "g"=>Nothing, # Float128? + # `Nothing` indicates no equiv Julia type + "Z8"=>ComplexF32, "Z16"=>ComplexF64, + "Zf"=>ComplexF32, "Zd"=>ComplexF64) + +const native_typestrs = Dict{String,DataType}( + "?"=>Bool, "P"=>Ptr{Cvoid}, + "b"=>Int8, "B"=>UInt8, + "h"=>Cshort, "H"=>Cushort, + "i"=>Cint, "I"=>Cuint, + "l"=>Clong, "L"=>Culong, + "q"=>Clonglong, "Q"=>Culonglong, + "e"=>Float16, "f"=>Cfloat, + "d"=>Cdouble, "g"=>Nothing, # Float128? + # `Nothing` indicates no equiv Julia type + "Z8"=>ComplexF32, "Z16"=>ComplexF64, + "Zf"=>ComplexF32, "Zd"=>ComplexF64) + +const typestrs_native = + Dict{DataType, String}(zip(values(native_typestrs), keys(native_typestrs))) + +get_format_str(pybuf::PyBuffer) = unsafe_string(convert(Ptr{UInt8}, pybuf.buf.format)) + +function array_format(pybuf::PyBuffer) + # a NULL-terminated format-string ... indicating what is in each element of memory. + # TODO: handle more cases: https://www.python.org/dev/peps/pep-3118/#additions-to-the-struct-string-syntax + # refs: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966 + # https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/_internal.py#L490 + # https://docs.python.org/2/library/struct.html#byte-order-size-and-alignment + + # "NULL implies standard unsigned bytes ("B")" --pep 3118 + pybuf.buf.format == C_NULL && return UInt8, true + + fmt_str = get_format_str(pybuf) + native_byteorder = true + type_start_idx = 1 + typestrs = standard_typestrs + if length(fmt_str) > 1 + type_start_idx = 2 + if fmt_str[1] == '=' + elseif fmt_str[1] == '<' + native_byteorder = ENDIAN_BOM == 0x04030201 + elseif fmt_str[1] == '>' || fmt_str =='!' + native_byteorder = ENDIAN_BOM == 0x01020304 + elseif fmt_str[1] == '@' || fmt_str[1] == '^' + typestrs = native_typestrs + elseif fmt_str[1] == "Z" + type_start_idx = 1 + else + error("Unsupported format string: \"$fmt_str\"") + end + end + typestrs[fmt_str[type_start_idx:end]], native_byteorder +end + ############################################################################# diff --git a/test/runtests.jl b/test/runtests.jl index 7de01831..bc77c484 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -272,11 +272,11 @@ const PyInt = pyversion < v"3" ? Int : Clonglong @test o[:real] == 1 end - # []-based sequence access - let a1=[5,8,6], a2=rand(3,4), a3=rand(3,4,5), o1=PyObject(a1), o2=PyObject(a2), o3=PyObject(a3) + @testset "[]-based sequence access" begin + a1=[5,8,6]; a2=rand(3,4); a3=rand(3,4,5); o1=PyObject(a1); o2=PyObject(a2); o3=PyObject(a3) @test [o1[i] for i in eachindex(a1)] == a1 @test [o1[end-(i-1)] for i in eachindex(a1)] == reverse(a1) - @test o2[1] == collect(a2[1,:]) + @test all(o2[1] == collect(a2[1,:])) @test length(o1) == length(o2) == length(o3) == 3 o1[end-1] = 7 @test o1[2] == 7 @@ -286,7 +286,7 @@ const PyInt = pyversion < v"3" ? Int : Clonglong if PyCall.npy_initialized @test [o2[i,j] for i=1:3, j=1:4] == a2 @test [o3[i,j,k] for i=1:3, j=1:4, k=1:5] == a3 - @test o3[2,3] == collect(a3[2,3,:]) + @test all(o3[2,3] == collect(a3[2,3,:])) o2[2,3] = 8 @test o2[2,3] == 8 o3[2,3,4] = 9 @@ -746,3 +746,4 @@ end end include("test_pyfncall.jl") +include("testpybuffer.jl") diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl new file mode 100644 index 00000000..8d735a05 --- /dev/null +++ b/test/testpybuffer.jl @@ -0,0 +1,114 @@ +using Compat.Test, PyCall, Compat +using PyCall: f_contiguous, PyBUF_ND_CONTIGUOUS, array_format, npy_initialized, +NoCopyArray, isbuftype, setdata! + +pyutf8(s::PyObject) = pycall(s["encode"], PyObject, "utf-8") +pyutf8(s::String) = pyutf8(PyObject(s)) + +@testset "PyBuffer" begin + @testset "String Buffers" begin + b = PyCall.PyBuffer(pyutf8("test string")) + @test ndims(b) == 1 + @test (length(b),) == (length("test string"),) == (size(b, 1),) == size(b) + @test stride(b, 1) == 1 + @test PyCall.iscontiguous(b) == true + end + + if !npy_initialized + println("Skipping array related buffer tests since NumPy not available") + else + np = pyimport("numpy") + listpy = pybuiltin("list") + arrpyo(args...; kwargs...) = + pycall(np["array"], PyObject, args...; kwargs...) + listpyo(args...) = pycall(listpy, PyObject, args...) + pytestarray(sz::Int...; order="C") = + pycall(arrpyo(1.0:prod(sz), "d")["reshape"], PyObject, sz, order=order) + + @testset "Non-native-endian" begin + wrong_endian_str = ENDIAN_BOM == 0x01020304 ? "<" : ">" + wrong_endian_arr = + pycall(np["ndarray"], PyObject, 2; buffer=UInt8[0,1,3,2], + dtype=wrong_endian_str*"i2") + # Not supported, so throws + @test_throws ArgumentError NoCopyArray(wrong_endian_arr) + @test_throws ArgumentError PyArray(wrong_endian_arr) + end + + @testset "NoCopyArray 1d" begin + ao = arrpyo(1.0:10.0, "d") + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (10,) + @test strides(pybuf) == (1,) .* sizeof(T) + nca = NoCopyArray(ao) + @test !(nca isa PermutedDimsArray) + @test nca isa Array + @test nca[3] == ao[3] + @test nca[4] == ao[4] + end + + @testset "NoCopyArray 2d f-contig" begin + ao = arrpyo(reshape(1.0:12.0, (3,4)) |> collect, "d", order="F") + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (3,4) + @test strides(pybuf) == (1, 3) .* sizeof(T) + nca = NoCopyArray(ao) + @test !(nca isa PermutedDimsArray) + # @show typeof(nca) (nca isa Array) + @test nca isa Array + @test size(nca) == (3,4) + @test strides(nca) == (1,3) + @test nca[3,2] == ao[3,2] + @test nca[2,3] == ao[2,3] + end + + @testset "NoCopyArray 3d c-contig" begin + ao = pytestarray(3,4,5) + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (3,4,5) + @test strides(pybuf) == (20,5,1) .* sizeof(T) + nca = NoCopyArray(ao) + @test nca isa PermutedDimsArray + @test !(nca isa Array) + @test size(nca) == (3,4,5) + @test strides(nca) == (20,5,1) + @test nca[2,3,4] == ao[2,3,4] + @test nca[3,2,4] == ao[3,2,4] + end + + @testset "isbuftype" begin + @test isbuftype(PyObject(0)) == false + @test isbuftype(listpyo((1.0:10.0...,))) == false + @test isbuftype(arrpyo(1.0:10.0, "d")) == true + @test isbuftype(PyObject([1:10...])) == true + end + + # TODO maybe move these to a test_pyarray.jl + @testset "setdata!" begin + ao1 = arrpyo(1.0:10.0, "d") + pyarr = convert(PyArray, ao1) + ao2 = arrpyo(11.0:20.0, "d") + setdata!(pyarr, ao2) + @test all(pyarr[1:10] .== 11.0:20.0) + end + + @testset "similar on PyArray PyVec getindex" begin + jlarr1 = [1:10;] + jlarr2 = hcat([1:10;], [1:10;]) + pyarr1 = pycall(np["array"], PyArray, jlarr1) + pyarr2 = pycall(np["array"], PyArray, jlarr2) + @test all(pyarr1[1:10] .== jlarr1[1:10]) + @test all(pyarr2[1:10, 2] .== jlarr2[1:10, 2]) + @test all(pyarr2[1:10, 1:2] .== jlarr2) + end + end +end