diff --git a/benchmarks/arrayperf.jl b/benchmarks/arrayperf.jl
new file mode 100644
index 00000000..b5a46f06
--- /dev/null
+++ b/benchmarks/arrayperf.jl
@@ -0,0 +1,66 @@
+# Micro-benchmarks for the PyObject -> PyArray / Array / NoCopyArray conversion paths.
+using PyCall, BenchmarkTools, DataStructures
+# `NoCopyArray` and `setdata!` are imported by name so the script does not depend on
+# them being exported from PyCall.
+using PyCall: PyArray_Info, NoCopyArray, setdata!
+
+results = OrderedDict{String,Any}()
+
+let
+    np = pyimport("numpy")
+    nprand = np["random"]["rand"]
+    # nparray_pyo(x) = pycall(np["array"], PyObject, x)
+    # pytestarray(sz::Int...) = pycall(np["reshape"], PyObject, nparray_pyo(1:prod(sz)), sz)
+
+    # no convert baseline
+    nprand_pyo(sz...) = pycall(nprand, PyObject, sz...)
+
+    for arr_size in [(2,2), (100,100)]
+        pyo_arr = nprand_pyo(arr_size...)
+        results["nprand_pyo$arr_size"] = @benchmark $nprand_pyo($arr_size...)
+        println("nprand_pyo $arr_size:\n"); display(results["nprand_pyo$arr_size"])
+        println("--------------------------------------------------")
+
+        results["convert_pyarr$arr_size"] = @benchmark $convert(PyArray, $pyo_arr)
+        println("convert_pyarr $arr_size:\n"); display(results["convert_pyarr$arr_size"])
+        println("--------------------------------------------------")
+
+        results["PyArray_Info$arr_size"] = @benchmark $PyArray_Info($pyo_arr)
+        println("PyArray_Info $arr_size:\n"); display(results["PyArray_Info$arr_size"])
+        println("--------------------------------------------------")
+
+        results["convert_pyarrbuf$arr_size"] = @benchmark $PyArray($pyo_arr)
+        println("convert_pyarrbuf $arr_size:\n"); display(results["convert_pyarrbuf$arr_size"])
+        println("--------------------------------------------------")
+
+        results["convert_arr$arr_size"] = @benchmark convert(Array, $pyo_arr)
+        println("convert_arr $arr_size:\n"); display(results["convert_arr$arr_size"])
+        println("--------------------------------------------------")
+
+        results["convert_arrbuf$arr_size"] = @benchmark $NoCopyArray($pyo_arr)
+        println("convert_arrbuf $arr_size:\n"); display(results["convert_arrbuf$arr_size"])
+        println("--------------------------------------------------")
+
+        pyarr = convert(PyArray, pyo_arr)
+        results["setdata!$arr_size"] = @benchmark $setdata!($pyarr, $pyo_arr)
+        println("setdata! $arr_size:\n"); display(results["setdata!$arr_size"])
+        println("--------------------------------------------------")
+
+        pyarr = convert(PyArray, pyo_arr)
+        pybuf=PyBuffer()
+        # Only run this case when a `setdata!` method accepting a caller-supplied
+        # PyBuffer exists; otherwise a MethodError would abort the whole script.
+        if applicable(setdata!, pyarr, pyo_arr, pybuf)
+            results["setdata! bufprealloc$arr_size"] =
+                @benchmark $setdata!($pyarr, $pyo_arr, $pybuf)
+            println("setdata! bufprealloc $arr_size:\n"); display(results["setdata! bufprealloc$arr_size"])
+        else
+            println("setdata! bufprealloc $arr_size: skipped (no matching setdata! method)")
+        end
+        println("--------------------------------------------------")
+    end
+end
+println()
+println("Mean times")
+println("----------")
+foreach((r)->println(rpad(r[1],27), ": ", mean(r[2])), results)
diff --git a/src/PyCall.jl b/src/PyCall.jl
index 8e5ccc46..f96740ab 100644
--- a/src/PyCall.jl
+++ b/src/PyCall.jl
@@ -5,7 +5,8 @@ module PyCall
 using Compat, VersionParsing
 
 export pycall, pycall!, pyimport, pyimport_e, pybuiltin, PyObject, PyReverseDims,
-       PyPtr, pyincref, pydecref, pyversion, PyArray, PyArray_Info,
+       PyPtr, pyincref, pydecref, pyversion,
+       PyArray, PyArray_Info, PyBuffer,
        pyerr_check, pyerr_clear, pytype_query, PyAny, @pyimport, PyDict,
        pyisinstance, pywrap, pytypeof, pyeval, PyVector, pystring, pystr, pyrepr,
        pyraise, pytype_mapping, pygui, pygui_start, pygui_stop,
@@ -177,6 +178,7 @@ pytypeof(o::PyObject) = ispynull(o) ? throw(ArgumentError("NULL PyObjects have n
 const TypeTuple = Union{Type,NTuple{N, Type}} where {N}
 
 include("pybuffer.jl")
+include("pyarray.jl")
 include("conversions.jl")
 include("pytype.jl")
 include("pyiterator.jl")
diff --git a/src/conversions.jl b/src/conversions.jl
index ab681ae5..ec3c6af5 100644
--- a/src/conversions.jl
+++ b/src/conversions.jl
@@ -768,13 +768,11 @@ function pysequence_query(o::PyObject)
         return AbstractRange
     elseif ispybytearray(o)
         return Vector{UInt8}
-    elseif !haskey(o, "__array_interface__")
+    elseif !isbuftype(o)
         # only handle PyList for now
         return pyisinstance(o, @pyglobalobj :PyList_Type) ?
Array : Union{} else - otypestr = get(o["__array_interface__"], PyObject, "typestr") - typestr = convert(AbstractString, otypestr) # Could this just be String now? - T = npy_typestrs[typestr[2:end]] + T, native_byteorder = array_format(o) if T == PyPtr T = PyObject end diff --git a/src/numpy.jl b/src/numpy.jl index 542b8b9b..e41fd1a2 100644 --- a/src/numpy.jl +++ b/src/numpy.jl @@ -102,7 +102,7 @@ end # the values of these seem to have been stable for some time, and # the NumPy developers seem to have some awareness of binary compatibility -# NPY_TYPES: +# NumPy Types: const NPY_BOOL = Int32(0) const NPY_BYTE = Int32(1) @@ -127,26 +127,8 @@ const NPY_UNICODE = Int32(19) const NPY_VOID = Int32(20) const NPY_HALF = Int32(23) -# NPY_ORDER: -const NPY_ANYORDER = Int32(-1) -const NPY_CORDER = Int32(0) -const NPY_FORTRANORDER = Int32(1) - -# flags: -const NPY_ARRAY_C_CONTIGUOUS = Int32(1) -const NPY_ARRAY_F_CONTIGUOUS = Int32(2) -const NPY_ARRAY_ALIGNED = Int32(0x0100) -const NPY_ARRAY_WRITEABLE = Int32(0x0400) -const NPY_ARRAY_OWNDATA = Int32(0x0004) -const NPY_ARRAY_ENSURECOPY = Int32(0x0020) -const NPY_ARRAY_ENSUREARRAY = Int32(0x0040) -const NPY_ARRAY_FORCECAST = Int32(0x0010) -const NPY_ARRAY_UPDATEIFCOPY = Int32(0x1000) -const NPY_ARRAY_NOTSWAPPED = Int32(0x0200) -const NPY_ARRAY_ELEMENTSTRIDES = Int32(0x0080) - ######################################################################### -# conversion from Julia types to NPY_TYPES constant +# conversion from Julia types to NumPy types npy_type(::Type{Bool}) = NPY_BOOL npy_type(::Type{Int8}) = NPY_BYTE @@ -164,18 +146,9 @@ npy_type(::Type{ComplexF32}) = NPY_CFLOAT npy_type(::Type{ComplexF64}) = NPY_CDOUBLE npy_type(::Type{PyPtr}) = NPY_OBJECT -const NPY_TYPES = Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64,ComplexF32,ComplexF64,PyPtr} - -# conversions from __array_interface__ type strings to supported Julia types -const npy_typestrs = Dict( "b1"=>Bool, - "i1"=>Int8, "u1"=>UInt8, - 
"i2"=>Int16, "u2"=>UInt16, - "i4"=>Int32, "u4"=>UInt32, - "i8"=>Int64, "u8"=>UInt64, - "f2"=>Float16, "f4"=>Float32, - "f8"=>Float64, "c8"=>ComplexF32, - "c16"=>ComplexF64, "O"=>PyPtr, - "O$(div(Sys.WORD_SIZE,8))"=>PyPtr) +# flags: +const NPY_ARRAY_ALIGNED = Int32(0x0100) +const NPY_ARRAY_WRITEABLE = Int32(0x0400) ######################################################################### # no-copy conversion of Julia arrays to NumPy arrays. @@ -185,7 +158,7 @@ const npy_typestrs = Dict( "b1"=>Bool, # dimensions. For example, although NumPy works with both row-major and # column-major data, some Python libraries like OpenCV seem to require # row-major data (the default in NumPy). In such cases, use PyReverseDims(array) -function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:NPY_TYPES +function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:PYARR_TYPES @npyinitialize size_a = revdims ? reverse(size(a)) : size(a) strides_a = revdims ? reverse(strides(a)) : strides(a) @@ -199,7 +172,7 @@ function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:NPY_TYPES return PyObject(p, a) end -function PyObject(a::StridedArray{T}) where T<:NPY_TYPES +function PyObject(a::StridedArray{T}) where T<:PYARR_TYPES try return NpyArray(a, false) catch @@ -207,7 +180,7 @@ function PyObject(a::StridedArray{T}) where T<:NPY_TYPES end end -PyReverseDims(a::StridedArray{T}) where {T<:NPY_TYPES} = NpyArray(a, true) +PyReverseDims(a::StridedArray{T}) where {T<:PYARR_TYPES} = NpyArray(a, true) PyReverseDims(a::BitArray) = PyReverseDims(Array(a)) """ @@ -222,283 +195,3 @@ libraries that expect row-major data. PyReverseDims(a::AbstractArray) ######################################################################### -# Extract shape and other information about a NumPy array. We need -# to call the Python interface to do this, since the equivalent information -# in NumPy's C API is only available via macros (or parsing structs). 
-# [ Hopefully, this will be improved in a future NumPy version. ] - -mutable struct PyArray_Info - T::Type - native::Bool # native byte order? - sz::Vector{Int} - st::Vector{Int} # strides, in multiples of bytes! - data::Ptr{Cvoid} - readonly::Bool - - function PyArray_Info(a::PyObject) - ai = PyDict{AbstractString,PyObject}(a["__array_interface__"]) - typestr = convert(AbstractString, ai["typestr"]) - T = npy_typestrs[typestr[2:end]] - datatuple = convert(Tuple{Int,Bool}, ai["data"]) - sz = convert(Vector{Int}, ai["shape"]) - local st - try - st = isempty(sz) ? Int[] : convert(Vector{Int}, ai["strides"]) - catch - # default is C-order contiguous - st = similar(sz) - st[end] = sizeof(T) - for i = length(sz)-1:-1:1 - st[i] = st[i+1]*sz[i+1] - end - end - return new(T, - (ENDIAN_BOM == 0x04030201 && typestr[1] == '<') - || (ENDIAN_BOM == 0x01020304 && typestr[1] == '>') - || typestr[1] == '|', - sz, st, - convert(Ptr{Cvoid}, datatuple[1]), - datatuple[2]) - end -end - -aligned(i::PyArray_Info) = # FIXME: also check pointer alignment? 
- all(m -> m == 0, mod.(i.st, sizeof(i.T))) # strides divisible by elsize - -# whether a contiguous array in column-major (Fortran, Julia) order -function f_contiguous(T::Type, sz::Vector{Int}, st::Vector{Int}) - if prod(sz) == 1 - return true - end - if st[1] != sizeof(T) - return false - end - for j = 2:length(st) - if st[j] != st[j-1] * sz[j-1] - return false - end - end - return true -end - -f_contiguous(i::PyArray_Info) = f_contiguous(i.T, i.sz, i.st) -@static if VERSION >= v"0.7.0-DEV.4534" # julia#26369 - c_contiguous(i::PyArray_Info) = f_contiguous(i.T, reverse(i.sz,dims=1), reverse(i.st,dims=1)) -else - c_contiguous(i::PyArray_Info) = f_contiguous(i.T, flipdim(i.sz,1), flipdim(i.st,1)) -end - -######################################################################### -# PyArray: no-copy wrapper around NumPy ndarray -# -# Hopefully, in the future this can be a subclass of StridedArray (see -# Julia issue #2345), which will allow it to be used with most Julia -# functions, but that is not possible at the moment. So, to use this -# with Julia linalg functions etcetera a copy is still required. - -""" - PyArray(o::PyObject) - -This converts an `ndarray` object `o` to a PyArray. - -This implements a nocopy wrapper to a NumPy array (currently of only numeric types only). - -If you are using `pycall` and the function returns an `ndarray`, you can use `PyArray` as the return type to directly receive a `PyArray`. 
-""" -mutable struct PyArray{T,N} <: AbstractArray{T,N} - o::PyObject - info::PyArray_Info - dims::Dims - st::Vector{Int} - f_contig::Bool - c_contig::Bool - data::Ptr{T} - - function PyArray{T,N}(o::PyObject, info::PyArray_Info) where {T,N} - if !aligned(info) - throw(ArgumentError("only NPY_ARRAY_ALIGNED arrays are supported")) - elseif !info.native - throw(ArgumentError("only native byte-order arrays are supported")) - elseif info.T != T - throw(ArgumentError("inconsistent type in PyArray constructor")) - elseif length(info.sz) != N || length(info.st) != N - throw(ArgumentError("inconsistent ndims in PyArray constructor")) - end - return new{T,N}(o, info, tuple(info.sz...), div.(info.st, sizeof(T)), - f_contiguous(info), c_contiguous(info), - convert(Ptr{T}, info.data)) - end -end - -function PyArray(o::PyObject) - info = PyArray_Info(o) - return PyArray{info.T, length(info.sz)}(o, info) -end - -size(a::PyArray) = a.dims -ndims(a::PyArray{T,N}) where {T,N} = N - -similar(a::PyArray, T, dims::Dims) = Array{T}(undef, dims) - -function copy(a::PyArray{T,N}) where {T,N} - if N > 1 && a.c_contig # equivalent to f_contig with reversed dims - B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N)) - return permutedims(B, (N:-1:1)) - end - A = Array{T}(undef, a.dims) - if a.f_contig - ccall(:memcpy, Cvoid, (Ptr{T}, Ptr{T}, Int), A, a, sizeof(T)*length(a)) - return A - else - return copyto!(A, a) - end -end - -# TODO: need to do bounds-checking of these indices! - -getindex(a::PyArray{T,0}) where {T} = unsafe_load(a.data) -getindex(a::PyArray{T,1}, i::Integer) where {T} = unsafe_load(a.data, 1 + (i-1)*a.st[1]) - -getindex(a::PyArray{T,2}, i::Integer, j::Integer) where {T} = - unsafe_load(a.data, 1 + (i-1)*a.st[1] + (j-1)*a.st[2]) - -function getindex(a::PyArray, i::Integer) - if a.f_contig - return unsafe_load(a.data, i) - else - return a[ind2sub(a.dims, i)...] - end -end - -function getindex(a::PyArray, is::Integer...) 
- index = 1 - n = min(length(is),length(a.st)) - for i = 1:n - index += (is[i]-1)*a.st[i] - end - for i = n+1:length(is) - if is[i] != 1 - throw(BoundsError()) - end - end - unsafe_load(a.data, index) -end - -function writeok_assign(a::PyArray, v, i::Integer) - if a.info.readonly - throw(ArgumentError("read-only PyArray")) - else - unsafe_store!(a.data, v, i) - end - return a -end - -setindex!(a::PyArray{T,0}, v) where {T} = writeok_assign(a, v, 1) -setindex!(a::PyArray{T,1}, v, i::Integer) where {T} = writeok_assign(a, v, 1 + (i-1)*a.st[1]) - -setindex!(a::PyArray{T,2}, v, i::Integer, j::Integer) where {T} = - writeok_assign(a, v, 1 + (i-1)*a.st[1] + (j-1)*a.st[2]) - -function setindex!(a::PyArray, v, i::Integer) - if a.f_contig - return writeok_assign(a, v, i) - else - return setindex!(a, v, ind2sub(a.dims, i)...) - end -end - -function setindex!(a::PyArray, v, is::Integer...) - index = 1 - n = min(length(is),length(a.st)) - for i = 1:n - index += (is[i]-1)*a.st[i] - end - for i = n+1:length(is) - if is[i] != 1 - throw(BoundsError()) - end - end - writeok_assign(a, v, index) -end - -stride(a::PyArray, i::Integer) = a.st[i] - -Base.unsafe_convert(::Type{Ptr{T}}, a::PyArray{T}) where {T} = a.data - -pointer(a::PyArray, i::Int) = pointer(a, ind2sub(a.dims, i)) - -function pointer(a::PyArray{T}, is::Tuple{Vararg{Int}}) where T - offset = 0 - for i = 1:length(is) - offset += (is[i]-1)*a.st[i] - end - return a.data + offset*sizeof(T) -end - -summary(a::PyArray{T}) where {T} = string(Base.dims2string(size(a)), " ", - string(T), " PyArray") - -######################################################################### -# PyArray <-> PyObject conversions - -PyObject(a::PyArray) = a.o - -convert(::Type{PyArray}, o::PyObject) = PyArray(o) - -function convert(::Type{Array{T, 1}}, o::PyObject) where T<:NPY_TYPES - try - copy(PyArray{T, 1}(o, PyArray_Info(o))) # will check T and N vs. 
info
-    catch
-        len = @pycheckz ccall((@pysym :PySequence_Size), Int, (PyPtr,), o)
-        A = Array{pyany_toany(T)}(undef, len)
-        py2array(T, A, o, 1, 1)
-    end
-end
-
-function convert(::Type{Array{T}}, o::PyObject) where T<:NPY_TYPES
-    try
-        info = PyArray_Info(o)
-        try
-            copy(PyArray{T, length(info.sz)}(o, info)) # will check T == info.T
-        catch
-            return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1)
-        end
-    catch
-        py2array(T, o)
-    end
-end
-
-function convert(::Type{Array{T,N}}, o::PyObject) where {T<:NPY_TYPES,N}
-    try
-        info = PyArray_Info(o)
-        try
-            copy(PyArray{T,N}(o, info)) # will check T == info.T and N == length(info.sz)
-        catch
-            nd = length(info.sz)
-            if nd != N
-                throw(ArgumentError("cannot convert $(nd)d array to $(N)d"))
-            end
-            return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1)
-        end
-    catch
-        A = py2array(T, o)
-        if ndims(A) != N
-            throw(ArgumentError("cannot convert $(ndims(A))d array to $(N)d"))
-        end
-        A
-    end
-end
-
-function convert(::Type{Array{PyObject}}, o::PyObject)
-    map(pyincref, convert(Array{PyPtr}, o))
-end
-
-function convert(::Type{Array{PyObject,1}}, o::PyObject)
-    map(pyincref, convert(Array{PyPtr, 1}, o))
-end
-
-function convert(::Type{Array{PyObject,N}}, o::PyObject) where N
-    map(pyincref, convert(Array{PyPtr, N}, o))
-end
-
-#########################################################################
diff --git a/src/pyarray.jl b/src/pyarray.jl
new file mode 100644
index 00000000..fb8559a6
--- /dev/null
+++ b/src/pyarray.jl
@@ -0,0 +1,355 @@
+#########################################################################
+# Extract shape and other information about arrays that support Python's
+# Buffer Interface/Protocol (PEP 3118)
+#########################################################################
+struct PyArray_Info{T,N}
+    native::Bool # native byte order?
+    sz::NTuple{N,Int}
+    st::NTuple{N,Int} # strides, in multiples of bytes!
+    data::Ptr{T}
+    readonly::Bool
+    pybuf::PyBuffer # buffer view the fields above were read from
+end
+
+"""
+    PyArray_Info(o::PyObject)
+
+Request a buffer view of `o` with `PyBUF_ND_CONTIGUOUS` and record its element type,
+byte order, shape, byte strides, data pointer and read-only flag.
+"""
+function PyArray_Info(o::PyObject)
+    # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer
+    pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS)
+    T, native_byteorder = array_format(pybuf)
+    sz = size(pybuf)
+    strd = strides(pybuf)
+    length(strd) == 0 && (sz = ())
+    N = length(sz)
+    isreadonly = pybuf.buf.readonly==1
+    return PyArray_Info{T,N}(native_byteorder, sz, strd, pybuf.buf.buf, isreadonly, pybuf)
+end
+
+aligned(i::PyArray_Info{T,N}) where {T,N} = # FIXME: also check pointer alignment?
+    all(m -> m == 0, mod.(i.st, sizeof(T))) # strides divisible by elsize
+
+eltype(i::PyArray_Info{T,N}) where {T,N} = T
+ndims(i::PyArray_Info{T,N}) where {T,N} = N
+
+# Byte strides of a C-contiguous (row-major) array of `T` with shape `sz`;
+# an empty tuple for a 0-dimensional shape.
+function default_stride(sz::NTuple{N, Int}, ::Type{T}) where {T,N}
+    stv = Vector{Int}(undef, N)
+    N > 0 && (stv[end] = sizeof(T))
+    for i = N-1:-1:1
+        stv[i] = stv[i+1]*sz[i+1]
+    end
+    ntuple(i->stv[i], N)
+end
+
+# whether a contiguous array in column-major (Fortran, Julia) order
+function f_contiguous(T::Type, sz::NTuple{N,Int}, st::NTuple{N,Int}) where N
+    if prod(sz) == 1 || length(sz) == 1
+        # 0 or 1-dim arrays should default to f-contiguous in julia
+        return true
+    end
+    if st[1] != sizeof(T)
+        return false
+    end
+    for j = 2:N
+        if st[j] != st[j-1] * sz[j-1]
+            return false
+        end
+    end
+    return true
+end
+
+f_contiguous(T::Type, sz::NTuple{N1,Int}, st::NTuple{N2,Int}) where {N1,N2} =
+    error("stride and size are different lengths, size: $sz, strides: $st")
+
+f_contiguous(i::PyArray_Info{T,N}) where {T,N} = f_contiguous(T, i.sz, i.st)
+c_contiguous(i::PyArray_Info{T,N}) where {T,N} =
+    f_contiguous(T, reverse(i.sz), reverse(i.st))
+
+
+#########################################################################
+# PyArray: no-copy wrapper around NumPy ndarray
+#
+# Hopefully, in the future this can be a subclass of StridedArray (see
+# Julia issue #2345), which will allow it to be used with most Julia
+# functions, but that is not possible at the moment. So, to use this
+# with Julia linalg functions etcetera a copy is still required.
+
+"""
+    PyArray(o::PyObject)
+
+This converts an `ndarray` object `o` to a PyArray.
+
+This implements a nocopy wrapper to a NumPy array (currently of numeric types only).
+
+If you are using `pycall` and the function returns an `ndarray`, you can use `PyArray` as the return type to directly receive a `PyArray`.
+"""
+mutable struct PyArray{T,N} <: AbstractArray{T,N}
+    o::PyObject
+    info::PyArray_Info{T,N} # concrete: the constructor checks eltype and ndims first
+    dims::NTuple{N,Int}
+    st::NTuple{N,Int} # strides in elements (byte strides divided by sizeof(T))
+    f_contig::Bool
+    c_contig::Bool
+    data::Ptr{T}
+
+    function PyArray{T,N}(o::PyObject, info::PyArray_Info) where {T,N}
+        if eltype(info) === Nothing
+            # `Nothing` marks a buffer format with no Julia equivalent; sizeof is 0
+            throw(ArgumentError("unsupported array element type"))
+        elseif !aligned(info)
+            throw(ArgumentError("only NPY_ARRAY_ALIGNED arrays are supported"))
+        elseif !info.native
+            throw(ArgumentError("only native byte-order arrays are supported"))
+        elseif eltype(info) != T
+            throw(ArgumentError("inconsistent type in PyArray constructor"))
+        elseif length(info.sz) != N || length(info.st) != N
+            throw(ArgumentError("inconsistent ndims in PyArray constructor"))
+        end
+        return new{T,N}(o, info, tuple(info.sz...), div.(info.st, sizeof(T)),
+                        f_contiguous(info), c_contiguous(info),
+                        convert(Ptr{T}, info.data))
+    end
+end
+
+function PyArray(o::PyObject)
+    info = PyArray_Info(o)
+    return PyArray{eltype(info), length(info.sz)}(o, info)
+end
+
+size(a::PyArray) = a.dims
+ndims(a::PyArray{T,N}) where {T,N} = N
+
+similar(a::PyArray, ::Type{T}, dims::Dims) where {T} = Array{T}(undef, dims)
+
+"""
+    setdata!(a::PyArray, o::PyObject)
+
+Update the data ptr of `a` to point to the buffer exposed by `o` through
+the Python buffer interface. The buffer view held in `a.info` is re-filled from
+`o`; the stored dims, strides and read-only flag of `a` are left unchanged.
+"""
+function setdata!(a::PyArray{T,N}, o::PyObject) where {T,N}
+    pybufinfo = a.info.pybuf
+    PyBuffer!(pybufinfo, o, PyBUF_ND_CONTIGUOUS)
+    dataptr = pybufinfo.buf.buf
+    a.data = reinterpret(Ptr{T}, dataptr)
+    a
+end
+
+function copy(a::PyArray{T,N}) where {T,N}
+    if N > 1 && a.c_contig # equivalent to f_contig with reversed dims
+        B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N))
+        return permutedims(B, (N:-1:1))
+    end
+    A = Array{T}(undef, a.dims)
+    if a.f_contig
+        ccall(:memcpy, Cvoid, (Ptr{T}, Ptr{T}, Int), A, a, sizeof(T)*length(a))
+        return A
+    else
+        return copyto!(A, a)
+    end
+end
+
+# TODO: need to do bounds-checking of these indices!
+# TODO: need to GC root these `a`s to guard against the PyArray getting gc'd,
+# e.g. if it's a temporary in a function:
+# `two_rands() = pycall(np.rand, PyArray, 10)[1:2]`
+
+
+getindex(a::PyArray{T,0}) where {T} = unsafe_load(a.data)
+getindex(a::PyArray{T,1}, i::Integer) where {T} = unsafe_load(a.data, 1 + (i-1)*a.st[1])
+
+getindex(a::PyArray{T,2}, i::Integer, j::Integer) where {T} =
+    unsafe_load(a.data, 1 + (i-1)*a.st[1] + (j-1)*a.st[2])
+
+function getindex(a::PyArray, i::Integer)
+    if a.f_contig
+        return unsafe_load(a.data, i)
+    else
+        return a[ind2sub(a.dims, i)...]
+    end
+end
+
+function getindex(a::PyArray, is::Integer...)
+    index = 1
+    n = min(length(is),length(a.st))
+    for i = 1:n
+        index += (is[i]-1)*a.st[i]
+    end
+    for i = n+1:length(is)
+        if is[i] != 1
+            throw(BoundsError())
+        end
+    end
+    unsafe_load(a.data, index)
+end
+
+function writeok_assign(a::PyArray, v, i::Integer)
+    if a.info.readonly
+        throw(ArgumentError("read-only PyArray"))
+    else
+        unsafe_store!(a.data, v, i)
+    end
+    return a
+end
+
+setindex!(a::PyArray{T,0}, v) where {T} = writeok_assign(a, v, 1)
+setindex!(a::PyArray{T,1}, v, i::Integer) where {T} = writeok_assign(a, v, 1 + (i-1)*a.st[1])
+
+setindex!(a::PyArray{T,2}, v, i::Integer, j::Integer) where {T} =
+    writeok_assign(a, v, 1 + (i-1)*a.st[1] + (j-1)*a.st[2])
+
+function setindex!(a::PyArray, v, i::Integer)
+    if a.f_contig
+        return writeok_assign(a, v, i)
+    else
+        return setindex!(a, v, ind2sub(a.dims, i)...)
+    end
+end
+
+function setindex!(a::PyArray, v, is::Integer...)
+    index = 1
+    n = min(length(is),length(a.st))
+    for i = 1:n
+        index += (is[i]-1)*a.st[i]
+    end
+    for i = n+1:length(is)
+        if is[i] != 1
+            throw(BoundsError())
+        end
+    end
+    writeok_assign(a, v, index)
+end
+
+stride(a::PyArray, i::Integer) = a.st[i]
+
+Base.unsafe_convert(::Type{Ptr{T}}, a::PyArray{T}) where {T} = a.data
+
+pointer(a::PyArray, i::Int) = pointer(a, ind2sub(a.dims, i))
+
+function pointer(a::PyArray{T}, is::Tuple{Vararg{Int}}) where T
+    offset = 0
+    for i = 1:length(is)
+        offset += (is[i]-1)*a.st[i]
+    end
+    return a.data + offset*sizeof(T)
+end
+
+summary(a::PyArray{T}) where {T} = string(Base.dims2string(size(a)), " ",
+                                          string(T), " PyArray")
+
+#########################################################################
+# PyArray <-> PyObject conversions
+
+const PYARR_TYPES = Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64,ComplexF32,ComplexF64,PyPtr}
+
+PyObject(a::PyArray) = a.o
+
+convert(::Type{PyArray}, o::PyObject) = PyArray(o)
+
+function convert(::Type{Array{T, 1}}, o::PyObject) where T<:PYARR_TYPES
+    try
+        copy(PyArray{T, 1}(o, PyArray_Info(o))) # will check T and N vs. info
+    catch
+        len = @pycheckz ccall((@pysym :PySequence_Size), Int, (PyPtr,), o)
+        A = Array{pyany_toany(T)}(undef, len)
+        py2array(T, A, o, 1, 1)
+    end
+end
+
+function convert(::Type{Array{T}}, o::PyObject) where T<:PYARR_TYPES
+    try
+        info = PyArray_Info(o)
+        try
+            copy(PyArray{T, length(info.sz)}(o, info)) # will check T == eltype(info)
+        catch
+            return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1)
+        end
+    catch
+        py2array(T, o)
+    end
+end
+
+function convert(::Type{Array{T,N}}, o::PyObject) where {T<:PYARR_TYPES,N}
+    try
+        info = PyArray_Info(o)
+        try
+            copy(PyArray{T,N}(o, info)) # will check T,N == eltype(info),ndims(info)
+        catch
+            nd = length(info.sz)
+            if nd != N
+                throw(ArgumentError("cannot convert $(nd)d array to $(N)d"))
+            end
+            return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1)
+        end
+    catch
+        A = py2array(T, o)
+        if ndims(A) != N
+            throw(ArgumentError("cannot convert $(ndims(A))d array to $(N)d"))
+        end
+        A
+    end
+end
+
+function convert(::Type{Array{PyObject}}, o::PyObject)
+    map(pyincref, convert(Array{PyPtr}, o))
+end
+
+function convert(::Type{Array{PyObject,1}}, o::PyObject)
+    map(pyincref, convert(Array{PyPtr, 1}, o))
+end
+
+function convert(::Type{Array{PyObject,N}}, o::PyObject) where N
+    map(pyincref, convert(Array{PyPtr, N}, o))
+end
+
+array_format(o::PyObject) = array_format(PyBuffer(o, PyBUF_ND_CONTIGUOUS))
+
+"""
+```
+NoCopyArray(o::PyObject)
+```
+Convert a Python array-like object, to a Julia `Array` or `PermutedDimsArray`
+without making a copy of the data.
+
+If the data is stored in row-major format
+(the default in Python/NumPy), then the returned array `nca` will be a
+`PermutedDimsArray` such that the arrays are indexed the same way in Julia and
+Python. i.e. `nca[idxs...] == o[idxs...]`
+
+If the data is stored in column-major format then a regular Julia `Array` will
+be returned.
+
+Warning: This function is only lightly tested, and should be considered
+experimental - it may cause segmentation faults on conversion or subsequent
+array access, or be subtly broken in other ways. Only dense/contiguous, native
+endian arrays that support the Python Buffer protocol are likely to be converted
+correctly.
+"""
+function NoCopyArray(o::PyObject)
+    # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer
+    pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS)
+    T, native_byteorder = array_format(pybuf)
+    !native_byteorder && throw(ArgumentError(
+        "Only native endian format supported, format string: '$(get_format_str(pybuf))'"))
+    T == Nothing && throw(ArgumentError(
+        "Array datatype '$(get_format_str(pybuf))' not supported"))
+    # TODO more checks on strides etc
+    sz = size(pybuf)
+    # NOTE(review): the wrapped array keeps no reference to `pybuf`, whose finalizer
+    # releases the buffer view - presumably the caller must keep `o` alive while the
+    # result is in use; confirm.
+    @static if VERSION >= v"0.7.0-DEV.3526" # julia#25647
+        arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), sz, own=false)
+    else
+        arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), sz, false)
+    end
+    !f_contiguous(T, sz, strides(pybuf)) &&
+        (arr = PermutedDimsArray(reshape(arr, reverse(sz)), (pybuf.buf.ndim:-1:1)))
+    return arr
+end
diff --git a/src/pybuffer.jl b/src/pybuffer.jl
index 06295261..ea076e1f 100644
--- a/src/pybuffer.jl
+++ b/src/pybuffer.jl
@@ -29,7 +29,7 @@ end
 mutable struct PyBuffer
     buf::Py_buffer
     PyBuffer() = begin
-        b = new(Py_buffer(C_NULL, C_NULL, 0, 0,
+        b = new(Py_buffer(C_NULL, PyPtr_NULL, 0, 0,
                           0, 0, C_NULL, C_NULL, C_NULL, C_NULL,
                           C_NULL, C_NULL, C_NULL))
         @compat finalizer(pydecref, b)
@@ -37,6 +37,13 @@ mutable struct PyBuffer
     end
 end
 
+"""
+`pydecref(o::PyBuffer)`
+Release the reference to buffer `o`
+N.b.
As per https://docs.python.org/3/c-api/buffer.html#c.PyBuffer_Release,
+It is an error to call this function on a PyBuffer that was not obtained via
+the python c-api function `PyObject_GetBuffer()`, unless `o.obj` is a `PyPtr(C_NULL)`
+"""
 function pydecref(o::PyBuffer)
     # note that PyBuffer_Release sets o.obj to NULL, and
     # is a no-op if o.obj is already NULL
@@ -86,6 +93,9 @@ function Base.stride(b::PyBuffer, d::Integer)
     return Int(unsafe_load(b.buf.strides, d))
 end
 
+# Strides in bytes
+Base.strides(b::PyBuffer) = ((stride(b,i) for i in 1:b.buf.ndim)...,)
+
 # TODO change to `Ref{PyBuffer}` when 0.6 is dropped.
 iscontiguous(b::PyBuffer) =
     1 == ccall((@pysym :PyBuffer_IsContiguous), Cint,
@@ -93,7 +103,6 @@ iscontiguous(b::PyBuffer) =
 
 #############################################################################
 # pybuffer constant values from Include/object.h
-const PyBUF_MAX_NDIM = convert(Cint, 64)
 const PyBUF_SIMPLE = convert(Cint, 0)
 const PyBUF_WRITABLE = convert(Cint, 0x0001)
 const PyBUF_FORMAT = convert(Cint, 0x0004)
@@ -103,16 +112,51 @@ const PyBUF_C_CONTIGUOUS = convert(Cint, 0x0020) | PyBUF_STRIDES
 const PyBUF_F_CONTIGUOUS = convert(Cint, 0x0040) | PyBUF_STRIDES
 const PyBUF_ANY_CONTIGUOUS = convert(Cint, 0x0080) | PyBUF_STRIDES
 const PyBUF_INDIRECT = convert(Cint, 0x0100) | PyBUF_STRIDES
+const PyBUF_ND_CONTIGUOUS = Cint(PyBUF_WRITABLE | PyBUF_FORMAT | PyBUF_ND | PyBUF_STRIDES | PyBUF_ANY_CONTIGUOUS)
 
 # construct a PyBuffer from a PyObject, if possible
 function PyBuffer(o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE)
-    b = PyBuffer()
+    return PyBuffer!(PyBuffer(), o, flags)
+end
+
+# Release whatever `b` currently holds, then re-fill it in place with a view of `o`.
+function PyBuffer!(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE)
     # TODO change to `Ref{PyBuffer}` when 0.6 is dropped.
+    pydecref(b) # ensure b is properly released
     @pycheckz ccall((@pysym :PyObject_GetBuffer), Cint,
                      (PyPtr, Any, Cint), o, b, flags)
     return b
 end
 
+"""
+`isbuftype(o::Union{PyObject,PyPtr})`
+Returns true if the python object `o` exposes a buffer for `PyBUF_ND_CONTIGUOUS` whose
+format maps to a non-pointer element type. False if not, or if the format is unsupported.
+"""
+function isbuftype(o::Union{PyObject,PyPtr})
+    # PyObject_CheckBuffer is defined in a header file here: https://github.com/python/cpython/blob/ef5ce884a41c8553a7eff66ebace908c1dcc1f89/Include/abstract.h#L510
+    # so we can't access it easily. It basically just checks if PyObject_GetBuffer exists
+    # So we'll just try call PyObject_GetBuffer and check for success/failure
+    b = PyBuffer()
+    ret = ccall((@pysym :PyObject_GetBuffer), Cint,
+                     (PyPtr, Any, Cint), o, b, PyBUF_ND_CONTIGUOUS)
+    if ret != 0
+        pyerr_clear()
+        return false
+    end
+    try
+        # handle pointer types
+        T, native_byteorder = array_format(b)
+        return !(T <: Ptr)
+    catch err
+        # array_format raises KeyError/ErrorException for formats it cannot map
+        (err isa KeyError || err isa ErrorException) || rethrow()
+        return false
+    finally
+        pydecref(b) # release the probe view now instead of waiting for the finalizer
+    end
+end
+
 #############################################################################
 
 # recursive function to write buffer dimension by dimension, starting at
@@ -154,4 +198,77 @@ function Base.write(io::IO, b::PyBuffer)
     end
 end
 
+# ref: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966
+
+const standard_typestrs = Dict{String,DataType}(
+    "?"=>Bool, "P"=>Ptr{Cvoid},
+    "b"=>Int8, "B"=>UInt8,
+    "h"=>Int16, "H"=>UInt16,
+    "i"=>Int32, "I"=>UInt32,
+    "l"=>Int32, "L"=>UInt32,
+    "q"=>Int64, "Q"=>UInt64,
+    "e"=>Float16, "f"=>Float32,
+    "d"=>Float64, "g"=>Nothing, # Float128?
+    # `Nothing` indicates no equiv Julia type
+    "Z8"=>ComplexF32, "Z16"=>ComplexF64,
+    "Zf"=>ComplexF32, "Zd"=>ComplexF64)
+
+const native_typestrs = Dict{String,DataType}(
+    "?"=>Bool, "P"=>Ptr{Cvoid},
+    "b"=>Int8, "B"=>UInt8,
+    "h"=>Cshort, "H"=>Cushort,
+    "i"=>Cint, "I"=>Cuint,
+    "l"=>Clong, "L"=>Culong,
+    "q"=>Clonglong, "Q"=>Culonglong,
+    "e"=>Float16, "f"=>Cfloat,
+    "d"=>Cdouble, "g"=>Nothing, # Float128?
+    # `Nothing` indicates no equiv Julia type
+    "Z8"=>ComplexF32, "Z16"=>ComplexF64,
+    "Zf"=>ComplexF32, "Zd"=>ComplexF64)
+
+const typestrs_native =
+    Dict{DataType, String}(zip(values(native_typestrs), keys(native_typestrs)))
+
+get_format_str(pybuf::PyBuffer) = unsafe_string(convert(Ptr{UInt8}, pybuf.buf.format))
+
+# Map the buffer's struct-style format string to `(T, native_byteorder)`: the Julia
+# element type and whether the data is stored in the host's byte order.
+function array_format(pybuf::PyBuffer)
+    # a NULL-terminated format-string ... indicating what is in each element of memory.
+    # TODO: handle more cases: https://www.python.org/dev/peps/pep-3118/#additions-to-the-struct-string-syntax
+    # refs: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966
+    #       https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/_internal.py#L490
+    #       https://docs.python.org/2/library/struct.html#byte-order-size-and-alignment
+
+    # "NULL implies standard unsigned bytes ("B")" --pep 3118
+    pybuf.buf.format == C_NULL && return UInt8, true
+
+    fmt_str = get_format_str(pybuf)
+    native_byteorder = true
+    type_start_idx = 1
+    # with no prefix character the struct module assumes '@', i.e. native sizes
+    typestrs = native_typestrs
+    if length(fmt_str) > 1
+        type_start_idx = 2
+        prefix = fmt_str[1]
+        if prefix == '@' || prefix == '^'
+            # native byte order and native sizes: the defaults set above
+        elseif prefix == '='
+            typestrs = standard_typestrs
+        elseif prefix == '<'
+            native_byteorder = ENDIAN_BOM == 0x04030201
+            typestrs = standard_typestrs
+        elseif prefix == '>' || prefix == '!'
+            native_byteorder = ENDIAN_BOM == 0x01020304
+            typestrs = standard_typestrs
+        elseif prefix == 'Z'
+            # complex codes such as "Zd": the 'Z' belongs to the type code itself
+            type_start_idx = 1
+        else
+            error("Unsupported format string: \"$fmt_str\"")
+        end
+    end
+    typestrs[fmt_str[type_start_idx:end]], native_byteorder
+end
+
 #############################################################################
diff --git a/test/runtests.jl b/test/runtests.jl
index 9733cdef..25f34b5d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -272,11 +272,11 @@ const PyInt = pyversion < v"3" ?
Int : Clonglong @test o[:real] == 1 end - # []-based sequence access - let a1=[5,8,6], a2=rand(3,4), a3=rand(3,4,5), o1=PyObject(a1), o2=PyObject(a2), o3=PyObject(a3) + @testset "[]-based sequence access" begin + a1=[5,8,6]; a2=rand(3,4); a3=rand(3,4,5); o1=PyObject(a1); o2=PyObject(a2); o3=PyObject(a3) @test [o1[i] for i in eachindex(a1)] == a1 @test [o1[end-(i-1)] for i in eachindex(a1)] == reverse(a1) - @test o2[1] == collect(a2[1,:]) + @test all(o2[1] == collect(a2[1,:])) @test length(o1) == length(o2) == length(o3) == 3 o1[end-1] = 7 @test o1[2] == 7 @@ -286,7 +286,7 @@ const PyInt = pyversion < v"3" ? Int : Clonglong if PyCall.npy_initialized @test [o2[i,j] for i=1:3, j=1:4] == a2 @test [o3[i,j,k] for i=1:3, j=1:4, k=1:5] == a3 - @test o3[2,3] == collect(a3[2,3,:]) + @test all(o3[2,3] == collect(a3[2,3,:])) o2[2,3] = 8 @test o2[2,3] == 8 o3[2,3,4] = 9 @@ -688,3 +688,4 @@ def try_call(f): end include("test_pyfncall.jl") +include("testpybuffer.jl") diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl new file mode 100644 index 00000000..8d735a05 --- /dev/null +++ b/test/testpybuffer.jl @@ -0,0 +1,114 @@ +using Compat.Test, PyCall, Compat +using PyCall: f_contiguous, PyBUF_ND_CONTIGUOUS, array_format, npy_initialized, +NoCopyArray, isbuftype, setdata! + +pyutf8(s::PyObject) = pycall(s["encode"], PyObject, "utf-8") +pyutf8(s::String) = pyutf8(PyObject(s)) + +@testset "PyBuffer" begin + @testset "String Buffers" begin + b = PyCall.PyBuffer(pyutf8("test string")) + @test ndims(b) == 1 + @test (length(b),) == (length("test string"),) == (size(b, 1),) == size(b) + @test stride(b, 1) == 1 + @test PyCall.iscontiguous(b) == true + end + + if !npy_initialized + println("Skipping array related buffer tests since NumPy not available") + else + np = pyimport("numpy") + listpy = pybuiltin("list") + arrpyo(args...; kwargs...) = + pycall(np["array"], PyObject, args...; kwargs...) + listpyo(args...) = pycall(listpy, PyObject, args...) 
+ pytestarray(sz::Int...; order="C") = + pycall(arrpyo(1.0:prod(sz), "d")["reshape"], PyObject, sz, order=order) + + @testset "Non-native-endian" begin + wrong_endian_str = ENDIAN_BOM == 0x01020304 ? "<" : ">" + wrong_endian_arr = + pycall(np["ndarray"], PyObject, 2; buffer=UInt8[0,1,3,2], + dtype=wrong_endian_str*"i2") + # Non-native byte order is not supported, so both conversions are expected to throw ArgumentError + @test_throws ArgumentError NoCopyArray(wrong_endian_arr) + @test_throws ArgumentError PyArray(wrong_endian_arr) + end + + @testset "NoCopyArray 1d" begin + ao = arrpyo(1.0:10.0, "d") + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (10,) + @test strides(pybuf) == (1,) .* sizeof(T) + nca = NoCopyArray(ao) + @test !(nca isa PermutedDimsArray) + @test nca isa Array + @test nca[3] == ao[3] + @test nca[4] == ao[4] + end + + @testset "NoCopyArray 2d f-contig" begin + ao = arrpyo(reshape(1.0:12.0, (3,4)) |> collect, "d", order="F") + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (3,4) + @test strides(pybuf) == (1, 3) .* sizeof(T) + nca = NoCopyArray(ao) + @test !(nca isa PermutedDimsArray) + # the array was created with order="F", so a plain Array (no PermutedDimsArray wrapper) is expected + @test nca isa Array + @test size(nca) == (3,4) + @test strides(nca) == (1,3) + @test nca[3,2] == ao[3,2] + @test nca[2,3] == ao[2,3] + end + + @testset "NoCopyArray 3d c-contig" begin + ao = pytestarray(3,4,5) + pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) + T, native_byteorder = array_format(pybuf) + @test T == Float64 + @test native_byteorder == true + @test size(pybuf) == (3,4,5) + @test strides(pybuf) == (20,5,1) .* sizeof(T) + nca = NoCopyArray(ao) + @test nca isa PermutedDimsArray + @test !(nca isa Array) + @test size(nca) == (3,4,5) + @test strides(nca) == (20,5,1) + @test nca[2,3,4] == ao[2,3,4] + @test nca[3,2,4] == ao[3,2,4] + end + + @testset "isbuftype" begin 
+ @test isbuftype(PyObject(0)) == false + @test isbuftype(listpyo((1.0:10.0...,))) == false + @test isbuftype(arrpyo(1.0:10.0, "d")) == true + @test isbuftype(PyObject([1:10...])) == true + end + + # TODO: consider moving the PyArray tests below (setdata!, getindex) to a separate test_pyarray.jl + @testset "setdata!" begin + ao1 = arrpyo(1.0:10.0, "d") + pyarr = convert(PyArray, ao1) + ao2 = arrpyo(11.0:20.0, "d") + setdata!(pyarr, ao2) + @test all(pyarr[1:10] .== 11.0:20.0) + end + + @testset "similar on PyArray PyVec getindex" begin + jlarr1 = [1:10;] + jlarr2 = hcat([1:10;], [1:10;]) + pyarr1 = pycall(np["array"], PyArray, jlarr1) + pyarr2 = pycall(np["array"], PyArray, jlarr2) + @test all(pyarr1[1:10] .== jlarr1[1:10]) + @test all(pyarr2[1:10, 2] .== jlarr2[1:10, 2]) + @test all(pyarr2[1:10, 1:2] .== jlarr2) + end + end +end