From cccd18874984642d5bcbddaf4c625a71b420b4d7 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 12 Dec 2018 17:56:03 +1100 Subject: [PATCH 1/4] Add support for non-contiguous PyArrays --- src/pyarray.jl | 8 ++++---- src/pybuffer.jl | 11 +++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/pyarray.jl b/src/pyarray.jl index fb8559a6..3045f4d6 100644 --- a/src/pyarray.jl +++ b/src/pyarray.jl @@ -13,7 +13,7 @@ end function PyArray_Info(o::PyObject) # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer - pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) + pybuf = PyBuffer(o, PyBUF_ND_STRIDED) T, native_byteorder = array_format(pybuf) sz = size(pybuf) strd = strides(pybuf) @@ -121,7 +121,7 @@ the Python buffer interface """ function setdata!(a::PyArray{T,N}, o::PyObject) where {T,N} pybufinfo = a.info.pybuf - PyBuffer!(pybufinfo, o, PyBUF_ND_CONTIGUOUS) + PyBuffer!(pybufinfo, o, PyBUF_ND_STRIDED) dataptr = pybufinfo.buf.buf a.data = reinterpret(Ptr{T}, dataptr) a @@ -294,7 +294,7 @@ function convert(::Type{Array{PyObject,N}}, o::PyObject) where N map(pyincref, convert(Array{PyPtr, N}, o)) end -array_format(o::PyObject) = array_format(PyBuffer(o, PyBUF_ND_CONTIGUOUS)) +array_format(o::PyObject) = array_format(PyBuffer(o, PyBUF_ND_STRIDED)) """ ``` @@ -319,7 +319,7 @@ correctly. """ function NoCopyArray(o::PyObject) # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer - pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS) + pybuf = PyBuffer(o, PyBUF_ND_STRIDED) T, native_byteorder = array_format(pybuf) !native_byteorder && throw(ArgumentError( "Only native endian format supported, format string: '$(get_format_str(pybuf))'")) diff --git a/src/pybuffer.jl b/src/pybuffer.jl index 125504b5..96d3d338 100644 --- a/src/pybuffer.jl +++ b/src/pybuffer.jl @@ -112,7 +112,9 @@ const PyBUF_C_CONTIGUOUS = convert(Cint, 0x0020) | PyBUF_STRIDES const PyBUF_F_CONTIGUOUS = convert(Cint, 0x0040) | PyBUF_STRIDES const PyBUF_ANY_CONTIGUOUS = convert(Cint, 0x0080) | PyBUF_STRIDES const PyBUF_INDIRECT = convert(Cint, 0x0100) | PyBUF_STRIDES -const PyBUF_ND_CONTIGUOUS = Cint(PyBUF_WRITABLE | PyBUF_FORMAT | PyBUF_ND | PyBUF_STRIDES | PyBUF_ANY_CONTIGUOUS) +const PyBUF_ND_STRIDED = Cint(PyBUF_WRITABLE | PyBUF_FORMAT | PyBUF_ND | + PyBUF_STRIDES) +const PyBUF_ND_CONTIGUOUS = PyBUF_ND_STRIDED | PyBUF_ANY_CONTIGUOUS # construct a PyBuffer from a PyObject, if possible function PyBuffer(o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE) @@ -128,8 +130,9 @@ function PyBuffer!(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE) end """ -`isbuftype(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_ND_CONTIGUOUS)` -Returns true if the python object `o` supports the buffer protocol. False if not. +`isbuftype(o::Union{PyObject,PyPtr})` +Returns true if the python object `o` supports the buffer protocol as a strided +array. False if not. """ function isbuftype(o::Union{PyObject,PyPtr}) # PyObject_CheckBuffer is defined in a header file here: https://github.com/python/cpython/blob/ef5ce884a41c8553a7eff66ebace908c1dcc1f89/Include/abstract.h#L510 @@ -137,7 +140,7 @@ function isbuftype(o::Union{PyObject,PyPtr}) # So we'll just try call PyObject_GetBuffer and check for success/failure b = PyBuffer() ret = ccall((@pysym :PyObject_GetBuffer), Cint, - (PyPtr, Any, Cint), o, b, PyBUF_ND_CONTIGUOUS) + (PyPtr, Any, Cint), o, b, PyBUF_ND_STRIDED) if ret != 0 pyerr_clear() else From a574a6b7991b71503f60f3e64e7f0c8b73fd89c7 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 12 Dec 2018 18:17:01 +1100 Subject: [PATCH 2/4] Tests for f_contiguous, and copy(::PyArray) Including for non-contiguous arrays --- test/testpybuffer.jl | 85 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl index f471c371..227205c2 100644 --- a/test/testpybuffer.jl +++ b/test/testpybuffer.jl @@ -15,7 +15,7 @@ pyutf8(s::String) = pyutf8(PyObject(s)) end if !npy_initialized - println("Skipping array related buffer tests since NumPy not available") + println(stderr, "Warning: skipping array related buffer tests since NumPy not available") else np = pyimport("numpy") listpy = pybuiltin("list") @@ -50,6 +50,89 @@ pyutf8(s::String) = pyutf8(PyObject(s)) end end + # f_contiguous(T, sz, st) + @testset "f_contiguous 1D" begin + # contiguous case: stride == sizeof(T) + @test f_contiguous(Float64, (4,), (8,)) == true + # non-contiguous case: stride != sizeof(T) + @test f_contiguous(Float64, (4,), (16,)) == false + end + + @testset "f_contiguous 2D" begin + # contiguous: st[1] == sizeof(T), st[2] == st[1]*sz[1] + @test f_contiguous(Float64, (4, 2), (8, 32)) == true + # non-contiguous: stride != sizeof(T), but st[2] == st[1]*sz[1] + @test f_contiguous(Float64, (4, 2), (16, 64)) == false + # non-contiguous: stride == sizeof(T), but st[2] != st[1]*sz[1] + @test f_contiguous(Float64, (4, 2), (8, 64)) == false + end + + @testset "copy f_contig 1d" begin + apyo = arrpyo(1.0:10.0, "d") + pyarr = PyArray(apyo) + jlcopy = copy(pyarr) + @test pyarr.f_contig == true + @test pyarr.c_contig == true + @test all(jlcopy .== pyarr) + # check it's not aliasing the same data + jlcopy[1] = -1.0 + @test pyarr[1] == 1.0 + end + + @testset "copy c_contig 2d" begin + apyo = pytestarray(2,3) # arrpyo([[1,2,3],[4,5,6]], "d") + pyarr = PyArray(apyo) + jlcopy = copy(pyarr) + @test pyarr.c_contig == true + @test pyarr.f_contig == false + # check all is in order + for i in 1:size(pyarr, 1) + for j in 1:size(pyarr, 1) + @test jlcopy[i,j] == pyarr[i,j] + end + end + # check it's not aliasing the same data + jlcopy[1,1] = -1.0 + @test pyarr[1,1] == 1.0 + end + + @testset "Non contiguous PyArrays" begin + @testset "1d non-contiguous" begin + # create an array of four Int32s, with stride 8 + nparr = pycall(np["ndarray"], PyObject, 4, + buffer=UInt32[1,0,1,0,1,0,1,0], + dtype="i4", strides=(8,)) + pyarr = PyArray(nparr) + + # The convert goes via a PyArray then a `copy` + @test convert(PyAny, nparr) == [1, 1, 1, 1] + + @test eltype(pyarr) == Int32 + @test sizeof(eltype(pyarr)) == 4 + @test pyarr.info.st == (8,) + # not f_contig because not contiguous + @test pyarr.f_contig == false + @test copy(pyarr) == Int32[1, 1, 1, 1] + end + + @testset "2d non-contiguous" begin + nparr = pycall(np["ndarray"], PyObject, + buffer=UInt32[1,0,2,0,1,0,2,0, + 1,0,2,0,1,0,2,0], order="f", + dtype="i4", shape=(2, 4), strides=(8,16)) + pyarr = PyArray(nparr) + + # The convert goes via a PyArray then a `copy` + @test convert(PyAny, nparr) == [1 1 1 1; 2 2 2 2] + pyarr = convert(PyArray, nparr) + @test eltype(pyarr) == Int32 + @test pyarr.info.st == (8, 16) + # not f_contig because not contiguous + @test pyarr.f_contig == false + @test copy(pyarr) == Int32[1 1 1 1; 2 2 2 2] + end + end + @testset "NoCopyArray 1d" begin ao = arrpyo(1.0:10.0, "d") pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS) From ac2ea1e22adc5c4b94dfefb3839512ecdf8e91b2 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 12 Dec 2018 18:17:21 +1100 Subject: [PATCH 3/4] Fix f_contiguous for 1d arrays --- src/pyarray.jl | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/pyarray.jl b/src/pyarray.jl index 3045f4d6..d08d1a7f 100644 --- a/src/pyarray.jl +++ b/src/pyarray.jl @@ -38,16 +38,30 @@ function default_stride(sz::NTuple{N, Int}, ::Type{T}) where {T,N} ntuple(i->stv[i], N) end -# whether a contiguous array in column-major (Fortran, Julia) order -function f_contiguous(T::Type, sz::NTuple{N,Int}, st::NTuple{N,Int}) where N - if prod(sz) == 1 || length(sz) == 1 - # 0 or 1-dim arrays should default to f-contiguous in julia - return true - end +""" +`f_contiguous(T::Type, sz::NTuple{N,Int}, st::NTuple{N,Int})::Bool` + +Whether an array is in column-major (Fortran, Julia) order, and +has elements stored contiguously. Any array that is f_contiguous will have +identical memory layout to a Julia `Array` of the same size. + +`sz` should be the dimensions of the array in number of elements (i.e. what + `size(a)` would return) +`st` should be the stride(s) *in bytes* between elements in each dimension +""" +function f_contiguous(::Type{T}, sz::NTuple{N,Int}, st::NTuple{N,Int}) where {T,N} if st[1] != sizeof(T) + # not contiguous return false end + if prod(sz) == 1 || length(sz) == 1 + # 0 or 1-dim arrays (with correct stride) should default to f-contiguous + # in julia + return true + end for j = 2:N + # check stride[cur_dim] == stride[prev_dim]*sz[prev_dim] for all dims>1, + # implying contiguous column-major storage (n.b. st[1] == sizeof(T) here) if st[j] != st[j-1] * sz[j-1] return false end From 8e5be56b6cd15cc4308176da44e33de3107ba403 Mon Sep 17 00:00:00 2001 From: Joel Mason Date: Wed, 12 Dec 2018 18:17:37 +1100 Subject: [PATCH 4/4] Simplify Copy(::PyArray) --- src/pyarray.jl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/pyarray.jl b/src/pyarray.jl index d08d1a7f..f787b4d1 100644 --- a/src/pyarray.jl +++ b/src/pyarray.jl @@ -142,17 +142,15 @@ function setdata!(a::PyArray{T,N}, o::PyObject) where {T,N} end function copy(a::PyArray{T,N}) where {T,N} - if N > 1 && a.c_contig # equivalent to f_contig with reversed dims - B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N)) - return permutedims(B, (N:-1:1)) - end + # memcpy is ok iff `a` is f_contig (implies same memory layout as the equiv + # `Array`) otherwise we do a regular `copyto!`, such that A[I...] == a[I...] A = Array{T}(undef, a.dims) if a.f_contig ccall(:memcpy, Cvoid, (Ptr{T}, Ptr{T}, Int), A, a, sizeof(T)*length(a)) - return A else - return copyto!(A, a) + copyto!(A, a) end + return A end # TODO: need to do bounds-checking of these indices!