Skip to content

Commit

Permalink
PyArray conversion speedups and PyArrayFromBuffer
Browse files Browse the repository at this point in the history
* PyArray_Info parameterised T,N and immutable
* PyArray_Info size and stride changed to Tuples (from Vectors)
* PyArray_Info data changed from Ptr{Cvoid} to Ptr{T}
* `PyArrayInfoFromBuffer(o::PyObject)` faster way to get PyArray_Info from numpy than numpy's __array_interface__
* `PyArrayFromBuffer(o::PyObject)` 4x faster PyArray conversion
* `ArrayFromBuffer(o::PyObject)`: no-copy conversion to a Julia Array
* moved PyBuffer tests to separate file
  • Loading branch information
JobJob committed Apr 4, 2018
1 parent 0f3ad3c commit a25c297
Show file tree
Hide file tree
Showing 6 changed files with 203 additions and 51 deletions.
57 changes: 57 additions & 0 deletions benchmarks/arrayperf.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
using PyCall, BenchmarkTools, DataStructures
using PyCall: PyArrayInfoFromBuffer

results = OrderedDict{String,Any}()

# Benchmark the different PyObject -> array conversion paths for a small and a
# larger NumPy array. Every trial is stored in the global `results` dictionary
# under the key "<name><arr_size>" and displayed as soon as it finishes.
let
    np = pyimport("numpy")
    nprand = np["random"]["rand"]

    # Baseline without any conversion: the NumPy array stays a PyObject.
    nprand_pyo(sz...) = pycall(nprand, PyObject, sz...)

    # Store `trial` under "<name><arr_size>", then print it followed by a
    # separator line. Keeps the reporting identical for every benchmark.
    function record!(name, arr_size, trial)
        results["$name$arr_size"] = trial
        println("$name $arr_size:\n")
        display(trial)
        println("--------------------------------------------------")
        return trial
    end

    for arr_size in [(2,2), (100,100)]
        pyo_arr = nprand_pyo(arr_size...)

        record!("nprand_pyo", arr_size, @benchmark($nprand_pyo($arr_size...)))
        record!("convert_pyarr", arr_size, @benchmark($convert(PyArray, $pyo_arr)))
        record!("PyArrayInfoFromBuffer", arr_size,
                @benchmark($PyArrayInfoFromBuffer($pyo_arr)))
        record!("convert_pyarrbuf", arr_size, @benchmark($PyArrayFromBuffer($pyo_arr)))
        record!("convert_arr", arr_size, @benchmark(convert(Array, $pyo_arr)))
        record!("convert_arrbuf", arr_size, @benchmark($ArrayFromBuffer($pyo_arr)))

        # setdata! mutates its first argument, so each variant gets a fresh PyArray.
        pyarr = convert(PyArray, pyo_arr)
        record!("setdata!", arr_size, @benchmark($setdata!($pyarr, $pyo_arr)))

        # Same as above, but reusing a preallocated PyBuffer across calls.
        pyarr = convert(PyArray, pyo_arr)
        pybuf = PyBuffer()
        record!("setdata! bufprealloc", arr_size,
                @benchmark($setdata!($pyarr, $pyo_arr, $pybuf)))
    end
end
# Summary table: one line per recorded benchmark with its mean.
println()
println("Mean times")
println("----------")
for (name, trial) in results
    println(rpad(name, 27), ": ", mean(trial))
end
3 changes: 2 additions & 1 deletion src/PyCall.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ module PyCall
using Compat, VersionParsing

export pycall, pyimport, pybuiltin, PyObject, PyReverseDims,
PyPtr, pyincref, pydecref, pyversion, PyArray, PyArray_Info,
PyPtr, pyincref, pydecref, pyversion,
PyArray, PyArray_Info, PyBuffer, PyArrayFromBuffer, setdata!, ArrayFromBuffer,
pyerr_check, pyerr_clear, pytype_query, PyAny, @pyimport, PyDict,
pyisinstance, pywrap, pytypeof, pyeval, PyVector, pystring, pystr, pyrepr,
pyraise, pytype_mapping, pygui, pygui_start, pygui_stop,
Expand Down
99 changes: 59 additions & 40 deletions src/numpy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -226,63 +226,75 @@ PyReverseDims(a::AbstractArray)
# to call the Python interface to do this, since the equivalent information
# in NumPy's C API is only available via macros (or parsing structs).
# [ Hopefully, this will be improved in a future NumPy version. ]

mutable struct PyArray_Info
T::Type
struct PyArray_Info{T,N}
native::Bool # native byte order?
sz::Vector{Int}
st::Vector{Int} # strides, in multiples of bytes!
data::Ptr{Cvoid}
sz::NTuple{N,Int}
st::NTuple{N,Int} # strides, in multiples of bytes!
data::Ptr{T}
readonly::Bool
end

function PyArray_Info(a::PyObject)
ai = PyDict{AbstractString,PyObject}(a["__array_interface__"])
typestr = convert(AbstractString, ai["typestr"])
T = npy_typestrs[typestr[2:end]]
datatuple = convert(Tuple{Int,Bool}, ai["data"])
sz = convert(Vector{Int}, ai["shape"])
local st
try
st = isempty(sz) ? Int[] : convert(Vector{Int}, ai["strides"])
catch
# default is C-order contiguous
st = similar(sz)
st[end] = sizeof(T)
for i = length(sz)-1:-1:1
st[i] = st[i+1]*sz[i+1]
end
function PyArray_Info(a::PyObject)
ai = PyDict{String,PyObject,true}(a["__array_interface__"])
typestr = convert(String, ai["typestr"])
T = npy_typestrs[typestr[2:end]]
datatuple = convert(Tuple{Int,Bool}, ai["data"])
sz = convert(Tuple{Vararg{Int}}, ai["shape"])
N = length(sz)
st = if isempty(sz)
()
else
stq = get(ai, "strides", PyNULL())
if stq.o == pynothing[] || stq.o == C_NULL
default_stride(sz, T)
else
convert(Tuple{Vararg{Int}}, stq)
end
return new(T,
(ENDIAN_BOM == 0x04030201 && typestr[1] == '<')
|| (ENDIAN_BOM == 0x01020304 && typestr[1] == '>')
|| typestr[1] == '|',
sz, st,
convert(Ptr{Cvoid}, datatuple[1]),
datatuple[2])
end
return PyArray_Info{T,N}((ENDIAN_BOM == 0x04030201 && typestr[1] == '<')
|| (ENDIAN_BOM == 0x01020304 && typestr[1] == '>')
|| typestr[1] == '|',
sz, st,
convert(Ptr{Cvoid}, datatuple[1]),
datatuple[2])
end

aligned(i::PyArray_Info) = # FIXME: also check pointer alignment?
all(m -> m == 0, mod.(i.st, sizeof(i.T))) # strides divisible by elsize
# An array description counts as aligned when every byte stride is a whole
# multiple of the element size.
# FIXME: also check pointer alignment?
function aligned(i::PyArray_Info{T,N}) where {T,N}
    elsize = sizeof(T)
    return all(stride -> mod(stride, elsize) == 0, i.st)
end

# Element type is carried by the first type parameter of PyArray_Info.
eltype(::PyArray_Info{T}) where {T} = T
# Number of dimensions is carried by the second type parameter of PyArray_Info.
ndims(::PyArray_Info{<:Any,N}) where {N} = N

# Byte strides of a C-order (row-major) contiguous array with dimensions `sz`
# and element type `T`: the last dimension has stride `sizeof(T)` and each
# earlier dimension's stride is the next stride times the next dimension's size.
# Returns an `NTuple{N,Int}`; an empty `sz` yields `()`.
function default_stride(sz::NTuple{N,Int}, ::Type{T}) where {T,N}
    elsize = sizeof(T)
    # Computed directly as a tuple: no temporary Vector, and N == 0 needs no
    # special case because ntuple(f, 0) is ().
    return ntuple(N) do i
        stride = elsize
        for j in (i + 1):N
            stride *= sz[j]
        end
        stride
    end
end

# whether a contiguous array in column-major (Fortran, Julia) order
function f_contiguous(T::Type, sz::Vector{Int}, st::Vector{Int})
function f_contiguous(T::Type, sz::NTuple{N,Int}, st::NTuple{N,Int}) where N
if prod(sz) == 1
return true
end
if st[1] != sizeof(T)
return false
end
for j = 2:length(st)
for j = 2:N
if st[j] != st[j-1] * sz[j-1]
return false
end
end
return true
end

f_contiguous(i::PyArray_Info) = f_contiguous(i.T, i.sz, i.st)
c_contiguous(i::PyArray_Info) = f_contiguous(i.T, flipdim(i.sz,1), flipdim(i.st,1))
# Fallback for size and stride tuples of different lengths: such a description
# is inconsistent, so report both tuples instead of answering true/false.
f_contiguous(T::Type, sz::NTuple{N1,Int}, st::NTuple{N2,Int}) where {N1,N2} =
    error("stride and size are different lengths, size: $sz, strides: $st")

# Column-major contiguity of the array described by `i`.
function f_contiguous(i::PyArray_Info{T}) where {T}
    return f_contiguous(T, i.sz, i.st)
end
# Row-major contiguity: the column-major test applied to reversed sizes and strides.
function c_contiguous(i::PyArray_Info{T}) where {T}
    rev_sz = reverse(i.sz)
    rev_st = reverse(i.st)
    return f_contiguous(T, rev_sz, rev_st)
end

#########################################################################
# PyArray: no-copy wrapper around NumPy ndarray
Expand All @@ -305,7 +317,7 @@ mutable struct PyArray{T,N} <: AbstractArray{T,N}
o::PyObject
info::PyArray_Info
dims::Dims
st::Vector{Int}
st::NTuple{N,Int}
f_contig::Bool
c_contig::Bool
data::Ptr{T}
Expand All @@ -315,7 +327,7 @@ mutable struct PyArray{T,N} <: AbstractArray{T,N}
throw(ArgumentError("only NPY_ARRAY_ALIGNED arrays are supported"))
elseif !info.native
throw(ArgumentError("only native byte-order arrays are supported"))
elseif info.T != T
elseif eltype(info) != T
throw(ArgumentError("inconsistent type in PyArray constructor"))
elseif length(info.sz) != N || length(info.st) != N
throw(ArgumentError("inconsistent ndims in PyArray constructor"))
Expand All @@ -328,14 +340,21 @@ end

function PyArray(o::PyObject)
info = PyArray_Info(o)
return PyArray{info.T, length(info.sz)}(o, info)
return PyArray{eltype(info), length(info.sz)}(o, info)
end

size(a::PyArray) = a.dims
ndims(a::PyArray{T,N}) where {T,N} = N

similar(a::PyArray, T, dims::Dims) = Array{T}(uninitialized, dims)

# Point the existing PyArray `a` at the data buffer of the Python object `o`.
#
# `pybufinfo` is the PyBuffer that gets filled by the buffer request; passing a
# preallocated one avoids constructing a new PyBuffer on every call.
# Only `a.data` is updated. The other fields of `a` are left untouched, so
# NOTE(review): this presumably expects `o` to have the same element type,
# shape and layout as the array `a` was created from — confirm with callers.
# Returns `a`.
function setdata!(a::PyArray{T,N}, o::PyObject, pybufinfo=PyBuffer()) where {T,N}
    PyBuffer!(pybufinfo, o, PyBUF_ND_CONTIGUOUS)
    dataptr = pybufinfo.buf.buf
    a.data = reinterpret(Ptr{T}, dataptr)
    return a
end

function copy(a::PyArray{T,N}) where {T,N}
if N > 1 && a.c_contig # equivalent to f_contig with reversed dims
B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N))
Expand Down Expand Up @@ -455,7 +474,7 @@ function convert(::Type{Array{T}}, o::PyObject) where T<:NPY_TYPES
try
info = PyArray_Info(o)
try
copy(PyArray{T, length(info.sz)}(o, info)) # will check T == info.T
copy(PyArray{T, length(info.sz)}(o, info)) # will check T == eltype(info)
catch
return py2array(T, Array{pyany_toany(T)}(uninitialized, info.sz...), o, 1, 1)
end
Expand All @@ -468,7 +487,7 @@ function convert(::Type{Array{T,N}}, o::PyObject) where {T<:NPY_TYPES,N}
try
info = PyArray_Info(o)
try
copy(PyArray{T,N}(o, info)) # will check T == info.T and N == length(info.sz)
copy(PyArray{T,N}(o, info)) # will check T,N == eltype(info),ndims(info)
catch
nd = length(info.sz)
if nd != N
Expand Down
60 changes: 58 additions & 2 deletions src/pybuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,17 @@ function Base.stride(b::PyBuffer, d::Integer)
return Int(unsafe_load(b.buf.strides, d))
end

# All strides of the buffer as a tuple, one entry per dimension, in bytes.
Base.strides(b::PyBuffer) = ntuple(d -> stride(b, d), Int(b.buf.ndim))

# TODO change to `Ref{PyBuffer}` when 0.6 is dropped.
iscontiguous(b::PyBuffer) =
1 == ccall((@pysym :PyBuffer_IsContiguous), Cint,
(Any, Cchar), b, 'A')

#############################################################################
# pybuffer constant values from Include/object.h
const PyBUF_MAX_NDIM = convert(Cint, 64)
const PyBUF_MAX_NDIM = convert(Cint, 64) # == 0x0040, and not in spec?
const PyBUF_SIMPLE = convert(Cint, 0)
const PyBUF_WRITABLE = convert(Cint, 0x0001)
const PyBUF_FORMAT = convert(Cint, 0x0004)
Expand All @@ -103,10 +106,14 @@ const PyBUF_C_CONTIGUOUS = convert(Cint, 0x0020) | PyBUF_STRIDES
const PyBUF_F_CONTIGUOUS = convert(Cint, 0x0040) | PyBUF_STRIDES
const PyBUF_ANY_CONTIGUOUS = convert(Cint, 0x0080) | PyBUF_STRIDES
const PyBUF_INDIRECT = convert(Cint, 0x0100) | PyBUF_STRIDES
const PyBUF_ND_CONTIGUOUS = Cint(PyBUF_WRITABLE | PyBUF_FORMAT | PyBUF_ND | PyBUF_STRIDES | PyBUF_ANY_CONTIGUOUS)

# construct a PyBuffer from a PyObject, if possible
function PyBuffer(o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE)
b = PyBuffer()
return PyBuffer!(PyBuffer(), o, flags)
end

function PyBuffer!(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE)
# TODO change to `Ref{PyBuffer}` when 0.6 is dropped.
@pycheckz ccall((@pysym :PyObject_GetBuffer), Cint,
(PyPtr, Any, Cint), o, b, flags)
Expand Down Expand Up @@ -154,4 +161,53 @@ function Base.write(io::IO, b::PyBuffer)
end
end

# Map from buffer-protocol format codes (Python `struct` / PEP 3118 characters,
# as emitted by NumPy) to the matching Julia element type.
# ref: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966
#
# "l"/"L" are C `long`, whose width is platform dependent, hence Clong/Culong.
# "q"/"Q" are C `long long`, i.e. 64-bit integers (not 128-bit).
# "g" (long double) has no Julia equivalent here; it maps to `Nothing`, which
# callers treat as "unsupported".
# "Zf"/"Zd" are the complex codes NumPy emits; "c8"/"c16" are kept so existing
# lookups by those keys continue to work.
# Object arrays ("O") are not mapped.
const pybuf_typestrs = Dict{String,DataType}("?"=>Bool,
                                             "b"=>Int8, "B"=>UInt8,
                                             "h"=>Int16, "H"=>UInt16,
                                             "i"=>Int32, "I"=>UInt32,
                                             "l"=>Clong, "L"=>Culong,
                                             "q"=>Clonglong, "Q"=>Culonglong,
                                             "e"=>Float16, "f"=>Float32,
                                             "d"=>Float64, "g"=>Nothing,
                                             "Zf"=>ComplexF32, "Zd"=>ComplexF64,
                                             "c8"=>ComplexF32, "c16"=>ComplexF64,)

# Read the buffer's format field as a Julia String.
function get_typestr(pybuf::PyBuffer)
    fmt = convert(Ptr{UInt8}, pybuf.buf.format)
    return unsafe_string(fmt)
end

# Decode the buffer's format string into `(T, native_byteorder)`.
#
# The format may start with a byte-order character: '<' (little endian),
# '>' or '!' (big endian), or '@', '=', '^' (native). No prefix means native.
# Everything after the prefix is looked up as a whole in `pybuf_typestrs`, so a
# multi-character code such as "Zd" is never mistaken for its last character.
# Throws a KeyError when the code has no entry in `pybuf_typestrs`.
function array_info(pybuf::PyBuffer)
    typestr = get_typestr(pybuf)
    first_char = typestr[1]
    has_prefix = first_char in ('@', '=', '^', '<', '>', '!')
    byteorder = has_prefix ? first_char : '@'
    code = has_prefix ? typestr[2:end] : typestr
    little_endian_host = ENDIAN_BOM == 0x04030201
    native_byteorder = if byteorder == '<'
        little_endian_host
    elseif byteorder == '>' || byteorder == '!'
        !little_endian_host
    else
        true
    end
    return pybuf_typestrs[code], native_byteorder
end

# Build a PyArray_Info for `o` from its buffer view: element type, byte-order
# flag, size, byte strides, data pointer and read-only flag all come from the
# PyBuffer rather than from `__array_interface__`.
function PyArrayInfoFromBuffer(o::PyObject)
    # XXX pyincref buffer? and add a finalizer to the array that calls pydecref?
    buffer = PyBuffer(o, PyBUF_ND_CONTIGUOUS)
    eltyp, isnative = array_info(buffer)
    dims = size(buffer)
    bytestrides = strides(buffer)
    readonly = buffer.buf.readonly == 1
    return PyArray_Info{eltyp,length(dims)}(isnative, dims, bytestrides,
                                            buffer.buf.buf, readonly)
end

# Wrap `o` in a PyArray whose type parameters and layout come from the
# buffer-derived PyArray_Info.
function PyArrayFromBuffer(o::PyObject)
    bufinfo = PyArrayInfoFromBuffer(o)
    return PyArray{eltype(bufinfo),length(bufinfo.sz)}(o, bufinfo)
end

# No-copy view of the data behind `o` as a Julia array.
#
# Returns an `Array` when the buffer is column-major contiguous. Otherwise the
# memory is wrapped with reversed dimensions and returned as a
# `PermutedDimsArray`, so the result has the buffer's shape and element order.
# Throws an ErrorException for non-native byte order or an unsupported
# element type.
#
# NOTE(review): the returned array does not keep a reference to `o`; the caller
# presumably has to keep `o` alive for as long as the array is used — confirm.
function ArrayFromBuffer(o::PyObject)
    pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS)
    # XXX pyincref buffer? and add a finalizer to the array that calls pydecref?
    T, native_byteorder = array_info(pybuf)
    !native_byteorder && error("Only native endian format supported, typestr: '$(get_typestr(pybuf))'")
    T === Nothing && error("Array datatype '$(get_typestr(pybuf))' not supported")
    # TODO more checks on strides etc
    sz = size(pybuf)
    ptr = convert(Ptr{T}, pybuf.buf.buf)
    # f_contiguous takes (T, size, strides), in that order.
    if f_contiguous(T, sz, strides(pybuf))
        return unsafe_wrap(Array, ptr, sz)
    end
    # Row-major data read column-major has its dimensions reversed, so wrap
    # with reversed sizes and permute the axes back to the buffer's shape.
    N = length(sz)
    arr = unsafe_wrap(Array, ptr, reverse(sz))
    return PermutedDimsArray(arr, ntuple(i -> N - i + 1, N))
end

#############################################################################
10 changes: 2 additions & 8 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -238,14 +238,6 @@ if pymodule_exists("mpmath")
end
@test convert(BigInt, PyObject(1234)) == 1234

# buffers
let b = PyCall.PyBuffer(pyutf8("test string"))
@test ndims(b) == 1
@test (length(b),) == (length("test string"),) == (size(b, 1),) == size(b)
@test stride(b, 1) == 1
@test PyCall.iscontiguous(b) == true
end

let o = PyObject(1+2im)
@test haskey(o, :real)
@test :real in keys(o)
Expand Down Expand Up @@ -535,3 +527,5 @@ end
@test pyfunctionret(factorial, Float64, Int)(3) === 6.0
@test pyfunctionret(factorial, nothing, Int)(3) === nothing
@test PyCall.is_pyjlwrap(pycall(pyfunctionret(factorial, Any, Int), PyObject, 3))

include("testpybuffer.jl")
25 changes: 25 additions & 0 deletions test/testpybuffer.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
using Compat.Test, PyCall, Compat

# Call the Python object's `encode` method with "utf-8", keeping the result as
# a PyObject.
function pyutf8(s::PyObject)
    encode = s["encode"]
    return pycall(encode, PyObject, "utf-8")
end
# Convenience method: convert the Julia string to a PyObject first.
pyutf8(s::String) = PyObject(s) |> pyutf8

const np = pyimport("numpy")

# Tests for the PyBuffer wrapper and the buffer-based array conversion.
@testset "PyBuffer" begin
# A UTF-8 encoded string viewed through PyBuffer: one dimension, length equal
# to the string's length, unit stride, contiguous.
@testset "String Buffers" begin
b = PyCall.PyBuffer(pyutf8("test string"))
@test ndims(b) == 1
@test (length(b),) == (length("test string"),) == (size(b, 1),) == size(b)
@test stride(b, 1) == 1
@test PyCall.iscontiguous(b) == true
end

# An int16 ndarray created with the byte order opposite to the host's.
@testset "Non-native-endian" begin
# Choose the byte-order prefix that does NOT match the host's ENDIAN_BOM.
wrong_endian_str = ENDIAN_BOM == 0x01020304 ? "<" : ">"
wrong_endian_arr =
pycall(np["ndarray"], PyObject, 2; buffer=UInt8[0,1,3,2],
dtype=wrong_endian_str*"i2")
# Not supported, so throws
@test_throws ErrorException ArrayFromBuffer(wrong_endian_arr)
end
end

0 comments on commit a25c297

Please sign in to comment.