diff --git a/benchmarks/arrayperf.jl b/benchmarks/arrayperf.jl
new file mode 100644
index 00000000..b5a46f06
--- /dev/null
+++ b/benchmarks/arrayperf.jl
@@ -0,0 +1,57 @@
+using PyCall, BenchmarkTools, DataStructures
+using PyCall: PyArray_Info
+
+results = OrderedDict{String,Any}()
+
+let
+    np = pyimport("numpy")
+    nprand = np["random"]["rand"]
+    # nparray_pyo(x) = pycall(np["array"], PyObject, x)
+    # pytestarray(sz::Int...) = pycall(np["reshape"], PyObject, nparray_pyo(1:prod(sz)), sz)
+
+    # no convert baseline
+    nprand_pyo(sz...)   = pycall(nprand, PyObject, sz...)
+
+    for arr_size in [(2,2), (100,100)]
+        pyo_arr = nprand_pyo(arr_size...)
+        results["nprand_pyo$arr_size"] = @benchmark $nprand_pyo($arr_size...)
+        println("nprand_pyo $arr_size:\n"); display(results["nprand_pyo$arr_size"])
+        println("--------------------------------------------------")
+
+        results["convert_pyarr$arr_size"] = @benchmark $convert(PyArray, $pyo_arr)
+        println("convert_pyarr $arr_size:\n"); display(results["convert_pyarr$arr_size"])
+        println("--------------------------------------------------")
+
+        results["PyArray_Info$arr_size"] = @benchmark $PyArray_Info($pyo_arr)
+        println("PyArray_Info $arr_size:\n"); display(results["PyArray_Info$arr_size"])
+        println("--------------------------------------------------")
+
+        results["convert_pyarrbuf$arr_size"] = @benchmark $PyArray($pyo_arr)
+        println("convert_pyarrbuf $arr_size:\n"); display(results["convert_pyarrbuf$arr_size"])
+        println("--------------------------------------------------")
+
+        results["convert_arr$arr_size"] = @benchmark convert(Array, $pyo_arr)
+        println("convert_arr $arr_size:\n"); display(results["convert_arr$arr_size"])
+        println("--------------------------------------------------")
+
+        results["convert_arrbuf$arr_size"] = @benchmark $NoCopyArray($pyo_arr)
+        println("convert_arrbuf $arr_size:\n"); display(results["convert_arrbuf$arr_size"])
+        println("--------------------------------------------------")
+
+        pyarr = convert(PyArray, pyo_arr)
+        results["setdata!$arr_size"] = @benchmark $setdata!($pyarr, $pyo_arr)
+        println("setdata!:\n"); display(results["setdata!$arr_size"])
+        println("--------------------------------------------------")
+
+        pyarr = convert(PyArray, pyo_arr)
+        pybuf=PyBuffer()
+        results["setdata! bufprealloc$arr_size"] =
+            @benchmark $setdata!($pyarr, $pyo_arr, $pybuf)
+        println("setdata! bufprealloc:\n"); display(results["setdata! bufprealloc$arr_size"])
+        println("--------------------------------------------------")
+    end
+end
+println()
+println("Mean times")
+println("----------")
+foreach((r)->println(rpad(r[1],27), ": ", mean(r[2])), results)
diff --git a/src/PyCall.jl b/src/PyCall.jl
index 8e5ccc46..f96740ab 100644
--- a/src/PyCall.jl
+++ b/src/PyCall.jl
@@ -5,7 +5,8 @@ module PyCall
 using Compat, VersionParsing
 
 export pycall, pycall!, pyimport, pyimport_e, pybuiltin, PyObject, PyReverseDims,
-       PyPtr, pyincref, pydecref, pyversion, PyArray, PyArray_Info,
+       PyPtr, pyincref, pydecref, pyversion,
+       PyArray, PyArray_Info, PyBuffer,
        pyerr_check, pyerr_clear, pytype_query, PyAny, @pyimport, PyDict,
        pyisinstance, pywrap, pytypeof, pyeval, PyVector, pystring, pystr, pyrepr,
        pyraise, pytype_mapping, pygui, pygui_start, pygui_stop,
@@ -177,6 +178,7 @@ pytypeof(o::PyObject) = ispynull(o) ? throw(ArgumentError("NULL PyObjects have n
 
 const TypeTuple = Union{Type,NTuple{N, Type}} where {N}
 include("pybuffer.jl")
+include("pyarray.jl")
 include("conversions.jl")
 include("pytype.jl")
 include("pyiterator.jl")
diff --git a/src/conversions.jl b/src/conversions.jl
index ab681ae5..ec3c6af5 100644
--- a/src/conversions.jl
+++ b/src/conversions.jl
@@ -768,13 +768,11 @@ function pysequence_query(o::PyObject)
         return AbstractRange
     elseif ispybytearray(o)
         return Vector{UInt8}
-    elseif !haskey(o, "__array_interface__")
+    elseif !isbuftype(o)
         # only handle PyList for now
         return pyisinstance(o, @pyglobalobj :PyList_Type) ? Array : Union{}
     else
-        otypestr = get(o["__array_interface__"], PyObject, "typestr")
-        typestr = convert(AbstractString, otypestr) # Could this just be String now?
-        T = npy_typestrs[typestr[2:end]]
+        T, native_byteorder = array_format(o)
         if T == PyPtr
             T = PyObject
         end
diff --git a/src/numpy.jl b/src/numpy.jl
index 542b8b9b..e41fd1a2 100644
--- a/src/numpy.jl
+++ b/src/numpy.jl
@@ -102,7 +102,7 @@ end
 # the values of these seem to have been stable for some time, and
 # the NumPy developers seem to have some awareness of binary compatibility
 
-# NPY_TYPES:
+# NumPy Types:
 
 const NPY_BOOL = Int32(0)
 const NPY_BYTE = Int32(1)
@@ -127,26 +127,8 @@ const NPY_UNICODE = Int32(19)
 const NPY_VOID = Int32(20)
 const NPY_HALF = Int32(23)
 
-# NPY_ORDER:
-const NPY_ANYORDER = Int32(-1)
-const NPY_CORDER = Int32(0)
-const NPY_FORTRANORDER = Int32(1)
-
-# flags:
-const NPY_ARRAY_C_CONTIGUOUS = Int32(1)
-const NPY_ARRAY_F_CONTIGUOUS = Int32(2)
-const NPY_ARRAY_ALIGNED = Int32(0x0100)
-const NPY_ARRAY_WRITEABLE = Int32(0x0400)
-const NPY_ARRAY_OWNDATA = Int32(0x0004)
-const NPY_ARRAY_ENSURECOPY = Int32(0x0020)
-const NPY_ARRAY_ENSUREARRAY = Int32(0x0040)
-const NPY_ARRAY_FORCECAST = Int32(0x0010)
-const NPY_ARRAY_UPDATEIFCOPY = Int32(0x1000)
-const NPY_ARRAY_NOTSWAPPED = Int32(0x0200)
-const NPY_ARRAY_ELEMENTSTRIDES = Int32(0x0080)
-
 #########################################################################
-# conversion from Julia types to NPY_TYPES constant
+# conversion from Julia types to NumPy types
 
 npy_type(::Type{Bool}) = NPY_BOOL
 npy_type(::Type{Int8}) = NPY_BYTE
@@ -164,18 +146,9 @@ npy_type(::Type{ComplexF32}) = NPY_CFLOAT
 npy_type(::Type{ComplexF64}) = NPY_CDOUBLE
 npy_type(::Type{PyPtr}) = NPY_OBJECT
 
-const NPY_TYPES = Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64,ComplexF32,ComplexF64,PyPtr}
-
-# conversions from __array_interface__ type strings to supported Julia types
-const npy_typestrs = Dict( "b1"=>Bool,
-                           "i1"=>Int8,        "u1"=>UInt8,
-                           "i2"=>Int16,       "u2"=>UInt16,
-                           "i4"=>Int32,       "u4"=>UInt32,
-                           "i8"=>Int64,       "u8"=>UInt64,
-                           "f2"=>Float16,     "f4"=>Float32,
-                           "f8"=>Float64,     "c8"=>ComplexF32,
-                           "c16"=>ComplexF64, "O"=>PyPtr,
-                           "O$(div(Sys.WORD_SIZE,8))"=>PyPtr)
+# flags:
+const NPY_ARRAY_ALIGNED = Int32(0x0100)
+const NPY_ARRAY_WRITEABLE = Int32(0x0400)
 
 #########################################################################
 # no-copy conversion of Julia arrays to NumPy arrays.
@@ -185,7 +158,7 @@ const npy_typestrs = Dict( "b1"=>Bool,
 # dimensions. For example, although NumPy works with both row-major and
 # column-major data, some Python libraries like OpenCV seem to require
 # row-major data (the default in NumPy). In such cases, use PyReverseDims(array)
-function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:NPY_TYPES
+function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:PYARR_TYPES
     @npyinitialize
     size_a = revdims ? reverse(size(a)) : size(a)
     strides_a = revdims ? reverse(strides(a)) : strides(a)
@@ -199,7 +172,7 @@ function NpyArray(a::StridedArray{T}, revdims::Bool) where T<:NPY_TYPES
     return PyObject(p, a)
 end
 
-function PyObject(a::StridedArray{T}) where T<:NPY_TYPES
+function PyObject(a::StridedArray{T}) where T<:PYARR_TYPES
     try
         return NpyArray(a, false)
     catch
@@ -207,7 +180,7 @@ function PyObject(a::StridedArray{T}) where T<:NPY_TYPES
     end
 end
 
-PyReverseDims(a::StridedArray{T}) where {T<:NPY_TYPES} = NpyArray(a, true)
+PyReverseDims(a::StridedArray{T}) where {T<:PYARR_TYPES} = NpyArray(a, true)
 PyReverseDims(a::BitArray) = PyReverseDims(Array(a))
 
 """
@@ -222,283 +195,3 @@ libraries that expect row-major data.
 PyReverseDims(a::AbstractArray)
 
 #########################################################################
-# Extract shape and other information about a NumPy array.  We need
-# to call the Python interface to do this, since the equivalent information
-# in NumPy's C API is only available via macros (or parsing structs).
-# [ Hopefully, this will be improved in a future NumPy version. ]
-
-mutable struct PyArray_Info
-    T::Type
-    native::Bool # native byte order?
-    sz::Vector{Int}
-    st::Vector{Int} # strides, in multiples of bytes!
-    data::Ptr{Cvoid}
-    readonly::Bool
-
-    function PyArray_Info(a::PyObject)
-        ai = PyDict{AbstractString,PyObject}(a["__array_interface__"])
-        typestr = convert(AbstractString, ai["typestr"])
-        T = npy_typestrs[typestr[2:end]]
-        datatuple = convert(Tuple{Int,Bool}, ai["data"])
-        sz = convert(Vector{Int}, ai["shape"])
-        local st
-        try
-            st = isempty(sz) ? Int[] : convert(Vector{Int}, ai["strides"])
-        catch
-            # default is C-order contiguous
-            st = similar(sz)
-            st[end] = sizeof(T)
-            for i = length(sz)-1:-1:1
-                st[i] = st[i+1]*sz[i+1]
-            end
-        end
-        return new(T,
-                   (ENDIAN_BOM == 0x04030201 && typestr[1] == '<')
-                   || (ENDIAN_BOM == 0x01020304 && typestr[1] == '>')
-                   || typestr[1] == '|',
-                   sz, st,
-                   convert(Ptr{Cvoid}, datatuple[1]),
-                   datatuple[2])
-    end
-end
-
-aligned(i::PyArray_Info) = #  FIXME: also check pointer alignment?
-  all(m -> m == 0, mod.(i.st, sizeof(i.T))) # strides divisible by elsize
-
-# whether a contiguous array in column-major (Fortran, Julia) order
-function f_contiguous(T::Type, sz::Vector{Int}, st::Vector{Int})
-    if prod(sz) == 1
-        return true
-    end
-    if st[1] != sizeof(T)
-        return false
-    end
-    for j = 2:length(st)
-        if st[j] != st[j-1] * sz[j-1]
-            return false
-        end
-    end
-    return true
-end
-
-f_contiguous(i::PyArray_Info) = f_contiguous(i.T, i.sz, i.st)
-@static if VERSION >= v"0.7.0-DEV.4534" # julia#26369
-    c_contiguous(i::PyArray_Info) = f_contiguous(i.T, reverse(i.sz,dims=1), reverse(i.st,dims=1))
-else
-    c_contiguous(i::PyArray_Info) = f_contiguous(i.T, flipdim(i.sz,1), flipdim(i.st,1))
-end
-
-#########################################################################
-# PyArray: no-copy wrapper around NumPy ndarray
-#
-# Hopefully, in the future this can be a subclass of StridedArray (see
-# Julia issue #2345), which will allow it to be used with most Julia
-# functions, but that is not possible at the moment.  So, to use this
-# with Julia linalg functions etcetera a copy is still required.
-
-"""
-    PyArray(o::PyObject)
-
-This converts an `ndarray` object `o` to a PyArray.
-
-This implements a nocopy wrapper to a NumPy array (currently of only numeric types only).
-
-If you are using `pycall` and the function returns an `ndarray`, you can use `PyArray` as the return type to directly receive a `PyArray`.
-"""
-mutable struct PyArray{T,N} <: AbstractArray{T,N}
-    o::PyObject
-    info::PyArray_Info
-    dims::Dims
-    st::Vector{Int}
-    f_contig::Bool
-    c_contig::Bool
-    data::Ptr{T}
-
-    function PyArray{T,N}(o::PyObject, info::PyArray_Info) where {T,N}
-        if !aligned(info)
-            throw(ArgumentError("only NPY_ARRAY_ALIGNED arrays are supported"))
-        elseif !info.native
-            throw(ArgumentError("only native byte-order arrays are supported"))
-        elseif info.T != T
-            throw(ArgumentError("inconsistent type in PyArray constructor"))
-        elseif length(info.sz) != N || length(info.st) != N
-            throw(ArgumentError("inconsistent ndims in PyArray constructor"))
-        end
-        return new{T,N}(o, info, tuple(info.sz...), div.(info.st, sizeof(T)),
-                        f_contiguous(info), c_contiguous(info),
-                        convert(Ptr{T}, info.data))
-    end
-end
-
-function PyArray(o::PyObject)
-    info = PyArray_Info(o)
-    return PyArray{info.T, length(info.sz)}(o, info)
-end
-
-size(a::PyArray) = a.dims
-ndims(a::PyArray{T,N}) where {T,N} = N
-
-similar(a::PyArray, T, dims::Dims) = Array{T}(undef, dims)
-
-function copy(a::PyArray{T,N}) where {T,N}
-    if N > 1 && a.c_contig # equivalent to f_contig with reversed dims
-        B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N))
-        return permutedims(B, (N:-1:1))
-    end
-    A = Array{T}(undef, a.dims)
-    if a.f_contig
-        ccall(:memcpy, Cvoid, (Ptr{T}, Ptr{T}, Int), A, a, sizeof(T)*length(a))
-        return A
-    else
-        return copyto!(A, a)
-    end
-end
-
-# TODO: need to do bounds-checking of these indices!
-
-getindex(a::PyArray{T,0}) where {T} = unsafe_load(a.data)
-getindex(a::PyArray{T,1}, i::Integer) where {T} = unsafe_load(a.data, 1 + (i-1)*a.st[1])
-
-getindex(a::PyArray{T,2}, i::Integer, j::Integer) where {T} =
-  unsafe_load(a.data, 1 + (i-1)*a.st[1] + (j-1)*a.st[2])
-
-function getindex(a::PyArray, i::Integer)
-    if a.f_contig
-        return unsafe_load(a.data, i)
-    else
-        return a[ind2sub(a.dims, i)...]
-    end
-end
-
-function getindex(a::PyArray, is::Integer...)
-    index = 1
-    n = min(length(is),length(a.st))
-    for i = 1:n
-        index += (is[i]-1)*a.st[i]
-    end
-    for i = n+1:length(is)
-        if is[i] != 1
-            throw(BoundsError())
-        end
-    end
-    unsafe_load(a.data, index)
-end
-
-function writeok_assign(a::PyArray, v, i::Integer)
-    if a.info.readonly
-        throw(ArgumentError("read-only PyArray"))
-    else
-        unsafe_store!(a.data, v, i)
-    end
-    return a
-end
-
-setindex!(a::PyArray{T,0}, v) where {T} = writeok_assign(a, v, 1)
-setindex!(a::PyArray{T,1}, v, i::Integer) where {T} = writeok_assign(a, v, 1 + (i-1)*a.st[1])
-
-setindex!(a::PyArray{T,2}, v, i::Integer, j::Integer) where {T} =
-  writeok_assign(a, v, 1 + (i-1)*a.st[1] + (j-1)*a.st[2])
-
-function setindex!(a::PyArray, v, i::Integer)
-    if a.f_contig
-        return writeok_assign(a, v, i)
-    else
-        return setindex!(a, v, ind2sub(a.dims, i)...)
-    end
-end
-
-function setindex!(a::PyArray, v, is::Integer...)
-    index = 1
-    n = min(length(is),length(a.st))
-    for i = 1:n
-        index += (is[i]-1)*a.st[i]
-    end
-    for i = n+1:length(is)
-        if is[i] != 1
-            throw(BoundsError())
-        end
-    end
-    writeok_assign(a, v, index)
-end
-
-stride(a::PyArray, i::Integer) = a.st[i]
-
-Base.unsafe_convert(::Type{Ptr{T}}, a::PyArray{T}) where {T} = a.data
-
-pointer(a::PyArray, i::Int) = pointer(a, ind2sub(a.dims, i))
-
-function pointer(a::PyArray{T}, is::Tuple{Vararg{Int}}) where T
-    offset = 0
-    for i = 1:length(is)
-        offset += (is[i]-1)*a.st[i]
-    end
-    return a.data + offset*sizeof(T)
-end
-
-summary(a::PyArray{T}) where {T} = string(Base.dims2string(size(a)), " ",
-                                          string(T), " PyArray")
-
-#########################################################################
-# PyArray <-> PyObject conversions
-
-PyObject(a::PyArray) = a.o
-
-convert(::Type{PyArray}, o::PyObject) = PyArray(o)
-
-function convert(::Type{Array{T, 1}}, o::PyObject) where T<:NPY_TYPES
-    try
-        copy(PyArray{T, 1}(o, PyArray_Info(o))) # will check T and N vs. info
-    catch
-        len = @pycheckz ccall((@pysym :PySequence_Size), Int, (PyPtr,), o)
-        A = Array{pyany_toany(T)}(undef, len)
-        py2array(T, A, o, 1, 1)
-    end
-end
-
-function convert(::Type{Array{T}}, o::PyObject) where T<:NPY_TYPES
-    try
-        info = PyArray_Info(o)
-        try
-            copy(PyArray{T, length(info.sz)}(o, info)) # will check T == info.T
-        catch
-            return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1)
-        end
-    catch
-        py2array(T, o)
-    end
-end
-
-function convert(::Type{Array{T,N}}, o::PyObject) where {T<:NPY_TYPES,N}
-    try
-        info = PyArray_Info(o)
-        try
-            copy(PyArray{T,N}(o, info)) # will check T == info.T and N == length(info.sz)
-        catch
-            nd = length(info.sz)
-            if nd != N
-                throw(ArgumentError("cannot convert $(nd)d array to $(N)d"))
-            end
-            return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1)
-        end
-    catch
-        A = py2array(T, o)
-        if ndims(A) != N
-            throw(ArgumentError("cannot convert $(ndims(A))d array to $(N)d"))
-        end
-        A
-    end
-end
-
-function convert(::Type{Array{PyObject}}, o::PyObject)
-    map(pyincref, convert(Array{PyPtr}, o))
-end
-
-function convert(::Type{Array{PyObject,1}}, o::PyObject)
-    map(pyincref, convert(Array{PyPtr, 1}, o))
-end
-
-function convert(::Type{Array{PyObject,N}}, o::PyObject) where N
-    map(pyincref, convert(Array{PyPtr, N}, o))
-end
-
-#########################################################################
diff --git a/src/pyarray.jl b/src/pyarray.jl
new file mode 100644
index 00000000..fb8559a6
--- /dev/null
+++ b/src/pyarray.jl
@@ -0,0 +1,338 @@
+#########################################################################
+# Extract shape and other information about arrays that support Python's
+# Buffer Interface/Protocol (PEP 3118)
+#########################################################################
+struct PyArray_Info{T,N}
+    native::Bool # native byte order?
+    sz::NTuple{N,Int}
+    st::NTuple{N,Int} # strides, in multiples of bytes!
+    data::Ptr{T}
+    readonly::Bool
+    pybuf::PyBuffer
+end
+
+function PyArray_Info(o::PyObject)
+    # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer
+    pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS)
+    T, native_byteorder = array_format(pybuf)
+    sz = size(pybuf)
+    strd = strides(pybuf)
+    length(strd) == 0 && (sz = ())
+    N = length(sz)
+    isreadonly = pybuf.buf.readonly==1
+    return PyArray_Info{T,N}(native_byteorder, sz, strd, pybuf.buf.buf, isreadonly, pybuf)
+end
+
+aligned(i::PyArray_Info{T,N}) where {T,N} = #  FIXME: also check pointer alignment?
+  all(m -> m == 0, mod.(i.st, sizeof(T))) # strides divisible by elsize
+
+eltype(i::PyArray_Info{T,N}) where {T,N} = T
+ndims(i::PyArray_Info{T,N})  where {T,N} = N
+
+function default_stride(sz::NTuple{N, Int}, ::Type{T}) where {T,N}
+    stv = Vector{Int}(N)
+    stv[end] = sizeof(T)
+    for i = N-1:-1:1
+        stv[i] = stv[i+1]*sz[i+1]
+    end
+    ntuple(i->stv[i], N)
+end
+
+# whether a contiguous array in column-major (Fortran, Julia) order
+function f_contiguous(T::Type, sz::NTuple{N,Int}, st::NTuple{N,Int}) where N
+    if prod(sz) == 1 || length(sz) == 1
+        # 0 or 1-dim arrays should default to f-contiguous in julia
+        return true
+    end
+    if st[1] != sizeof(T)
+        return false
+    end
+    for j = 2:N
+        if st[j] != st[j-1] * sz[j-1]
+            return false
+        end
+    end
+    return true
+end
+
+f_contiguous(T::Type, sz::NTuple{N1,Int}, st::NTuple{N2,Int}) where {N1,N2} =
+    error("stride and size are different lengths, size: $sz, strides: $sz")
+
+f_contiguous(i::PyArray_Info{T,N}) where {T,N} = f_contiguous(T, i.sz, i.st)
+c_contiguous(i::PyArray_Info{T,N}) where {T,N} =
+    f_contiguous(T, reverse(i.sz), reverse(i.st))
+
+
+#########################################################################
+# PyArray: no-copy wrapper around NumPy ndarray
+#
+# Hopefully, in the future this can be a subclass of StridedArray (see
+# Julia issue #2345), which will allow it to be used with most Julia
+# functions, but that is not possible at the moment.  So, to use this
+# with Julia linalg functions etcetera a copy is still required.
+
+"""
+    PyArray(o::PyObject)
+
+This converts an `ndarray` object `o` to a PyArray.
+
+This implements a nocopy wrapper to a NumPy array (currently of only numeric types only).
+
+If you are using `pycall` and the function returns an `ndarray`, you can use `PyArray` as the return type to directly receive a `PyArray`.
+"""
+mutable struct PyArray{T,N} <: AbstractArray{T,N}
+    o::PyObject
+    info::PyArray_Info
+    dims::Dims
+    st::NTuple{N,Int}
+    f_contig::Bool
+    c_contig::Bool
+    data::Ptr{T}
+
+    function PyArray{T,N}(o::PyObject, info::PyArray_Info) where {T,N}
+        if !aligned(info)
+            throw(ArgumentError("only NPY_ARRAY_ALIGNED arrays are supported"))
+        elseif !info.native
+            throw(ArgumentError("only native byte-order arrays are supported"))
+        elseif eltype(info) != T
+            throw(ArgumentError("inconsistent type in PyArray constructor"))
+        elseif length(info.sz) != N || length(info.st) != N
+            throw(ArgumentError("inconsistent ndims in PyArray constructor"))
+        end
+        return new{T,N}(o, info, tuple(info.sz...), div.(info.st, sizeof(T)),
+                        f_contiguous(info), c_contiguous(info),
+                        convert(Ptr{T}, info.data))
+    end
+end
+
+function PyArray(o::PyObject)
+    info = PyArray_Info(o)
+    return PyArray{eltype(info), length(info.sz)}(o, info)
+end
+
+size(a::PyArray) = a.dims
+ndims(a::PyArray{T,N}) where {T,N} = N
+
+similar(a::PyArray, ::Type{T}, dims::Dims) where {T} = Array{T}(undef, dims)
+
+"""
+Update the data ptr of the `a` to point to the buffer exposed by `o` through
+the Python buffer interface
+"""
+function setdata!(a::PyArray{T,N}, o::PyObject) where {T,N}
+    pybufinfo = a.info.pybuf
+    PyBuffer!(pybufinfo, o, PyBUF_ND_CONTIGUOUS)
+    dataptr = pybufinfo.buf.buf
+    a.data = reinterpret(Ptr{T}, dataptr)
+    a
+end
+
+function copy(a::PyArray{T,N}) where {T,N}
+    if N > 1 && a.c_contig # equivalent to f_contig with reversed dims
+        B = unsafe_wrap(Array, a.data, ntuple((n -> a.dims[N - n + 1]), N))
+        return permutedims(B, (N:-1:1))
+    end
+    A = Array{T}(undef, a.dims)
+    if a.f_contig
+        ccall(:memcpy, Cvoid, (Ptr{T}, Ptr{T}, Int), A, a, sizeof(T)*length(a))
+        return A
+    else
+        return copyto!(A, a)
+    end
+end
+
+# TODO: need to do bounds-checking of these indices!
+# TODO: need to GC root these `a`s to guard against the PyArray getting gc'd,
+# e.g. if it's a temporary in a function:
+# `two_rands() = pycall(np.rand, PyArray, 10)[1:2]`
+
+
+getindex(a::PyArray{T,0}) where {T} = unsafe_load(a.data)
+getindex(a::PyArray{T,1}, i::Integer) where {T} = unsafe_load(a.data, 1 + (i-1)*a.st[1])
+
+getindex(a::PyArray{T,2}, i::Integer, j::Integer) where {T} =
+  unsafe_load(a.data, 1 + (i-1)*a.st[1] + (j-1)*a.st[2])
+
+function getindex(a::PyArray, i::Integer)
+    if a.f_contig
+        return unsafe_load(a.data, i)
+    else
+        return a[ind2sub(a.dims, i)...]
+    end
+end
+
+function getindex(a::PyArray, is::Integer...)
+    index = 1
+    n = min(length(is),length(a.st))
+    for i = 1:n
+        index += (is[i]-1)*a.st[i]
+    end
+    for i = n+1:length(is)
+        if is[i] != 1
+            throw(BoundsError())
+        end
+    end
+    unsafe_load(a.data, index)
+end
+
+function writeok_assign(a::PyArray, v, i::Integer)
+    if a.info.readonly
+        throw(ArgumentError("read-only PyArray"))
+    else
+        unsafe_store!(a.data, v, i)
+    end
+    return a
+end
+
+setindex!(a::PyArray{T,0}, v) where {T} = writeok_assign(a, v, 1)
+setindex!(a::PyArray{T,1}, v, i::Integer) where {T} = writeok_assign(a, v, 1 + (i-1)*a.st[1])
+
+setindex!(a::PyArray{T,2}, v, i::Integer, j::Integer) where {T} =
+  writeok_assign(a, v, 1 + (i-1)*a.st[1] + (j-1)*a.st[2])
+
+function setindex!(a::PyArray, v, i::Integer)
+    if a.f_contig
+        return writeok_assign(a, v, i)
+    else
+        return setindex!(a, v, ind2sub(a.dims, i)...)
+    end
+end
+
+function setindex!(a::PyArray, v, is::Integer...)
+    index = 1
+    n = min(length(is),length(a.st))
+    for i = 1:n
+        index += (is[i]-1)*a.st[i]
+    end
+    for i = n+1:length(is)
+        if is[i] != 1
+            throw(BoundsError())
+        end
+    end
+    writeok_assign(a, v, index)
+end
+
+stride(a::PyArray, i::Integer) = a.st[i]
+
+Base.unsafe_convert(::Type{Ptr{T}}, a::PyArray{T}) where {T} = a.data
+
+pointer(a::PyArray, i::Int) = pointer(a, ind2sub(a.dims, i))
+
+function pointer(a::PyArray{T}, is::Tuple{Vararg{Int}}) where T
+    offset = 0
+    for i = 1:length(is)
+        offset += (is[i]-1)*a.st[i]
+    end
+    return a.data + offset*sizeof(T)
+end
+
+summary(a::PyArray{T}) where {T} = string(Base.dims2string(size(a)), " ",
+                                          string(T), " PyArray")
+
+#########################################################################
+# PyArray <-> PyObject conversions
+
+const PYARR_TYPES = Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64,ComplexF32,ComplexF64,PyPtr}
+
+PyObject(a::PyArray) = a.o
+
+convert(::Type{PyArray}, o::PyObject) = PyArray(o)
+
+function convert(::Type{Array{T, 1}}, o::PyObject) where T<:PYARR_TYPES
+    try
+        copy(PyArray{T, 1}(o, PyArray_Info(o))) # will check T and N vs. info
+    catch
+        len = @pycheckz ccall((@pysym :PySequence_Size), Int, (PyPtr,), o)
+        A = Array{pyany_toany(T)}(undef, len)
+        py2array(T, A, o, 1, 1)
+    end
+end
+
+function convert(::Type{Array{T}}, o::PyObject) where T<:PYARR_TYPES
+    try
+        info = PyArray_Info(o)
+        try
+            copy(PyArray{T, length(info.sz)}(o, info)) # will check T == eltype(info)
+        catch
+            return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1)
+        end
+    catch
+        py2array(T, o)
+    end
+end
+
+function convert(::Type{Array{T,N}}, o::PyObject) where {T<:PYARR_TYPES,N}
+    try
+        info = PyArray_Info(o)
+        try
+            copy(PyArray{T,N}(o, info)) # will check T,N == eltype(info),ndims(info)
+        catch
+            nd = length(info.sz)
+            if nd != N
+                throw(ArgumentError("cannot convert $(nd)d array to $(N)d"))
+            end
+            return py2array(T, Array{pyany_toany(T)}(undef, info.sz...), o, 1, 1)
+        end
+    catch
+        A = py2array(T, o)
+        if ndims(A) != N
+            throw(ArgumentError("cannot convert $(ndims(A))d array to $(N)d"))
+        end
+        A
+    end
+end
+
+function convert(::Type{Array{PyObject}}, o::PyObject)
+    map(pyincref, convert(Array{PyPtr}, o))
+end
+
+function convert(::Type{Array{PyObject,1}}, o::PyObject)
+    map(pyincref, convert(Array{PyPtr, 1}, o))
+end
+
+function convert(::Type{Array{PyObject,N}}, o::PyObject) where N
+    map(pyincref, convert(Array{PyPtr, N}, o))
+end
+
+array_format(o::PyObject) = array_format(PyBuffer(o, PyBUF_ND_CONTIGUOUS))
+
+"""
+```
+NoCopyArray(o::PyObject)
+```
+Convert a Python array-like object, to a Julia `Array` or `PermutedDimsArray`
+without making a copy of the data.
+
+If the data is stored in row-major format
+(the default in Python/NumPy), then the returned array `nca` will be a
+`PermutedDimsArray` such that the arrays are indexed the same way in Julia and
+Python. i.e. `nca[idxs...] == o[idxs...]`
+
+If the data is stored in column-major format then a regular Julia `Array` will
+be returned.
+
+Warning: This function is only lightly tested, and should be considered
+experimental - it may cause segmentation faults on conversion or subsequent
+array access, or be subtly broken in other ways. Only dense/contiguous, native
+endian arrays that support the Python Buffer protocol are likely be converted
+correctly.
+"""
+function NoCopyArray(o::PyObject)
+    # n.b. the pydecref(::PyBuffer) finalizer handles releasing the PyBuffer
+    pybuf = PyBuffer(o, PyBUF_ND_CONTIGUOUS)
+    T, native_byteorder = array_format(pybuf)
+    !native_byteorder && throw(ArgumentError(
+      "Only native endian format supported, format string: '$(get_format_str(pybuf))'"))
+    T == Nothing && throw(ArgumentError(
+      "Array datatype '$(get_format_str(pybuf))' not supported"))
+    # TODO more checks on strides etc
+    sz = size(pybuf)
+    @static if VERSION >= v"0.7.0-DEV.3526" # julia#25647
+        arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), sz, own=false)
+    else
+        arr = unsafe_wrap(Array, convert(Ptr{T}, pybuf.buf.buf), sz, false)
+    end
+    !f_contiguous(T, sz, strides(pybuf)) &&
+        (arr = PermutedDimsArray(reshape(arr, reverse(sz)), (pybuf.buf.ndim:-1:1)))
+    return arr
+end
diff --git a/src/pybuffer.jl b/src/pybuffer.jl
index 06295261..ea076e1f 100644
--- a/src/pybuffer.jl
+++ b/src/pybuffer.jl
@@ -29,7 +29,7 @@ end
 mutable struct PyBuffer
     buf::Py_buffer
     PyBuffer() = begin
-        b = new(Py_buffer(C_NULL, C_NULL, 0, 0,
+        b = new(Py_buffer(C_NULL, PyPtr_NULL, 0, 0,
                           0, 0, C_NULL, C_NULL, C_NULL, C_NULL,
                           C_NULL, C_NULL, C_NULL))
         @compat finalizer(pydecref, b)
@@ -37,6 +37,13 @@ mutable struct PyBuffer
     end
 end
 
+"""
+`pydecref(o::PyBuffer)`
+Release the reference to buffer `o`
+N.b. As per https://docs.python.org/3/c-api/buffer.html#c.PyBuffer_Release,
+It is an error to call this function on a PyBuffer that was not obtained via
+the python c-api function `PyObject_GetBuffer(), unless o.obj is a PyPtr(C_NULL)`
+"""
 function pydecref(o::PyBuffer)
     # note that PyBuffer_Release sets o.obj to NULL, and
     # is a no-op if o.obj is already NULL
@@ -86,6 +93,9 @@ function Base.stride(b::PyBuffer, d::Integer)
     return Int(unsafe_load(b.buf.strides, d))
 end
 
+# Strides in bytes
+Base.strides(b::PyBuffer) = ((stride(b,i) for i in 1:b.buf.ndim)...,)
+
 # TODO change to `Ref{PyBuffer}` when 0.6 is dropped.
 iscontiguous(b::PyBuffer) =
     1 == ccall((@pysym :PyBuffer_IsContiguous), Cint,
@@ -93,7 +103,6 @@ iscontiguous(b::PyBuffer) =
 
 #############################################################################
 # pybuffer constant values from Include/object.h
-const PyBUF_MAX_NDIM = convert(Cint, 64)
 const PyBUF_SIMPLE    = convert(Cint, 0)
 const PyBUF_WRITABLE  = convert(Cint, 0x0001)
 const PyBUF_FORMAT    = convert(Cint, 0x0004)
@@ -103,16 +112,42 @@ const PyBUF_C_CONTIGUOUS   = convert(Cint, 0x0020) | PyBUF_STRIDES
 const PyBUF_F_CONTIGUOUS   = convert(Cint, 0x0040) | PyBUF_STRIDES
 const PyBUF_ANY_CONTIGUOUS = convert(Cint, 0x0080) | PyBUF_STRIDES
 const PyBUF_INDIRECT       = convert(Cint, 0x0100) | PyBUF_STRIDES
+const PyBUF_ND_CONTIGUOUS = Cint(PyBUF_WRITABLE | PyBUF_FORMAT | PyBUF_ND | PyBUF_STRIDES | PyBUF_ANY_CONTIGUOUS)
 
 # construct a PyBuffer from a PyObject, if possible
 function PyBuffer(o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE)
-    b = PyBuffer()
+    return PyBuffer!(PyBuffer(), o, flags)
+end
+
+function PyBuffer!(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_SIMPLE)
     # TODO change to `Ref{PyBuffer}` when 0.6 is dropped.
+    pydecref(b) # ensure b is properly released
     @pycheckz ccall((@pysym :PyObject_GetBuffer), Cint,
                      (PyPtr, Any, Cint), o, b, flags)
     return b
 end
 
+"""
+`isbuftype(b::PyBuffer, o::Union{PyObject,PyPtr}, flags=PyBUF_ND_CONTIGUOUS)`
+Returns true if the python object `o` supports the buffer protocol. False if not.
+"""
+function isbuftype(o::Union{PyObject,PyPtr})
+    # PyObject_CheckBuffer is defined in a header file here: https://github.com/python/cpython/blob/ef5ce884a41c8553a7eff66ebace908c1dcc1f89/Include/abstract.h#L510
+    # so we can't access it easily. It basically just checks if PyObject_GetBuffer exists
+    # So we'll just try call PyObject_GetBuffer and check for success/failure
+    b = PyBuffer()
+    ret = ccall((@pysym :PyObject_GetBuffer), Cint,
+                     (PyPtr, Any, Cint), o, b, PyBUF_ND_CONTIGUOUS)
+    if ret != 0
+        pyerr_clear()
+    else
+        # handle pointer types
+        T, native_byteorder = array_format(b)
+        T <: Ptr && (ret = 1)
+    end
+    return ret == 0
+end
+
 #############################################################################
 
 # recursive function to write buffer dimension by dimension, starting at
@@ -154,4 +189,69 @@ function Base.write(io::IO, b::PyBuffer)
     end
 end
 
+# ref: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966
+
+const standard_typestrs = Dict{String,DataType}(
+                           "?"=>Bool,        "P"=>Ptr{Cvoid},
+                           "b"=>Int8,        "B"=>UInt8,
+                           "h"=>Int16,       "H"=>UInt16,
+                           "i"=>Int32,       "I"=>UInt32,
+                           "l"=>Int32,       "L"=>UInt32,
+                           "q"=>Int64,       "Q"=>UInt64,
+                           "e"=>Float16,     "f"=>Float32,
+                           "d"=>Float64,     "g"=>Nothing, # Float128?
+                           # `Nothing` indicates no equiv Julia type
+                           "Z8"=>ComplexF32, "Z16"=>ComplexF64,
+                           "Zf"=>ComplexF32, "Zd"=>ComplexF64)
+
+const native_typestrs = Dict{String,DataType}(
+                           "?"=>Bool,        "P"=>Ptr{Cvoid},
+                           "b"=>Int8,        "B"=>UInt8,
+                           "h"=>Cshort,      "H"=>Cushort,
+                           "i"=>Cint,        "I"=>Cuint,
+                           "l"=>Clong,       "L"=>Culong,
+                           "q"=>Clonglong,   "Q"=>Culonglong,
+                           "e"=>Float16,     "f"=>Cfloat,
+                           "d"=>Cdouble,     "g"=>Nothing, # Float128?
+                           # `Nothing` indicates no equiv Julia type
+                           "Z8"=>ComplexF32, "Z16"=>ComplexF64,
+                           "Zf"=>ComplexF32, "Zd"=>ComplexF64)
+
+const typestrs_native =
+    Dict{DataType, String}(zip(values(native_typestrs), keys(native_typestrs)))
+
+get_format_str(pybuf::PyBuffer) = unsafe_string(convert(Ptr{UInt8}, pybuf.buf.format))
+
+function array_format(pybuf::PyBuffer)
+    # a NULL-terminated format-string ... indicating what is in each element of memory.
+    # TODO: handle more cases: https://www.python.org/dev/peps/pep-3118/#additions-to-the-struct-string-syntax
+    # refs: https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/src/multiarray/buffer.c#L966
+    #       https://github.com/numpy/numpy/blob/v1.14.2/numpy/core/_internal.py#L490
+    #       https://docs.python.org/2/library/struct.html#byte-order-size-and-alignment
+
+    # "NULL implies standard unsigned bytes ("B")" --pep 3118
+    pybuf.buf.format == C_NULL && return UInt8, true
+
+    fmt_str = get_format_str(pybuf)
+    native_byteorder = true
+    type_start_idx = 1
+    typestrs = standard_typestrs
+    if length(fmt_str) > 1
+        type_start_idx = 2
+        if fmt_str[1] == '='
+        elseif fmt_str[1] == '<'
+            native_byteorder = ENDIAN_BOM == 0x04030201
+        elseif fmt_str[1] == '>' || fmt_str =='!'
+            native_byteorder = ENDIAN_BOM == 0x01020304
+        elseif fmt_str[1] == '@' || fmt_str[1] == '^'
+            typestrs = native_typestrs
+        elseif fmt_str[1] == "Z"
+            type_start_idx = 1
+        else
+            error("Unsupported format string: \"$fmt_str\"")
+        end
+    end
+    typestrs[fmt_str[type_start_idx:end]], native_byteorder
+end
+
 #############################################################################
diff --git a/test/runtests.jl b/test/runtests.jl
index 9733cdef..25f34b5d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -272,11 +272,11 @@ const PyInt = pyversion < v"3" ? Int : Clonglong
         @test o[:real] == 1
     end
 
-    # []-based sequence access
-    let a1=[5,8,6], a2=rand(3,4), a3=rand(3,4,5), o1=PyObject(a1), o2=PyObject(a2), o3=PyObject(a3)
+    @testset "[]-based sequence access" begin
+        a1=[5,8,6]; a2=rand(3,4); a3=rand(3,4,5); o1=PyObject(a1); o2=PyObject(a2); o3=PyObject(a3)
         @test [o1[i] for i in eachindex(a1)] == a1
         @test [o1[end-(i-1)] for i in eachindex(a1)] == reverse(a1)
-        @test o2[1] == collect(a2[1,:])
+        @test all(o2[1] == collect(a2[1,:]))
         @test length(o1) == length(o2) == length(o3) == 3
         o1[end-1] = 7
         @test o1[2] == 7
@@ -286,7 +286,7 @@ const PyInt = pyversion < v"3" ? Int : Clonglong
         if PyCall.npy_initialized
             @test [o2[i,j] for i=1:3, j=1:4] == a2
             @test [o3[i,j,k] for i=1:3, j=1:4, k=1:5] == a3
-            @test o3[2,3] == collect(a3[2,3,:])
+            @test all(o3[2,3] == collect(a3[2,3,:]))
             o2[2,3] = 8
             @test o2[2,3] == 8
             o3[2,3,4] = 9
@@ -688,3 +688,4 @@ def try_call(f):
 end
 
 include("test_pyfncall.jl")
+include("testpybuffer.jl")
diff --git a/test/testpybuffer.jl b/test/testpybuffer.jl
new file mode 100644
index 00000000..8d735a05
--- /dev/null
+++ b/test/testpybuffer.jl
@@ -0,0 +1,114 @@
+using Compat.Test, PyCall, Compat
+using PyCall: f_contiguous, PyBUF_ND_CONTIGUOUS, array_format, npy_initialized,
+NoCopyArray, isbuftype, setdata!
+
+pyutf8(s::PyObject) = pycall(s["encode"], PyObject, "utf-8")
+pyutf8(s::String) = pyutf8(PyObject(s))
+
+@testset "PyBuffer" begin
+    @testset "String Buffers" begin
+        b = PyCall.PyBuffer(pyutf8("test string"))
+        @test ndims(b) == 1
+        @test (length(b),) == (length("test string"),) == (size(b, 1),) == size(b)
+        @test stride(b, 1) == 1
+        @test PyCall.iscontiguous(b) == true
+    end
+
+    if !npy_initialized
+        println("Skipping array related buffer tests since NumPy not available")
+    else
+        np = pyimport("numpy")
+        listpy = pybuiltin("list")
+        arrpyo(args...; kwargs...) =
+            pycall(np["array"], PyObject, args...; kwargs...)
+        listpyo(args...) = pycall(listpy, PyObject, args...)
+        pytestarray(sz::Int...; order="C") =
+            pycall(arrpyo(1.0:prod(sz), "d")["reshape"], PyObject, sz, order=order)
+
+        @testset "Non-native-endian" begin
+            wrong_endian_str = ENDIAN_BOM == 0x01020304 ? "<" : ">"
+            wrong_endian_arr =
+                pycall(np["ndarray"], PyObject, 2; buffer=UInt8[0,1,3,2],
+                                                   dtype=wrong_endian_str*"i2")
+            # Not supported, so throws
+            @test_throws ArgumentError NoCopyArray(wrong_endian_arr)
+            @test_throws ArgumentError PyArray(wrong_endian_arr)
+        end
+
+        @testset "NoCopyArray 1d" begin
+            ao = arrpyo(1.0:10.0, "d")
+            pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS)
+            T, native_byteorder = array_format(pybuf)
+            @test T == Float64
+            @test native_byteorder == true
+            @test size(pybuf) == (10,)
+            @test strides(pybuf) == (1,) .* sizeof(T)
+            nca = NoCopyArray(ao)
+            @test !(nca isa PermutedDimsArray)
+            @test nca isa Array
+            @test nca[3] == ao[3]
+            @test nca[4] == ao[4]
+        end
+
+        @testset "NoCopyArray 2d f-contig" begin
+            ao = arrpyo(reshape(1.0:12.0, (3,4)) |> collect, "d", order="F")
+            pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS)
+            T, native_byteorder = array_format(pybuf)
+            @test T == Float64
+            @test native_byteorder == true
+            @test size(pybuf) == (3,4)
+            @test strides(pybuf) == (1, 3) .* sizeof(T)
+            nca = NoCopyArray(ao)
+            @test !(nca isa PermutedDimsArray)
+            # @show typeof(nca) (nca isa Array)
+            @test nca isa Array
+            @test size(nca) == (3,4)
+            @test strides(nca) == (1,3)
+            @test nca[3,2] == ao[3,2]
+            @test nca[2,3] == ao[2,3]
+        end
+
+        @testset "NoCopyArray 3d c-contig" begin
+            ao = pytestarray(3,4,5)
+            pybuf = PyBuffer(ao, PyBUF_ND_CONTIGUOUS)
+            T, native_byteorder = array_format(pybuf)
+            @test T == Float64
+            @test native_byteorder == true
+            @test size(pybuf) == (3,4,5)
+            @test strides(pybuf) == (20,5,1) .* sizeof(T)
+            nca = NoCopyArray(ao)
+            @test nca isa PermutedDimsArray
+            @test !(nca isa Array)
+            @test size(nca) == (3,4,5)
+            @test strides(nca) == (20,5,1)
+            @test nca[2,3,4] == ao[2,3,4]
+            @test nca[3,2,4] == ao[3,2,4]
+        end
+
+        @testset "isbuftype" begin
+            @test isbuftype(PyObject(0)) == false
+            @test isbuftype(listpyo((1.0:10.0...,))) == false
+            @test isbuftype(arrpyo(1.0:10.0, "d")) == true
+            @test isbuftype(PyObject([1:10...])) == true
+        end
+
+        # TODO maybe move these to a test_pyarray.jl
+        @testset "setdata!" begin
+            ao1 = arrpyo(1.0:10.0, "d")
+            pyarr = convert(PyArray, ao1)
+            ao2 = arrpyo(11.0:20.0, "d")
+            setdata!(pyarr, ao2)
+            @test all(pyarr[1:10] .== 11.0:20.0)
+        end
+
+        @testset "similar on PyArray PyVec getindex" begin
+            jlarr1 = [1:10;]
+            jlarr2 = hcat([1:10;], [1:10;])
+            pyarr1 = pycall(np["array"], PyArray, jlarr1)
+            pyarr2 = pycall(np["array"], PyArray, jlarr2)
+            @test all(pyarr1[1:10]    .== jlarr1[1:10])
+            @test all(pyarr2[1:10, 2] .== jlarr2[1:10, 2])
+            @test all(pyarr2[1:10, 1:2] .== jlarr2)
+        end
+    end
+end