Skip to content

Commit

Permalink
Update to Julia 0.6-1.0, drop support for 0.4-0.5 (#24)
Browse files Browse the repository at this point in the history
* Update to Julia 0.6-1.0, drop support for 0.4-0.5

* Worry about UTF-16, UTF-32

* Comment formatting

* Take unicode seriously
  • Loading branch information
galenlynch authored and fhs committed Aug 20, 2018
1 parent 35f046e commit 0bd3fbf
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 91 deletions.
11 changes: 9 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
language: julia

os:
- linux
- osx
julia:
- 0.4
- 0.5
- 0.6
- 0.7
- 1.0
- nightly

matrix:
allow_failures:
- julia: 1.0
- julia: nightly

notifications:
email: false
4 changes: 2 additions & 2 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
julia 0.4
julia 0.6
ZipFile 0.3.0
Compat 0.7.15
Compat 1
150 changes: 87 additions & 63 deletions src/NPZ.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,25 @@ module NPZ
# NPZ file format is described in
# https://github.com/numpy/numpy/blob/v1.7.0/numpy/lib/format.py

using ZipFile
using Compat
using ZipFile, Compat

import Compat.String
@static if VERSION >= v"0.7.0-DEV.2575"
import Base.CodeUnits
else
# CodeUnits not yet supported by Compat but not needed in julia 0.6...
# codeunits function in Compat returns uintX instead of codeunits
# therefore this 'stump' type should work
abstract type CodeUnits{U, S} end
end

export npzread, npzwrite

const NPYMagic = UInt8[0x93, 'N', 'U', 'M', 'P', 'Y']
const ZIPMagic = UInt8['P', 'K', 3, 4]
const Version = UInt8[1, 0]

const MaxMagicLen = maximum(length.([NPYMagic, ZIPMagic]))

const TypeMaps = [
("b1", Bool),
("i1", Int8),
Expand All @@ -28,8 +36,8 @@ const TypeMaps = [
("f2", Float16),
("f4", Float32),
("f8", Float64),
("c8", Complex64),
("c16", Complex128),
("c8", Complex{Float32}),
("c16", Complex{Float64}),
]
const Numpy2Julia = Dict{String, DataType}()
for (s,t) in TypeMaps
Expand Down Expand Up @@ -58,65 +66,72 @@ end
# be fixed by rehashing the Dict when the module is
# loaded.

readle(ios::IO, ::Type{UInt16}) = htol(read(ios, UInt16))
readle(ios::IO, ::Type{T}) where T = ltoh(read(ios, T)) # ltoh is inverse of htol

function writele(ios::IO, x::Vector{UInt8})
n = write(ios, x)
if n != length(x)
error("short write")
end
n
function writecheck(io::IO, x::Any)
n = write(io, x) # returns size in bytes
n == sizeof(x) || error("short write") # sizeof is size in bytes
end

writele(ios::IO, x::String) = writele(ios, convert(Vector{UInt8}, x))
writele(ios::IO, x::UInt16) = writele(ios, reinterpret(UInt8, [htol(x)]))
# Endianness only pertains to multi-byte things
writele(ios::IO, x::AbstractVector{UInt8}) = writecheck(ios, x)
writele(ios::IO, x::AbstractVector{CodeUnits{UInt8, <:Any}}) = writecheck(ios, x)
# codeunits returns vector of CodeUnits in 7+, uint in 6
writele(ios::IO, x::AbstractString) = writele(ios, codeunits(x))

writele(ios::IO, x::UInt16) = writecheck(ios, htol(x))

function parsechar(s::String, c::Char)
if s[1] != c
error("parsing header failed: expected character '$c', found '$(s[1])'")
function parsechar(s::AbstractString, c::Char)
firstchar = s[firstindex(s)]
if firstchar != c
error("parsing header failed: expected character '$c', found '$firstchar'")
end
s[2:end]
SubString(s, nextind(s, 1))
end

function parsestring(s::String)
function parsestring(s::AbstractString)
s = parsechar(s, '\'')
i = findfirst(s, '\'')
if i == 0
error("parsing header failed: malformed string")
end
s[1:i-1], s[i+1:end]
parts = split(s, '\'', limit = 2)
length(parts) != 2 && error("parsing header failed: malformed string")
parts[1], parts[2]
end

function parsebool(s::String)
if s[1:4] == "True"
return true, s[5:end]
elseif s[1:5] == "False"
return false, s[6:end]
function parsebool(s::AbstractString)
if SubString(s, firstindex(s), thisind(s, 4)) == "True"
return true, SubString(s, nextind(s, 4))
elseif SubString(s, firstindex(s), thisind(s, 5)) == "False"
return false, SubString(s, nextind(s, 5))
end
error("parsing header failed: excepted True or False")
end

function parseinteger(s::String)
i = findfirst(c -> !isdigit(c), s)
n = parse(Int, s[1:i-1])
if s[i] == 'L'
i += 1
function parseinteger(s::AbstractString)
isdigit(s[firstindex(s)]) || error("parsing header failed: no digits")
tail_idx = findfirst(c -> !isdigit(c), s)
if tail_idx == nothing
intstr = SubString(s, firstindex(s))
else
intstr = SubString(s, firstindex(s), prevind(s, tail_idx))
if s[tail_idx] == 'L' # output of firstindex should be a valid code point
tail_idx = nextind(s, i)
end
end
n, s[i:end]
n = parse(Int, intstr)
return n, SubString(s, tail_idx)
end

function parsetuple(s::String)
function parsetuple(s::AbstractString)
s = parsechar(s, '(')
tup = Int[]
while true
s = strip(s)
if s[1] == ')'
if s[firstindex(s)] == ')'
break
end
n, s = parseinteger(s)
tup = [tup; n]
push!(tup, n)
s = strip(s)
if s[1] == ')'
if s[firstindex(s)] == ')'
break
end
s = parsechar(s, ',')
Expand All @@ -125,9 +140,10 @@ function parsetuple(s::String)
tup, s
end

function parsedtype(s::String)
function parsedtype(s::AbstractString)
dtype, s = parsestring(s)
c, t = dtype[1], dtype[2:end]
c = dtype[firstindex(s)]
t = SubString(dtype, nextind(s, 1))
if c == '<'
toh = ltoh
elseif c == '>'
Expand All @@ -143,16 +159,16 @@ function parsedtype(s::String)
(toh, Numpy2Julia[t]), s
end

type Header
descr :: @compat(Tuple{Function, DataType})
fortran_order :: Bool
shape :: Vector{Int}
struct Header
descr::Tuple{Function, DataType}
fortran_order::Bool
shape::Vector{Int}
end

function parseheader(s::String)
function parseheader(s::AbstractString)
s = parsechar(s, '{')

dict = @compat Dict{String,Any}()
dict = Dict{String,Any}()
for _ in 1:3
s = strip(s)
key, s = parsestring(s)
Expand All @@ -169,7 +185,7 @@ function parseheader(s::String)
error("parsing header failed: bad dictionary key")
end
s = strip(s)
if s[1] == '}'
if s[firstindex(s)] == '}'
break
end
s = parsechar(s, ',')
Expand All @@ -184,42 +200,47 @@ function parseheader(s::String)
end

function npzreadarray(f::IO)
b = read(f, UInt8, length(NPYMagic))
@compat b = read!(f, Vector{UInt8}(undef, length(NPYMagic)))
if b != NPYMagic
error("not a numpy array file")
end
b = read(f, UInt8, length(Version))
@compat b = read!(f, Vector{UInt8}(undef, length(Version)))
if b != Version
error("unsupported NPZ version")
end
hdrlen = readle(f, UInt16)
hdr = ascii(String(read(f, UInt8, hdrlen)))
@compat hdr = ascii(String(read!(f, Vector{UInt8}(undef, hdrlen))))
hdr = parseheader(strip(hdr))

toh, typ = hdr.descr
if hdr.fortran_order
x = map(toh, read(f, typ, hdr.shape...))
@compat x = map(toh, read!(f, Array{typ}(undef, hdr.shape...)))
else
x = map(toh, read(f, typ, reverse(hdr.shape)...))
@compat x = map(toh, read!(f, Array{typ}(undef, reverse(hdr.shape)...)))
if ndims(x) > 1
x = permutedims(x, collect(ndims(x):-1:1))
end
end
x
x isa Array{<:Any, 0} ? x[1] : x
end

function samestart(a::AbstractVector, b::AbstractVector)
nb = length(b)
length(a) >= nb && view(a, 1:nb) == b
end

function npzread(filename::AbstractString)
# Detect if the file is a numpy npy array file or a npz/zip file.
f = open(filename)
b = read(f, UInt8, max(length(NPYMagic), length(ZIPMagic)))
if startswith(b, ZIPMagic)
@compat b = read!(f, Vector{UInt8}(undef, MaxMagicLen))
if samestart(b, ZIPMagic)
close(f)
f = ZipFile.Reader(filename)
data = npzread(f)
close(f)
return data
end
if startswith(b, NPYMagic)
if samestart(b, NPYMagic)
seekstart(f)
data = npzreadarray(f)
close(f)
Expand All @@ -240,7 +261,9 @@ function npzread(dir::ZipFile.Reader)
vars
end

function npzwritearray(f::IO, x::Array{UInt8}, T::DataType, shape::Vector{Int})
function npzwritearray(
f::IO, x::AbstractArray{UInt8}, T::DataType, shape::Vector{Int}
)
if !haskey(Julia2Numpy, T)
error("unsupported type $T")
end
Expand All @@ -258,28 +281,29 @@ function npzwritearray(f::IO, x::Array{UInt8}, T::DataType, shape::Vector{Int})
dict *= " "^(pad-1) * "\n"
end

writele(f, @compat UInt16(length(dict)))
writele(f, UInt16(length(dict)))
writele(f, dict)
if write(f, x) != length(x)
error("short write")
end
end

function npzwritearray{T}(f::IO, x::Array{T})
function npzwritearray(f::IO, x::AbstractArray{T}) where T
npzwritearray(f, reinterpret(UInt8, x[:]), T, [i for i in size(x)])
end

function npzwritearray{T<:Number}(f::IO, x::T)
function npzwritearray(f::IO, x::T) where T<:Number
npzwritearray(f, reinterpret(UInt8, [x]), T, Int[])
end

function npzwrite(filename::AbstractString, x)
f = open(filename, "w")
npzwritearray(f, x)
close(f)
end

function npzwrite{S<:AbstractString}(filename::AbstractString, vars::Dict{S,Any})
function npzwrite(
filename::AbstractString, vars::Dict{S,Any}
) where S<:AbstractString
dir = ZipFile.Writer(filename)
for (k, v) in vars
f = ZipFile.addfile(dir, k * ".npy")
Expand Down
Loading

0 comments on commit 0bd3fbf

Please sign in to comment.