Update to Julia 0.6-1.0, drop support for 0.4-0.5 (#24)

* Update to Julia 0.6-1.0, drop support for 0.4-0.5 * Worry about UTF-16, UTF-32 * Comment formatting * Take unicode seriously
fhs · Aug 20, 2018 · 0bd3fbf · 0bd3fbf
1 parent 35f046e
commit 0bd3fbf
Show file tree

Hide file tree

Showing 4 changed files with 127 additions and 91 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,11 +1,18 @@
 language: julia
+
 os:
     - linux
     - osx
 julia:
-    - 0.4
-    - 0.5
     - 0.6
+    - 0.7
+    - 1.0
     - nightly
+
+matrix:
+  allow_failures:
+    - julia: 1.0
+    - julia: nightly
+
 notifications:
     email: false
diff --git a/REQUIRE b/REQUIRE
@@ -1,3 +1,3 @@
-julia 0.4
+julia 0.6
 ZipFile 0.3.0
-Compat 0.7.15
+Compat 1
diff --git a/src/NPZ.jl b/src/NPZ.jl
@@ -4,17 +4,25 @@ module NPZ
 # NPZ file format is described in
 # https://github.com/numpy/numpy/blob/v1.7.0/numpy/lib/format.py
 
-using ZipFile
-using Compat
+using ZipFile, Compat
 
-import Compat.String
+@static if VERSION >=  v"0.7.0-DEV.2575"
+    import Base.CodeUnits
+else
+    # CodeUnits not yet supported by Compat but not needed in julia 0.6...
+    # codeunits function in Compat returns uintX instead of codeunits
+    # therefore this 'stump' type should work
+    abstract type CodeUnits{U, S} end
+end
 
 export npzread, npzwrite
 
 const NPYMagic = UInt8[0x93, 'N', 'U', 'M', 'P', 'Y']
 const ZIPMagic = UInt8['P', 'K', 3, 4]
 const Version = UInt8[1, 0]
 
+const MaxMagicLen = maximum(length.([NPYMagic, ZIPMagic]))
+
 const TypeMaps = [
     ("b1", Bool),
     ("i1", Int8),
@@ -28,8 +36,8 @@ const TypeMaps = [
     ("f2", Float16),
     ("f4", Float32),
     ("f8", Float64),
-    ("c8", Complex64),
-    ("c16", Complex128),
+    ("c8", Complex{Float32}),
+    ("c16", Complex{Float64}),
 ]
 const Numpy2Julia = Dict{String, DataType}()
 for (s,t) in TypeMaps
@@ -58,65 +66,72 @@ end
 # be fixed by rehashing the Dict when the module is
 # loaded.
 
-readle(ios::IO, ::Type{UInt16}) = htol(read(ios, UInt16))
+readle(ios::IO, ::Type{T}) where T = ltoh(read(ios, T)) # ltoh is inverse of htol
 
-function writele(ios::IO, x::Vector{UInt8})
-    n = write(ios, x)
-    if n != length(x)
-        error("short write")
-    end
-    n
+function writecheck(io::IO, x::Any)
+    n = write(io, x) # returns size in bytes
+    n == sizeof(x) || error("short write") # sizeof is size in bytes
 end
 
-writele(ios::IO, x::String) = writele(ios, convert(Vector{UInt8}, x))
-writele(ios::IO, x::UInt16) = writele(ios, reinterpret(UInt8, [htol(x)]))
+# Endianness only pertains to multi-byte things
+writele(ios::IO, x::AbstractVector{UInt8}) = writecheck(ios, x)
+writele(ios::IO, x::AbstractVector{CodeUnits{UInt8, <:Any}}) = writecheck(ios, x)
+# codeunits returns vector of CodeUnits in 7+, uint in 6
+writele(ios::IO, x::AbstractString) = writele(ios, codeunits(x))
+
+writele(ios::IO, x::UInt16) = writecheck(ios, htol(x))
 
-function parsechar(s::String, c::Char)
-    if s[1] != c
-        error("parsing header failed: expected character '$c', found '$(s[1])'")
+function parsechar(s::AbstractString, c::Char)
+    firstchar = s[firstindex(s)]
+    if  firstchar != c
+        error("parsing header failed: expected character '$c', found '$firstchar'")
     end
-    s[2:end]
+    SubString(s, nextind(s, 1))
 end
 
-function parsestring(s::String)
+function parsestring(s::AbstractString)
     s = parsechar(s, '\'')
-    i = findfirst(s, '\'')
-    if i == 0
-        error("parsing header failed: malformed string")
-    end
-    s[1:i-1], s[i+1:end]
+    parts = split(s, '\'', limit = 2)
+    length(parts) != 2 && error("parsing header failed: malformed string")
+    parts[1], parts[2]
 end
 
-function parsebool(s::String)
-    if s[1:4] == "True"
-        return true, s[5:end]
-    elseif s[1:5] == "False"
-        return false, s[6:end]
+function parsebool(s::AbstractString)
+    if SubString(s, firstindex(s), thisind(s, 4)) == "True"
+        return true, SubString(s, nextind(s, 4))
+    elseif SubString(s, firstindex(s), thisind(s, 5)) == "False"
+        return false, SubString(s, nextind(s, 5))
     end
     error("parsing header failed: excepted True or False")
 end
 
-function parseinteger(s::String)
-    i = findfirst(c -> !isdigit(c), s)
-    n = parse(Int, s[1:i-1])
-    if s[i] == 'L'
-        i += 1
+function parseinteger(s::AbstractString)
+    isdigit(s[firstindex(s)]) || error("parsing header failed: no digits")
+    tail_idx = findfirst(c -> !isdigit(c), s)
+    if tail_idx == nothing
+        intstr = SubString(s, firstindex(s))
+    else
+        intstr = SubString(s, firstindex(s), prevind(s, tail_idx))
+        if s[tail_idx] == 'L' # output of firstindex should be a valid code point
+            tail_idx = nextind(s, i)
+        end
     end
-    n, s[i:end]
+    n = parse(Int, intstr)
+    return n, SubString(s, tail_idx)
 end
 
-function parsetuple(s::String)
+function parsetuple(s::AbstractString)
     s = parsechar(s, '(')
     tup = Int[]
     while true
         s = strip(s)
-        if s[1] == ')'
+        if s[firstindex(s)] == ')'
             break
         end
         n, s = parseinteger(s)
-        tup = [tup; n]
+        push!(tup, n)
         s = strip(s)
-        if s[1] == ')'
+        if s[firstindex(s)] == ')'
             break
         end
         s = parsechar(s, ',')
@@ -125,9 +140,10 @@ function parsetuple(s::String)
     tup, s
 end
 
-function parsedtype(s::String)
+function parsedtype(s::AbstractString)
     dtype, s = parsestring(s)
-    c, t = dtype[1], dtype[2:end]
+    c = dtype[firstindex(s)]
+    t = SubString(dtype, nextind(s, 1))
     if c == '<'
         toh = ltoh
     elseif c == '>'
@@ -143,16 +159,16 @@ function parsedtype(s::String)
     (toh, Numpy2Julia[t]), s
 end
 
-type Header
-    descr :: @compat(Tuple{Function, DataType})
-    fortran_order :: Bool
-    shape :: Vector{Int}
+struct Header
+    descr::Tuple{Function, DataType}
+    fortran_order::Bool
+    shape::Vector{Int}
 end
 
-function parseheader(s::String)
+function parseheader(s::AbstractString)
     s = parsechar(s, '{')
 
-    dict = @compat Dict{String,Any}()
+    dict = Dict{String,Any}()
     for _ in 1:3
         s = strip(s)
         key, s = parsestring(s)
@@ -169,7 +185,7 @@ function parseheader(s::String)
             error("parsing header failed: bad dictionary key")
         end
         s = strip(s)
-        if s[1] == '}'
+        if s[firstindex(s)] == '}'
             break
         end
         s = parsechar(s, ',')
@@ -184,42 +200,47 @@ function parseheader(s::String)
 end
 
 function npzreadarray(f::IO)
-    b = read(f, UInt8, length(NPYMagic))
+    @compat b = read!(f, Vector{UInt8}(undef, length(NPYMagic)))
     if b != NPYMagic
         error("not a numpy array file")
     end
-    b = read(f, UInt8, length(Version))
+    @compat b = read!(f, Vector{UInt8}(undef, length(Version)))
     if b != Version
         error("unsupported NPZ version")
     end
     hdrlen = readle(f, UInt16)
-    hdr = ascii(String(read(f, UInt8, hdrlen)))
+    @compat hdr = ascii(String(read!(f, Vector{UInt8}(undef, hdrlen))))
     hdr = parseheader(strip(hdr))
 
     toh, typ = hdr.descr
     if hdr.fortran_order
-        x = map(toh, read(f, typ, hdr.shape...))
+        @compat x = map(toh, read!(f, Array{typ}(undef, hdr.shape...)))
     else
-        x = map(toh, read(f, typ, reverse(hdr.shape)...))
+        @compat x = map(toh, read!(f, Array{typ}(undef, reverse(hdr.shape)...)))
         if ndims(x) > 1
             x = permutedims(x, collect(ndims(x):-1:1))
         end
     end
-    x
+    x isa Array{<:Any, 0} ? x[1] : x
+end
+
+function samestart(a::AbstractVector, b::AbstractVector)
+    nb = length(b)
+    length(a) >= nb && view(a, 1:nb) == b
 end
 
 function npzread(filename::AbstractString)
     # Detect if the file is a numpy npy array file or a npz/zip file.
     f = open(filename)
-    b = read(f, UInt8, max(length(NPYMagic), length(ZIPMagic)))
-    if startswith(b, ZIPMagic)
+    @compat b = read!(f, Vector{UInt8}(undef, MaxMagicLen))
+    if samestart(b, ZIPMagic)
         close(f)
         f = ZipFile.Reader(filename)
         data = npzread(f)
         close(f)
         return data
     end
-    if startswith(b, NPYMagic)
+    if samestart(b, NPYMagic)
         seekstart(f)
         data = npzreadarray(f)
         close(f)
@@ -240,7 +261,9 @@ function npzread(dir::ZipFile.Reader)
     vars
 end
 
-function npzwritearray(f::IO, x::Array{UInt8}, T::DataType, shape::Vector{Int})
+function npzwritearray(
+    f::IO, x::AbstractArray{UInt8}, T::DataType, shape::Vector{Int}
+)
     if !haskey(Julia2Numpy, T)
         error("unsupported type $T")
     end
@@ -258,28 +281,29 @@ function npzwritearray(f::IO, x::Array{UInt8}, T::DataType, shape::Vector{Int})
         dict *= " "^(pad-1) * "\n"
     end
 
-    writele(f, @compat UInt16(length(dict)))
+    writele(f, UInt16(length(dict)))
     writele(f, dict)
     if write(f, x) != length(x)
         error("short write")
     end
 end
 
-function npzwritearray{T}(f::IO, x::Array{T})
+function npzwritearray(f::IO, x::AbstractArray{T}) where T
     npzwritearray(f, reinterpret(UInt8, x[:]), T, [i for i in size(x)])
 end
 
-function npzwritearray{T<:Number}(f::IO, x::T)
+function npzwritearray(f::IO, x::T) where T<:Number
     npzwritearray(f, reinterpret(UInt8, [x]), T, Int[])
 end
-
 function npzwrite(filename::AbstractString, x)
     f = open(filename, "w")
     npzwritearray(f, x)
     close(f)
 end
 
-function npzwrite{S<:AbstractString}(filename::AbstractString, vars::Dict{S,Any})
+function npzwrite(
+    filename::AbstractString, vars::Dict{S,Any}
+) where S<:AbstractString
     dir = ZipFile.Writer(filename)
     for (k, v) in vars
         f = ZipFile.addfile(dir, k * ".npy")