Deprecate readbytes!()

- There were only a handful of uses of readbytes!() in Base, mostly in implementations of other I/O functions.
- Most existing read!() methods were already resizing the result array, so returning the byte count from readbytes!() was not that useful.
- Added eachblock() to deal with the countlines() use case in datafmt.jl. (eachblock() is an iterator like eachline().)
samoconnor · Jan 15, 2016 · e757e5c · kmsquire · Jan 15, 2016 · kmsquire
1 parent fd6b27f
commit e757e5c
Show file tree

Hide file tree

Showing 11 changed files with 84 additions and 70 deletions.
diff --git a/base/datafmt.jl b/base/datafmt.jl
@@ -18,11 +18,9 @@ const offs_chunk_size = 5000
 countlines(f::AbstractString, eol::Char='\n') = open(io->countlines(io,eol), f)::Int
 function countlines(io::IO, eol::Char='\n')
     isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
-    a = Array(UInt8, 8192)
     nl = 0
-    while !eof(io)
-        nb = readbytes!(io, a)
-        @simd for i=1:nb
+    for a in eachblock(io)
+        @simd for i=1:length(a)
             @inbounds nl += a[i] == eol
         end
     end

diff --git a/base/deprecated.jl b/base/deprecated.jl
@@ -964,3 +964,9 @@ end
 #https://github.com/JuliaLang/julia/issues/14608
 @deprecate readall readstring
 @deprecate readbytes read
+
+export readbytes!
+@noinline function readbytes!(io, a, n=length(a))
+    depwarn("readbytes! is deprecated, use read! instead", :readbytes!)
+    return length(read!(io, a, n))
+end
diff --git a/base/docs/helpdb/Base.jl b/base/docs/helpdb/Base.jl
@@ -2462,10 +2462,18 @@ poll_file
 """
     eachline(stream or filename)
 
-Create an iterable object that will yield each line.
+Iterable that yields each line.
 """
 eachline
 
+"""
+    eachblock(stream or filename, [blocksize])
+
+
+Iterable that yields each block as `AbstractArray{UInt8}`
+"""
+eachblock
+
 """
     isposdef!(A) -> Bool
 
@@ -6430,14 +6438,13 @@ Compute the inverse secant of `x`, where the output is in degrees.
 asecd
 
 """
-    readbytes!(stream, b::Vector{UInt8}, nb=length(b); all=true)
+    read!(stream, b::Vector{UInt8}, nb=length(b); all=true)
 
-Read at most `nb` bytes from the stream into `b`, returning the number of bytes read
-(increasing the size of `b` as needed).
+Read at most `nb` bytes from the stream into `b`, resizing `b` to match the number of bytes read.
 
 See `read` for a description of the `all` option.
 """
-readbytes!
+read!
 
 """
     basename(path::AbstractString) -> AbstractString

diff --git a/base/exports.jl b/base/exports.jl
@@ -1132,6 +1132,7 @@ export
     connect,
     countlines,
     deserialize,
+    eachblock,
     eachline,
     eof,
     fd,
@@ -1167,7 +1168,6 @@ export
     read!,
     readstring,
     readavailable,
-    readbytes!,
     readchomp,
     readcsv,
     readdir,

diff --git a/base/filesystem.jl b/base/filesystem.jl
@@ -151,27 +151,22 @@ function read(f::File, ::Type{UInt8})
     return ret % UInt8
 end
 
-function read!(f::File, a::Vector{UInt8}, nel=length(a))
+function read!(f::File, a::Vector{UInt8})
     check_open(f)
-    if nel < 0 || nel > length(a)
-        throw(BoundsError())
-    end
     ret = ccall(:jl_fs_read, Int32, (Int32, Ptr{Void}, Csize_t),
-                f.handle, a, nel)
+                f.handle, a, length(a))
     uv_error("read",ret)
     return a
 end
 
 nb_available(f::File) = filesize(f) - position(f)
 
-function readbytes!(f::File, b::Array{UInt8}, nb=length(b))
+function read!(f::File, b::Vector{UInt8}, nb=length(b))
     nr = min(nb, nb_available(f))
-    if length(b) < nr
-        resize!(b, nr)
-    end
-    read!(f, b, nr)
-    return nr
+    resize!(b, nr)
+    read!(f, b)
 end
+
 read(io::File) = read!(io, Array(UInt8, nb_available(io)))
 read(io::File, nb::Integer) = read!(io, Array(UInt8, min(nb, nb_available(io))))
 

diff --git a/base/io.jl b/base/io.jl
@@ -307,7 +307,7 @@ readline(s::IO) = readuntil(s, '\n')
 readchomp(x) = chomp!(readstring(x))
 
 # read up to nb bytes into nb, returning # bytes read
-function readbytes!(s::IO, b::AbstractArray{UInt8}, nb=length(b))
+function read!(s::IO, b::Vector{UInt8}, nb=length(b))
     olb = lb = length(b)
     nr = 0
     while nr < nb && !eof(s)
@@ -322,16 +322,15 @@ function readbytes!(s::IO, b::AbstractArray{UInt8}, nb=length(b))
     if lb > olb
         resize!(b, nr) # shrink to just contain input data if was resized
     end
-    return nr
+    return b
 end
 
 # read up to nb bytes from s, returning a Vector{UInt8} of bytes read.
 function read(s::IO, nb=typemax(Int))
-    # Let readbytes! grow the array progressively by default
+    # Let read! grow the array progressively by default
     # instead of taking of risk of over-allocating
     b = Array(UInt8, nb == typemax(Int) ? 1024 : nb)
-    nr = readbytes!(s, b, nb)
-    resize!(b, nr)
+    read!(s, b, nb)
 end
 
 function readstring(s::IO)
@@ -341,27 +340,39 @@ end
 
 ## high-level iterator interfaces ##
 
-type EachLine
+type EachChunk{T}
     stream::IO
+    f::Function
     ondone::Function
-    EachLine(stream) = EachLine(stream, ()->nothing)
-    EachLine(stream, ondone) = new(stream, ondone)
+    EachChunk(stream, f) = EachChunk{T}(stream, f, ()->nothing)
+    EachChunk(stream, f, ondone) = new(stream, f, ondone)
 end
-eachline(stream::IO) = EachLine(stream)
-eachline(filename::AbstractString) = EachLine(open(filename), close)
+eachline(stream::IO) = EachChunk{ByteString}(stream, readline)
+function eachline(filename::AbstractString)
+    io = open(filename)
+    EachChunk{ByteString}(io, readline, ()->close(io))
+end
+
+start{T}(::EachChunk{T}) = nothing
+done{T}(itr::EachChunk{T}, nada) = eof(itr.stream) ? (itr.ondone(); true) : false
+next{T}(itr::EachChunk{T}, nada) = (itr.f(itr.stream), nothing)
+eltype{T}(::Type{EachChunk{T}}) = T
+
+readlines(s=STDIN) = collect(eachline(s))
 
-start(itr::EachLine) = nothing
-function done(itr::EachLine, nada)
-    if !eof(itr.stream)
-        return false
+function eachblock(stream::IO, blocksize=0, ondone=()->nothing)
+    if blocksize == 0
+        blocksize = 8192
     end
-    itr.ondone()
-    true
+    a = Array(UInt8, blocksize)
+    EachChunk{Vector{UInt8}}(stream, io->read!(io, a), ondone)
+end
+
+function eachblock(filename::AbstractString, blocksize=0)
+    io=open(filename)
+    eachblock(io, blocksize, ()->close(io))
 end
-next(itr::EachLine, nada) = (readline(itr.stream), nothing)
-eltype(::Type{EachLine}) = ByteString
 
-readlines(s=STDIN) = collect(eachline(s))
 
 # IOStream Marking
 

diff --git a/base/iobuffer.jl b/base/iobuffer.jl
@@ -328,13 +328,12 @@ function write(to::AbstractIOBuffer, a::UInt8)
     sizeof(UInt8)
 end
 
-function readbytes!(io::AbstractIOBuffer, b::Array{UInt8}, nb=length(b))
+function read!(io::AbstractIOBuffer, b::Vector{UInt8}, nb=length(b))
     nr = min(nb, nb_available(io))
     if length(b) < nr
         resize!(b, nr)
     end
     read_sub(io, b, 1, nr)
-    return nr
 end
 read(io::AbstractIOBuffer) = read!(io, Array(UInt8, nb_available(io)))
 read(io::AbstractIOBuffer, nb::Integer) = read!(io, Array(UInt8, min(nb, nb_available(io))))

diff --git a/base/iostream.jl b/base/iostream.jl
@@ -218,10 +218,7 @@ function readbytes_all!(s::IOStream, b::Array{UInt8}, nb)
                         s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
         eof(s) && break
     end
-    if lb > olb && lb > nr
-        resize!(b, nr) # shrink to just contain input data if was resized
-    end
-    return nr
+    resize!(b, nr)
 end
 
 function readbytes_some!(s::IOStream, b::Array{UInt8}, nb)
@@ -231,13 +228,10 @@ function readbytes_some!(s::IOStream, b::Array{UInt8}, nb)
     end
     nr = Int(ccall(:ios_read, Csize_t, (Ptr{Void}, Ptr{Void}, Csize_t),
                    s.ios, pointer(b), nb))
-    if lb > olb && lb > nr
-        resize!(b, nr)
-    end
-    return nr
+    resize!(b, nr)
 end
 
-function readbytes!(s::IOStream, b::Array{UInt8}, nb=length(b); all::Bool=true)
+function read!(s::IOStream, b::Vector{UInt8}, nb=length(b); all::Bool=true)
     return all ? readbytes_all!(s, b, nb) : readbytes_some!(s, b, nb)
 end
 
@@ -251,14 +245,11 @@ function read(s::IOStream)
         end
     end
     b = Array(UInt8, sz<=0 ? 1024 : sz)
-    nr = readbytes_all!(s, b, typemax(Int))
-    resize!(b, nr)
+    readbytes_all!(s, b, typemax(Int))
 end
 
 function read(s::IOStream, nb::Integer; all::Bool=true)
-    b = Array(UInt8, nb)
-    nr = readbytes!(s, b, nb, all=all)
-    resize!(b, nr)
+    read!(s, Array(UInt8, nb), nb, all)
 end
 
 ## Character streams ##

diff --git a/base/stream.jl b/base/stream.jl
@@ -892,20 +892,12 @@ function stop_reading(stream::LibuvStream)
     end
 end
 
-function readbytes!(s::LibuvStream, b::AbstractArray{UInt8}, nb=length(b))
-    wait_readnb(s, nb)
-    nr = nb_available(s)
-    resize!(b, nr) # shrink to just contain input data if was resized
-    read!(s.buffer, b)
-    return nr
-end
-
 function read(stream::LibuvStream)
     wait_readnb(stream, typemax(Int))
     return takebuf_array(stream.buffer)
 end
 
-function read!(s::LibuvStream, a::Array{UInt8, 1})
+function read!(s::LibuvStream, a::Vector{UInt8})
     nb = length(a)
     sbuf = s.buffer
     @assert sbuf.seekable == false
@@ -936,6 +928,13 @@ function read!(s::LibuvStream, a::Array{UInt8, 1})
     return a
 end
 
+function read!(s::LibuvStream, b::Vector{UInt8}, nb=length(b))
+    wait_readnb(s, nb)
+    nr = nb_available(s)
+    resize!(b, nr)
+    read!(s.buffer, b)
+end
+
 function read(this::LibuvStream, ::Type{UInt8})
     wait_readnb(this, 1)
     buf = this.buffer

diff --git a/doc/stdlib/io-network.rst b/doc/stdlib/io-network.rst
@@ -152,11 +152,11 @@ General I/O
 
    Read binary data from a stream, filling in the argument ``array``\ .
 
-.. function:: readbytes!(stream, b::Vector{UInt8}, nb=length(b); all=true)
+.. function:: read!(stream, b::Vector{UInt8}, nb=length(b); all=true)
 
    .. Docstring generated from Julia source
 
-   Read at most ``nb`` bytes from the stream into ``b``\ , returning the number of bytes read (increasing the size of ``b`` as needed).
+   Read at most ``nb`` bytes from the stream into ``b``\ , resizing ``b`` to match the number of bytes read.
 
    See ``read`` for a description of the ``all`` option.
 
@@ -521,7 +521,13 @@ Text I/O
 
    .. Docstring generated from Julia source
 
-   Create an iterable object that will yield each line.
+   Iterable that yields each line.
+
+.. function:: eachblock(stream or filename, [blocksize])
+
+   .. Docstring generated from Julia source
+
+   Iterable that yields each block as `AbstractArray{UInt8}`
 
 .. function:: readdlm(source, delim::Char, T::Type, eol::Char; header=false, skipstart=0, skipblanks=true, use_mmap, ignore_invalid_chars=false, quotes=true, dims, comments=true, comment_char='#')
 

diff --git a/test/file.jl b/test/file.jl
@@ -1096,9 +1096,11 @@ let s = "qwerty"
 
     # Test growing output array
     x = UInt8[]
-    n = readbytes!(IOBuffer(s), x, 10)
+    a = read!(IOBuffer(s), x, 10)
     @test x == s.data
-    @test n == length(x)
+    @test a == s.data
+    @test length(a) == length(x)
+    @test length(s) == length(x)
 end
 
 # DevNull