From e757e5c3bc2d7caeefe9a446b35a82fc2e9fa72b Mon Sep 17 00:00:00 2001 From: Sam O'Connor Date: Fri, 15 Jan 2016 13:37:40 +1100 Subject: [PATCH] Deprecate readbytes!() - There were only a handful of uses of readbytes!() in Base, mostly in implementations of other io functions. - Most existing read!() methods were already resizing the result array so returning byte count from readbytes!() was not that useful. - Added eachblock() to deal with countlines() use case in datafmt.jl. (eachblock() is an iterator like eachline()) --- base/datafmt.jl | 6 ++--- base/deprecated.jl | 6 +++++ base/docs/helpdb/Base.jl | 17 ++++++++++---- base/exports.jl | 2 +- base/filesystem.jl | 17 +++++--------- base/io.jl | 49 ++++++++++++++++++++++++--------------- base/iobuffer.jl | 3 +-- base/iostream.jl | 19 ++++----------- base/stream.jl | 17 +++++++------- doc/stdlib/io-network.rst | 12 +++++++--- test/file.jl | 6 +++-- 11 files changed, 84 insertions(+), 70 deletions(-) diff --git a/base/datafmt.jl b/base/datafmt.jl index deda884281ce7..a3506ae891597 100644 --- a/base/datafmt.jl +++ b/base/datafmt.jl @@ -18,11 +18,9 @@ const offs_chunk_size = 5000 countlines(f::AbstractString, eol::Char='\n') = open(io->countlines(io,eol), f)::Int function countlines(io::IO, eol::Char='\n') isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported")) - a = Array(UInt8, 8192) nl = 0 - while !eof(io) - nb = readbytes!(io, a) - @simd for i=1:nb + for a in eachblock(io) + @simd for i=1:length(a) @inbounds nl += a[i] == eol end end diff --git a/base/deprecated.jl b/base/deprecated.jl index a5811b154b9d0..c1947cda0031e 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -964,3 +964,9 @@ end #https://github.com/JuliaLang/julia/issues/14608 @deprecate readall readstring @deprecate readbytes read + +export readbytes! +@noinline function readbytes!(io, a, n=length(a)) + depwarn("readbytes! is deprecated, use read! instead", :readbytes!) 
+ return length(read!(io, a, n)) +end diff --git a/base/docs/helpdb/Base.jl b/base/docs/helpdb/Base.jl index c459402a9c20c..74380fc8adb89 100644 --- a/base/docs/helpdb/Base.jl +++ b/base/docs/helpdb/Base.jl @@ -2462,10 +2462,18 @@ poll_file """ eachline(stream or filename) -Create an iterable object that will yield each line. +Iterable that yields each line. """ eachline +""" + eachblock(stream or filename, [blocksize]) + + +Iterable that yields each block as `AbstractArray{UInt8}` +""" +eachblock + """ isposdef!(A) -> Bool @@ -6430,14 +6438,13 @@ Compute the inverse secant of `x`, where the output is in degrees. asecd """ - readbytes!(stream, b::Vector{UInt8}, nb=length(b); all=true) + read!(stream, b::Vector{UInt8}, nb=length(b); all=true) -Read at most `nb` bytes from the stream into `b`, returning the number of bytes read -(increasing the size of `b` as needed). +Read at most `nb` bytes from the stream into `b`, resizing `b` to match the number of bytes read. See `read` for a description of the `all` option. """ -readbytes! +read! 
""" basename(path::AbstractString) -> AbstractString diff --git a/base/exports.jl b/base/exports.jl index 6ba532042a927..00ea0e6914bba 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -1132,6 +1132,7 @@ export connect, countlines, deserialize, + eachblock, eachline, eof, fd, @@ -1167,7 +1168,6 @@ export read!, readstring, readavailable, - readbytes!, readchomp, readcsv, readdir, diff --git a/base/filesystem.jl b/base/filesystem.jl index a6f032ee95844..4d164ee686691 100644 --- a/base/filesystem.jl +++ b/base/filesystem.jl @@ -151,27 +151,22 @@ function read(f::File, ::Type{UInt8}) return ret % UInt8 end -function read!(f::File, a::Vector{UInt8}, nel=length(a)) +function read!(f::File, a::Vector{UInt8}) check_open(f) - if nel < 0 || nel > length(a) - throw(BoundsError()) - end ret = ccall(:jl_fs_read, Int32, (Int32, Ptr{Void}, Csize_t), - f.handle, a, nel) + f.handle, a, length(a)) uv_error("read",ret) return a end nb_available(f::File) = filesize(f) - position(f) -function readbytes!(f::File, b::Array{UInt8}, nb=length(b)) +function read!(f::File, b::Vector{UInt8}, nb=length(b)) nr = min(nb, nb_available(f)) - if length(b) < nr - resize!(b, nr) - end - read!(f, b, nr) - return nr + resize!(b, nr) + read!(f, b) end + read(io::File) = read!(io, Array(UInt8, nb_available(io))) read(io::File, nb::Integer) = read!(io, Array(UInt8, min(nb, nb_available(io)))) diff --git a/base/io.jl b/base/io.jl index a1f0f4a2bf8f4..c0b970ca94172 100644 --- a/base/io.jl +++ b/base/io.jl @@ -307,7 +307,7 @@ readline(s::IO) = readuntil(s, '\n') readchomp(x) = chomp!(readstring(x)) # read up to nb bytes into nb, returning # bytes read -function readbytes!(s::IO, b::AbstractArray{UInt8}, nb=length(b)) +function read!(s::IO, b::Vector{UInt8}, nb=length(b)) olb = lb = length(b) nr = 0 while nr < nb && !eof(s) @@ -322,16 +322,15 @@ function readbytes!(s::IO, b::AbstractArray{UInt8}, nb=length(b)) if lb > olb resize!(b, nr) # shrink to just contain input data if was resized end - return nr + 
return b end # read up to nb bytes from s, returning a Vector{UInt8} of bytes read. function read(s::IO, nb=typemax(Int)) - # Let readbytes! grow the array progressively by default + # Let read! grow the array progressively by default # instead of taking of risk of over-allocating b = Array(UInt8, nb == typemax(Int) ? 1024 : nb) - nr = readbytes!(s, b, nb) - resize!(b, nr) + read!(s, b, nb) end function readstring(s::IO) @@ -341,27 +340,39 @@ end ## high-level iterator interfaces ## -type EachLine +type EachChunk{T} stream::IO + f::Function ondone::Function - EachLine(stream) = EachLine(stream, ()->nothing) - EachLine(stream, ondone) = new(stream, ondone) + EachChunk(stream, f) = EachChunk{T}(stream, f, ()->nothing) + EachChunk(stream, f, ondone) = new(stream, f, ondone) end -eachline(stream::IO) = EachLine(stream) -eachline(filename::AbstractString) = EachLine(open(filename), close) +eachline(stream::IO) = EachChunk{ByteString}(stream, readline) +function eachline(filename::AbstractString) + io = open(filename) + EachChunk{ByteString}(io, readline, ()->close(io)) +end + +start{T}(::EachChunk{T}) = nothing +done{T}(itr::EachChunk{T}, nada) = eof(itr.stream) ? 
(itr.ondone(); true) : false +next{T}(itr::EachChunk{T}, nada) = (itr.f(itr.stream), nothing) +eltype{T}(::Type{EachChunk{T}}) = T + +readlines(s=STDIN) = collect(eachline(s)) -start(itr::EachLine) = nothing -function done(itr::EachLine, nada) - if !eof(itr.stream) - return false +function eachblock(stream::IO, blocksize=0, ondone=()->nothing) + if blocksize == 0 + blocksize = 8192 end - itr.ondone() - true + a = Array(UInt8, blocksize) + EachChunk{Vector{UInt8}}(stream, io->read!(io, a), ondone) +end + +function eachblock(filename::AbstractString, blocksize=0) + io=open(filename) + eachblock(io, blocksize, ()->close(io)) end -next(itr::EachLine, nada) = (readline(itr.stream), nothing) -eltype(::Type{EachLine}) = ByteString -readlines(s=STDIN) = collect(eachline(s)) # IOStream Marking diff --git a/base/iobuffer.jl b/base/iobuffer.jl index cd748862bb949..86483a938c512 100644 --- a/base/iobuffer.jl +++ b/base/iobuffer.jl @@ -328,13 +328,12 @@ function write(to::AbstractIOBuffer, a::UInt8) sizeof(UInt8) end -function readbytes!(io::AbstractIOBuffer, b::Array{UInt8}, nb=length(b)) +function read!(io::AbstractIOBuffer, b::Vector{UInt8}, nb=length(b)) nr = min(nb, nb_available(io)) if length(b) < nr resize!(b, nr) end read_sub(io, b, 1, nr) - return nr end read(io::AbstractIOBuffer) = read!(io, Array(UInt8, nb_available(io))) read(io::AbstractIOBuffer, nb::Integer) = read!(io, Array(UInt8, min(nb, nb_available(io)))) diff --git a/base/iostream.jl b/base/iostream.jl index 3e3aa61ca003a..7bacc4f608b17 100644 --- a/base/iostream.jl +++ b/base/iostream.jl @@ -218,10 +218,7 @@ function readbytes_all!(s::IOStream, b::Array{UInt8}, nb) s.ios, pointer(b, nr+1), min(lb-nr, nb-nr))) eof(s) && break end - if lb > olb && lb > nr - resize!(b, nr) # shrink to just contain input data if was resized - end - return nr + resize!(b, nr) end function readbytes_some!(s::IOStream, b::Array{UInt8}, nb) @@ -231,13 +228,10 @@ function readbytes_some!(s::IOStream, b::Array{UInt8}, nb) end nr = 
Int(ccall(:ios_read, Csize_t, (Ptr{Void}, Ptr{Void}, Csize_t), s.ios, pointer(b), nb)) - if lb > olb && lb > nr - resize!(b, nr) - end - return nr + resize!(b, nr) end -function readbytes!(s::IOStream, b::Array{UInt8}, nb=length(b); all::Bool=true) +function read!(s::IOStream, b::Vector{UInt8}, nb=length(b); all::Bool=true) return all ? readbytes_all!(s, b, nb) : readbytes_some!(s, b, nb) end @@ -251,14 +245,11 @@ function read(s::IOStream) end end b = Array(UInt8, sz<=0 ? 1024 : sz) - nr = readbytes_all!(s, b, typemax(Int)) - resize!(b, nr) + readbytes_all!(s, b, typemax(Int)) end function read(s::IOStream, nb::Integer; all::Bool=true) - b = Array(UInt8, nb) - nr = readbytes!(s, b, nb, all=all) - resize!(b, nr) + read!(s, Array(UInt8, nb), nb, all) end ## Character streams ## diff --git a/base/stream.jl b/base/stream.jl index c33f6a1da1050..e1282f7d2fdf6 100644 --- a/base/stream.jl +++ b/base/stream.jl @@ -892,20 +892,12 @@ function stop_reading(stream::LibuvStream) end end -function readbytes!(s::LibuvStream, b::AbstractArray{UInt8}, nb=length(b)) - wait_readnb(s, nb) - nr = nb_available(s) - resize!(b, nr) # shrink to just contain input data if was resized - read!(s.buffer, b) - return nr -end - function read(stream::LibuvStream) wait_readnb(stream, typemax(Int)) return takebuf_array(stream.buffer) end -function read!(s::LibuvStream, a::Array{UInt8, 1}) +function read!(s::LibuvStream, a::Vector{UInt8}) nb = length(a) sbuf = s.buffer @assert sbuf.seekable == false @@ -936,6 +928,13 @@ function read!(s::LibuvStream, a::Array{UInt8, 1}) return a end +function read!(s::LibuvStream, b::Vector{UInt8}, nb=length(b)) + wait_readnb(s, nb) + nr = nb_available(s) + resize!(b, nr) + read!(s.buffer, b) +end + function read(this::LibuvStream, ::Type{UInt8}) wait_readnb(this, 1) buf = this.buffer diff --git a/doc/stdlib/io-network.rst b/doc/stdlib/io-network.rst index 19a0880de1114..1e8172f248d60 100644 --- a/doc/stdlib/io-network.rst +++ b/doc/stdlib/io-network.rst @@ -152,11 
+152,11 @@ General I/O Read binary data from a stream, filling in the argument ``array``\ . -.. function:: readbytes!(stream, b::Vector{UInt8}, nb=length(b); all=true) +.. function:: read!(stream, b::Vector{UInt8}, nb=length(b); all=true) .. Docstring generated from Julia source - Read at most ``nb`` bytes from the stream into ``b``\ , returning the number of bytes read (increasing the size of ``b`` as needed). + Read at most ``nb`` bytes from the stream into ``b``\ , resizing ``b`` to match the number of bytes read. See ``read`` for a description of the ``all`` option. @@ -521,7 +521,13 @@ Text I/O .. Docstring generated from Julia source - Create an iterable object that will yield each line. + Iterable that yields each line. + +.. function:: eachblock(stream or filename, [blocksize]) + + .. Docstring generated from Julia source + + Iterable that yields each block as ``AbstractArray{UInt8}`` .. function:: readdlm(source, delim::Char, T::Type, eol::Char; header=false, skipstart=0, skipblanks=true, use_mmap, ignore_invalid_chars=false, quotes=true, dims, comments=true, comment_char='#') diff --git a/test/file.jl b/test/file.jl index 2a6345eb2ae06..90caed9c6bb63 100644 --- a/test/file.jl +++ b/test/file.jl @@ -1096,9 +1096,11 @@ let s = "qwerty" # Test growing output array x = UInt8[] - n = readbytes!(IOBuffer(s), x, 10) + a = read!(IOBuffer(s), x, 10) @test x == s.data - @test n == length(x) + @test a == s.data + @test length(a) == length(x) + @test length(s) == length(x) end # DevNull