Skip to content

Commit

Permalink
Deprecate readbytes!()
Browse files Browse the repository at this point in the history
 - There were only a handfull of uses of readbytes!() in Base,
   mostly in implementations of other io functions.

 - Most existing read!() methods were already resizing the result array
   so returning byte count from readbytes!() was not that useful.

 - added eachblock() to deal with countlines() usecase in datafmt.jl.
   (eachblock() is an iterator like eachline())
  • Loading branch information
samoconnor committed Jan 15, 2016
1 parent fd6b27f commit e757e5c
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 70 deletions.
6 changes: 2 additions & 4 deletions base/datafmt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,9 @@ const offs_chunk_size = 5000
countlines(f::AbstractString, eol::Char='\n') = open(io->countlines(io,eol), f)::Int
function countlines(io::IO, eol::Char='\n')
isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
a = Array(UInt8, 8192)
nl = 0
while !eof(io)
nb = readbytes!(io, a)
@simd for i=1:nb
for a in eachblock(io)
@simd for i=1:length(a)
@inbounds nl += a[i] == eol
end
end
Expand Down
6 changes: 6 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -964,3 +964,9 @@ end
#https://github.com/JuliaLang/julia/issues/14608
@deprecate readall readstring
@deprecate readbytes read

export readbytes!
@noinline function readbytes!(io, a, n=length(a))
depwarn("readbytes! is deprecated, use read! instead", :readbytes!)
return length(read!(io, a, n))
end
17 changes: 12 additions & 5 deletions base/docs/helpdb/Base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2462,10 +2462,18 @@ poll_file
"""
eachline(stream or filename)
Create an iterable object that will yield each line.
Iterable that yields each line.
"""
eachline

"""
eachblock(stream or filename, [blocksize])
Iterable that yields each block as `AbstractArray{UInt8}`
"""
eachblock

"""
isposdef!(A) -> Bool
Expand Down Expand Up @@ -6430,14 +6438,13 @@ Compute the inverse secant of `x`, where the output is in degrees.
asecd

"""
readbytes!(stream, b::Vector{UInt8}, nb=length(b); all=true)
read!(stream, b::Vector{UInt8}, nb=length(b); all=true)
Read at most `nb` bytes from the stream into `b`, returning the number of bytes read
(increasing the size of `b` as needed).
Read at most `nb` bytes from the stream into `b`, resizing `b` to match the number of bytes read.
See `read` for a description of the `all` option.
"""
readbytes!
read!

"""
basename(path::AbstractString) -> AbstractString
Expand Down
2 changes: 1 addition & 1 deletion base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1132,6 +1132,7 @@ export
connect,
countlines,
deserialize,
eachblock,
eachline,
eof,
fd,
Expand Down Expand Up @@ -1167,7 +1168,6 @@ export
read!,
readstring,
readavailable,
readbytes!,
readchomp,
readcsv,
readdir,
Expand Down
17 changes: 6 additions & 11 deletions base/filesystem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -151,27 +151,22 @@ function read(f::File, ::Type{UInt8})
return ret % UInt8
end

function read!(f::File, a::Vector{UInt8}, nel=length(a))
function read!(f::File, a::Vector{UInt8})
check_open(f)
if nel < 0 || nel > length(a)
throw(BoundsError())
end
ret = ccall(:jl_fs_read, Int32, (Int32, Ptr{Void}, Csize_t),
f.handle, a, nel)
f.handle, a, length(a))
uv_error("read",ret)
return a
end

nb_available(f::File) = filesize(f) - position(f)

function readbytes!(f::File, b::Array{UInt8}, nb=length(b))
function read!(f::File, b::Vector{UInt8}, nb=length(b))
nr = min(nb, nb_available(f))
if length(b) < nr
resize!(b, nr)
end
read!(f, b, nr)
return nr
resize!(b, nr)
read!(f, b)
end

read(io::File) = read!(io, Array(UInt8, nb_available(io)))
read(io::File, nb::Integer) = read!(io, Array(UInt8, min(nb, nb_available(io))))

Expand Down
49 changes: 30 additions & 19 deletions base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ readline(s::IO) = readuntil(s, '\n')
readchomp(x) = chomp!(readstring(x))

# read up to nb bytes into nb, returning # bytes read
function readbytes!(s::IO, b::AbstractArray{UInt8}, nb=length(b))
function read!(s::IO, b::Vector{UInt8}, nb=length(b))
olb = lb = length(b)
nr = 0
while nr < nb && !eof(s)
Expand All @@ -322,16 +322,15 @@ function readbytes!(s::IO, b::AbstractArray{UInt8}, nb=length(b))
if lb > olb
resize!(b, nr) # shrink to just contain input data if was resized
end
return nr
return b
end

# read up to nb bytes from s, returning a Vector{UInt8} of bytes read.
function read(s::IO, nb=typemax(Int))
# Let readbytes! grow the array progressively by default
# Let read! grow the array progressively by default
# instead of taking of risk of over-allocating
b = Array(UInt8, nb == typemax(Int) ? 1024 : nb)
nr = readbytes!(s, b, nb)
resize!(b, nr)
read!(s, b, nb)
end

function readstring(s::IO)
Expand All @@ -341,27 +340,39 @@ end

## high-level iterator interfaces ##

type EachLine
type EachChunk{T}
stream::IO
f::Function
ondone::Function
EachLine(stream) = EachLine(stream, ()->nothing)
EachLine(stream, ondone) = new(stream, ondone)
EachChunk(stream, f) = EachChunk{T}(stream, f, ()->nothing)
EachChunk(stream, f, ondone) = new(stream, f, ondone)
end
eachline(stream::IO) = EachLine(stream)
eachline(filename::AbstractString) = EachLine(open(filename), close)
eachline(stream::IO) = EachChunk{ByteString}(stream, readline)
function eachline(filename::AbstractString)
io = open(filename)
EachChunk{ByteString}(io, readline, ()->close(io))
end

start{T}(::EachChunk{T}) = nothing
done{T}(itr::EachChunk{T}, nada) = eof(itr.stream) ? (itr.ondone(); true) : false
next{T}(itr::EachChunk{T}, nada) = (itr.f(itr.stream), nothing)
eltype{T}(::Type{EachChunk{T}}) = T

readlines(s=STDIN) = collect(eachline(s))

start(itr::EachLine) = nothing
function done(itr::EachLine, nada)
if !eof(itr.stream)
return false
function eachblock(stream::IO, blocksize=0, ondone=()->nothing)
if blocksize == 0
blocksize = 8192
end

This comment has been minimized.

Copy link
@kmsquire

kmsquire Jan 15, 2016

Why not jus set the default blocksize to 8192?

This comment has been minimized.

Copy link
@kmsquire

kmsquire Jan 15, 2016

(Here and below.)

This comment has been minimized.

Copy link
@samoconnor

samoconnor Jan 15, 2016

Author Owner

To avoid double-maintainance when it gets changed to 65536.
Almost seems like we need some magical singleton called default for cases like this.

itr.ondone()
true
a = Array(UInt8, blocksize)
EachChunk{Vector{UInt8}}(stream, io->read!(io, a), ondone)

This comment has been minimized.

Copy link
@JeffBezanson

JeffBezanson Jan 15, 2016

I would hold off on adding this iterator. It's preferred for iterators to be as pure as possible, and it's a bit surprising that this mutates the same array on each call instead of returning a new object like eachline does.

This comment has been minimized.

Copy link
@samoconnor

samoconnor Jan 15, 2016

Author Owner

This is intended to cover this case in datafmt.jl there it appears to be important to re-use the buffer for efficiency.

It seems to me like a fairly common pattern to loop though a file or stream a block at a time with a re-used buffer to avoid creating lots of work for the GC.

I can also imagine that this abstraction allows nice zero-copy implementations for particular stream types where a pointer to an underlying buffer can be returned.

This comment has been minimized.

Copy link
@JeffBezanson

JeffBezanson Jan 15, 2016

That's true but it's debatable whether iterators are the right abstraction for mutating a buffer. I don't see any urgency to rewrite countlines at this point. I would also worry about performance regressions in eachline due to the extra indirection.

This comment has been minimized.

Copy link
@samoconnor

samoconnor Jan 15, 2016

Author Owner

Fair 'nuf.

end

function eachblock(filename::AbstractString, blocksize=0)
io=open(filename)
eachblock(io, blocksize, ()->close(io))
end
next(itr::EachLine, nada) = (readline(itr.stream), nothing)
eltype(::Type{EachLine}) = ByteString

readlines(s=STDIN) = collect(eachline(s))

# IOStream Marking

Expand Down
3 changes: 1 addition & 2 deletions base/iobuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -328,13 +328,12 @@ function write(to::AbstractIOBuffer, a::UInt8)
sizeof(UInt8)
end

function readbytes!(io::AbstractIOBuffer, b::Array{UInt8}, nb=length(b))
function read!(io::AbstractIOBuffer, b::Vector{UInt8}, nb=length(b))
nr = min(nb, nb_available(io))
if length(b) < nr
resize!(b, nr)
end
read_sub(io, b, 1, nr)
return nr
end
read(io::AbstractIOBuffer) = read!(io, Array(UInt8, nb_available(io)))
read(io::AbstractIOBuffer, nb::Integer) = read!(io, Array(UInt8, min(nb, nb_available(io))))
Expand Down
19 changes: 5 additions & 14 deletions base/iostream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,7 @@ function readbytes_all!(s::IOStream, b::Array{UInt8}, nb)
s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
eof(s) && break
end
if lb > olb && lb > nr
resize!(b, nr) # shrink to just contain input data if was resized
end
return nr
resize!(b, nr)
end

function readbytes_some!(s::IOStream, b::Array{UInt8}, nb)
Expand All @@ -231,13 +228,10 @@ function readbytes_some!(s::IOStream, b::Array{UInt8}, nb)
end
nr = Int(ccall(:ios_read, Csize_t, (Ptr{Void}, Ptr{Void}, Csize_t),
s.ios, pointer(b), nb))
if lb > olb && lb > nr
resize!(b, nr)
end
return nr
resize!(b, nr)
end

function readbytes!(s::IOStream, b::Array{UInt8}, nb=length(b); all::Bool=true)
function read!(s::IOStream, b::Vector{UInt8}, nb=length(b); all::Bool=true)
return all ? readbytes_all!(s, b, nb) : readbytes_some!(s, b, nb)
end

Expand All @@ -251,14 +245,11 @@ function read(s::IOStream)
end
end
b = Array(UInt8, sz<=0 ? 1024 : sz)
nr = readbytes_all!(s, b, typemax(Int))
resize!(b, nr)
readbytes_all!(s, b, typemax(Int))
end

function read(s::IOStream, nb::Integer; all::Bool=true)
b = Array(UInt8, nb)
nr = readbytes!(s, b, nb, all=all)
resize!(b, nr)
read!(s, Array(UInt8, nb), nb, all)
end

## Character streams ##
Expand Down
17 changes: 8 additions & 9 deletions base/stream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -892,20 +892,12 @@ function stop_reading(stream::LibuvStream)
end
end

function readbytes!(s::LibuvStream, b::AbstractArray{UInt8}, nb=length(b))
wait_readnb(s, nb)
nr = nb_available(s)
resize!(b, nr) # shrink to just contain input data if was resized
read!(s.buffer, b)
return nr
end

function read(stream::LibuvStream)
wait_readnb(stream, typemax(Int))
return takebuf_array(stream.buffer)
end

function read!(s::LibuvStream, a::Array{UInt8, 1})
function read!(s::LibuvStream, a::Vector{UInt8})
nb = length(a)
sbuf = s.buffer
@assert sbuf.seekable == false
Expand Down Expand Up @@ -936,6 +928,13 @@ function read!(s::LibuvStream, a::Array{UInt8, 1})
return a
end

function read!(s::LibuvStream, b::Vector{UInt8}, nb=length(b))

This comment has been minimized.

Copy link
@JeffBezanson

JeffBezanson Jan 15, 2016

This definition should be removed in favor of the one above (as it is, I believe it actually overwrites it). The above definition handles large data more efficiently.

This comment has been minimized.

Copy link
@samoconnor

samoconnor Jan 15, 2016

Author Owner

True (there are probably more examples like that).
This branch is pretty raw still. Worth looking to see what direction I'm going, but not ready for in depth review yet.

wait_readnb(s, nb)
nr = nb_available(s)
resize!(b, nr)
read!(s.buffer, b)
end

function read(this::LibuvStream, ::Type{UInt8})
wait_readnb(this, 1)
buf = this.buffer
Expand Down
12 changes: 9 additions & 3 deletions doc/stdlib/io-network.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,11 @@ General I/O
Read binary data from a stream, filling in the argument ``array``\ .

.. function:: readbytes!(stream, b::Vector{UInt8}, nb=length(b); all=true)
.. function:: read!(stream, b::Vector{UInt8}, nb=length(b); all=true)

.. Docstring generated from Julia source
Read at most ``nb`` bytes from the stream into ``b``\ , returning the number of bytes read (increasing the size of ``b`` as needed).
Read at most ``nb`` bytes from the stream into ``b``\ , resizing ``b`` to match the number of bytes read.

See ``read`` for a description of the ``all`` option.

Expand Down Expand Up @@ -521,7 +521,13 @@ Text I/O

.. Docstring generated from Julia source
Create an iterable object that will yield each line.
Iterable that yields each line.

.. function:: eachblock(stream or filename, [blocksize])

.. Docstring generated from Julia source
Iterable that yields each block as `AbstractArray{UInt8}`

.. function:: readdlm(source, delim::Char, T::Type, eol::Char; header=false, skipstart=0, skipblanks=true, use_mmap, ignore_invalid_chars=false, quotes=true, dims, comments=true, comment_char='#')

Expand Down
6 changes: 4 additions & 2 deletions test/file.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1096,9 +1096,11 @@ let s = "qwerty"

# Test growing output array
x = UInt8[]
n = readbytes!(IOBuffer(s), x, 10)
a = read!(IOBuffer(s), x, 10)
@test x == s.data
@test n == length(x)
@test a == s.data
@test length(a) == length(x)
@test length(s) == length(x)
end

# DevNull
Expand Down

0 comments on commit e757e5c

Please sign in to comment.