Skip to content

Commit

Permalink
faster filesize and read(::IOStream) (JuliaLang#35925)
Browse files Browse the repository at this point in the history
  • Loading branch information
JeffBezanson authored and simeonschaub committed Aug 11, 2020
1 parent 3862523 commit 43e8023
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 17 deletions.
2 changes: 1 addition & 1 deletion base/filesystem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import .Base:
IOError, _UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
bytesavailable, position, read, read!, readavailable, seek, seekend, show,
skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error,
rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror
rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize

import .Base.RefValue

Expand Down
51 changes: 35 additions & 16 deletions base/iostream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,12 @@ function position(s::IOStream)
return pos
end

# Size of the file behind `s`, as reported by the C routine `ios_filesize`.
# The C call is made under the stream lock (via `@_lock_ios`); a result of
# -1 is turned into a system error labelled "filesize".
function filesize(s::IOStream)
    nbytes = @_lock_ios s begin
        ccall(:ios_filesize, Int64, (Ptr{Cvoid},), s.ios)
    end
    failed = nbytes == -1
    systemerror("filesize", failed)
    return nbytes
end

# EOF check that calls `ios_eof_blocking` directly without taking the stream
# lock itself; presumably the caller is expected to hold it (see `eof`).
function _eof_nolock(s::IOStream)
    status = ccall(:ios_eof_blocking, Cint, (Ptr{Cvoid},), s.ios)
    return status != 0
end
# Locked wrapper around `_eof_nolock`: performs the EOF check inside
# `@_lock_ios` and returns its result.
function eof(s::IOStream)
    return @_lock_ios s _eof_nolock(s)
end

Expand Down Expand Up @@ -441,9 +447,10 @@ function readbytes_all!(s::IOStream, b::Array{UInt8}, nb)
lb = max(65536, (nr+1) * 2)
resize!(b, lb)
end
nr += Int(ccall(:ios_readall, Csize_t, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
_eof_nolock(s) && break
thisr = Int(ccall(:ios_readall, Csize_t, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
nr += thisr
(nr == nb || thisr == 0 || _eof_nolock(s)) && break
end
end
if lb > olb && lb > nr
Expand Down Expand Up @@ -486,21 +493,33 @@ function readbytes!(s::IOStream, b::Array{UInt8}, nb=length(b); all::Bool=true)
end

function read(s::IOStream)
sz = try # filesize is just a hint, so ignore if `fstat` fails
filesize(s)
catch ex
ex isa IOError || rethrow()
Int64(0)
end
if sz > 0
pos = position(s)
if pos > 0
sz -= pos
# First we try to fill the buffer. If that gives us the whole file,
# copy it out and return. Otherwise look at the file size and use it
# to preallocate space. Determining the size requires extra syscalls,
# which we want to avoid for small files.
@_lock_ios s begin
nb = ccall(:ios_fillbuf, Cssize_t, (Ptr{Cvoid},), s.ios)
if nb != -1
b = StringVector(nb)
readbytes_all!(s, b, nb)
else
sz = try # filesize is just a hint, so ignore if it fails
filesize(s)
catch ex
ex isa IOError || rethrow()
Int64(-1)
end
if sz > 0
pos = position(s)
if pos > 0
sz -= pos
end
end
b = StringVector(sz < 0 ? 1024 : sz)
nr = readbytes_all!(s, b, sz < 0 ? typemax(Int) : sz)
resize!(b, nr)
end
end
b = StringVector(sz <= 0 ? 1024 : sz)
nr = readbytes_all!(s, b, typemax(Int))
resize!(b, nr)
return b
end

Expand Down
28 changes: 28 additions & 0 deletions src/support/ios.c
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,17 @@ size_t ios_readprep(ios_t *s, size_t n)
return (size_t)(s->size - s->bpos);
}

// Try to top up the buffer with as much data as it has room for past the
// current buffer position. If fewer bytes than requested end up available,
// the whole remainder of the file is buffered and that byte count is
// returned; otherwise -1 is returned, meaning more data may still follow.
ssize_t ios_fillbuf(ios_t *s)
{
    size_t room = s->maxsize - s->bpos;
    size_t avail = ios_readprep(s, room);
    return (avail < room) ? (ssize_t)avail : -1;
}

static void _write_update_pos(ios_t *s)
{
if (s->bpos > s->ndirty) s->ndirty = s->bpos;
Expand Down Expand Up @@ -535,6 +546,22 @@ int64_t ios_pos(ios_t *s)
return fdpos;
}

// Return the size of the file behind `s` (the offset reported by seeking to
// the end of its descriptor), or -1 on failure.
//
// The descriptor's offset is put back where it was before returning. The
// original offset comes from the cached `s->fpos`; when that cache is unset
// (-1) it is queried with lseek(SEEK_CUR) and stored.
//
// Failure cases (all return -1):
//   - the stream has no file descriptor (fd == -1); no system call is made,
//   - the current offset cannot be determined,
//   - seeking to the end fails (e.g. the descriptor is not seekable),
//   - the offset cannot be restored afterwards; `s->fpos` is then reset to
//     -1 so the stale cached offset is not trusted by later calls.
int64_t ios_filesize(ios_t *s)
{
    if (s->fd == -1)
        return -1;
    int64_t fdpos = s->fpos;
    if (fdpos == (int64_t)-1) {
        fdpos = lseek(s->fd, 0, SEEK_CUR);
        if (fdpos == (int64_t)-1)
            return fdpos;
        s->fpos = fdpos;
    }
    off_t sz = lseek(s->fd, 0, SEEK_END);
    // A failed lseek leaves the offset untouched, so there is nothing to
    // restore; returning right away also keeps errno from the failing call.
    if (sz == (off_t)-1)
        return -1;
    if (lseek(s->fd, (off_t)fdpos, SEEK_SET) == (off_t)-1) {
        // The descriptor is now at an offset that no longer matches the
        // cache; invalidate it instead of silently desynchronizing.
        s->fpos = -1;
        return -1;
    }
    return sz;
}

int ios_trunc(ios_t *s, size_t size)
{
if (s->bm == bm_mem) {
Expand Down Expand Up @@ -936,6 +963,7 @@ ios_t *ios_file(ios_t *s, const char *fname, int rd, int wr, int create, int tru
goto open_file_err;

s = ios_fd(s, fd, 1, 1);
s->fpos = 0;
if (!rd)
s->readable = 0;
if (!wr)
Expand Down
3 changes: 3 additions & 0 deletions src/support/ios.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ JL_DLLEXPORT int64_t ios_seek(ios_t *s, int64_t pos) JL_NOTSAFEPOINT; // absolut
JL_DLLEXPORT int64_t ios_seek_end(ios_t *s) JL_NOTSAFEPOINT;
JL_DLLEXPORT int64_t ios_skip(ios_t *s, int64_t offs); // relative seek
JL_DLLEXPORT int64_t ios_pos(ios_t *s) JL_NOTSAFEPOINT; // get current position
JL_DLLEXPORT int64_t ios_filesize(ios_t *s);
JL_DLLEXPORT int ios_trunc(ios_t *s, size_t size) JL_NOTSAFEPOINT;
JL_DLLEXPORT int ios_eof(ios_t *s);
JL_DLLEXPORT int ios_eof_blocking(ios_t *s);
Expand All @@ -108,6 +109,8 @@ JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) JL_NOTSAFE
JL_DLLEXPORT size_t ios_nchomp(ios_t *from, size_t ntowrite);
// ensure at least n bytes are buffered if possible. returns # available.
JL_DLLEXPORT size_t ios_readprep(ios_t *from, size_t n);
// fill the buffer and determine whether it contains the whole rest of the file
JL_DLLEXPORT ssize_t ios_fillbuf(ios_t *s);

/* stream creation */
JL_DLLEXPORT
Expand Down

0 comments on commit 43e8023

Please sign in to comment.