Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

faster filesize and read(::IOStream) #35925

Merged
merged 1 commit into from
May 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion base/filesystem.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ import .Base:
IOError, _UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
bytesavailable, position, read, read!, readavailable, seek, seekend, show,
skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error,
rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror
rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize

import .Base.RefValue

Expand Down
51 changes: 35 additions & 16 deletions base/iostream.jl
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,12 @@ function position(s::IOStream)
return pos
end

# Return the size reported by the C routine `ios_filesize` for the stream `s`.
# The ccall is evaluated through `@_lock_ios`; a result of -1 is the failure
# marker and is turned into an exception by `systemerror`.
function filesize(s::IOStream)
    nbytes = @_lock_ios s ccall(:ios_filesize, Int64, (Ptr{Cvoid},), s.ios)
    failed = (nbytes == -1)
    systemerror("filesize", failed)
    return nbytes
end

# Report whether `ios_eof_blocking` returns a nonzero status for the stream.
# As the name indicates, this variant does not go through `@_lock_ios` itself.
function _eof_nolock(s::IOStream)
    status = ccall(:ios_eof_blocking, Cint, (Ptr{Cvoid},), s.ios)
    return status != 0
end
# End-of-file query for an `IOStream`: evaluates `_eof_nolock(s)` wrapped in
# the `@_lock_ios` macro and returns its result.
function eof(s::IOStream)
    at_end = @_lock_ios s _eof_nolock(s)
    return at_end
end

Expand Down Expand Up @@ -441,9 +447,10 @@ function readbytes_all!(s::IOStream, b::Array{UInt8}, nb)
lb = max(65536, (nr+1) * 2)
resize!(b, lb)
end
nr += Int(ccall(:ios_readall, Csize_t, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
_eof_nolock(s) && break
thisr = Int(ccall(:ios_readall, Csize_t, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
nr += thisr
(nr == nb || thisr == 0 || _eof_nolock(s)) && break
end
end
if lb > olb && lb > nr
Expand Down Expand Up @@ -486,21 +493,33 @@ function readbytes!(s::IOStream, b::Array{UInt8}, nb=length(b); all::Bool=true)
end

function read(s::IOStream)
sz = try # filesize is just a hint, so ignore if `fstat` fails
filesize(s)
catch ex
ex isa IOError || rethrow()
Int64(0)
end
if sz > 0
pos = position(s)
if pos > 0
sz -= pos
# First we try to fill the buffer. If that gives us the whole file,
# copy it out and return. Otherwise look at the file size and use it
# to preallocate space. Determining the size requires extra syscalls,
# which we want to avoid for small files.
@_lock_ios s begin
nb = ccall(:ios_fillbuf, Cssize_t, (Ptr{Cvoid},), s.ios)
if nb != -1
b = StringVector(nb)
readbytes_all!(s, b, nb)
Comment on lines +503 to +504
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to handle the case where this is a directory; we don't want to return a successful zero-byte read of a directory.

else
sz = try # filesize is just a hint, so ignore if it fails
filesize(s)
catch ex
ex isa IOError || rethrow()
Int64(-1)
end
if sz > 0
pos = position(s)
if pos > 0
sz -= pos
end
end
b = StringVector(sz < 0 ? 1024 : sz)
nr = readbytes_all!(s, b, sz < 0 ? typemax(Int) : sz)
resize!(b, nr)
end
end
b = StringVector(sz <= 0 ? 1024 : sz)
nr = readbytes_all!(s, b, typemax(Int))
resize!(b, nr)
return b
end

Expand Down
28 changes: 28 additions & 0 deletions src/support/ios.c
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,17 @@ size_t ios_readprep(ios_t *s, size_t n)
return (size_t)(s->size - s->bpos);
}

// Try to top up the stream buffer: ask ios_readprep for as many bytes as
// lie between the current buffer position and the buffer's maximum size.
// If fewer bytes than requested became available, the whole rest of the
// file has been read and that count is returned; otherwise return -1 to
// signal that more data might remain.
ssize_t ios_fillbuf(ios_t *s)
{
    size_t wanted = s->maxsize - s->bpos;
    size_t avail = ios_readprep(s, wanted);
    return (avail < wanted) ? (ssize_t)avail : (ssize_t)-1;
}

static void _write_update_pos(ios_t *s)
{
if (s->bpos > s->ndirty) s->ndirty = s->bpos;
Expand Down Expand Up @@ -535,6 +546,22 @@ int64_t ios_pos(ios_t *s)
return fdpos;
}

// Return the result of seeking to the end of the stream's file descriptor,
// or -1 when there is no descriptor or the current offset cannot be found.
// The descriptor is seeked back to the remembered offset before returning.
int64_t ios_filesize(ios_t *s)
{
    int64_t here;
    off_t end;

    if (s->fd == -1)
        return -1;

    // s->fpos == -1 is treated as "offset not known": query it and store it.
    here = s->fpos;
    if (here == (int64_t)-1) {
        here = lseek(s->fd, 0, SEEK_CUR);
        if (here == (int64_t)-1)
            return -1;
        s->fpos = here;
    }

    // Measure by seeking to the end, then put the descriptor back.
    end = lseek(s->fd, 0, SEEK_END);
    lseek(s->fd, (off_t)here, SEEK_SET);
    return end;
}

int ios_trunc(ios_t *s, size_t size)
{
if (s->bm == bm_mem) {
Expand Down Expand Up @@ -936,6 +963,7 @@ ios_t *ios_file(ios_t *s, const char *fname, int rd, int wr, int create, int tru
goto open_file_err;

s = ios_fd(s, fd, 1, 1);
s->fpos = 0;
if (!rd)
s->readable = 0;
if (!wr)
Expand Down
3 changes: 3 additions & 0 deletions src/support/ios.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ JL_DLLEXPORT int64_t ios_seek(ios_t *s, int64_t pos) JL_NOTSAFEPOINT; // absolut
JL_DLLEXPORT int64_t ios_seek_end(ios_t *s) JL_NOTSAFEPOINT;
JL_DLLEXPORT int64_t ios_skip(ios_t *s, int64_t offs); // relative seek
JL_DLLEXPORT int64_t ios_pos(ios_t *s) JL_NOTSAFEPOINT; // get current position
JL_DLLEXPORT int64_t ios_filesize(ios_t *s); // end offset of the underlying fd, or -1 on failure
JL_DLLEXPORT int ios_trunc(ios_t *s, size_t size) JL_NOTSAFEPOINT;
JL_DLLEXPORT int ios_eof(ios_t *s);
JL_DLLEXPORT int ios_eof_blocking(ios_t *s);
Expand All @@ -108,6 +109,8 @@ JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) JL_NOTSAFE
JL_DLLEXPORT size_t ios_nchomp(ios_t *from, size_t ntowrite);
// ensure at least n bytes are buffered if possible. returns # available.
JL_DLLEXPORT size_t ios_readprep(ios_t *from, size_t n);
// fill the buffer and determine whether it contains the whole rest of the file
JL_DLLEXPORT ssize_t ios_fillbuf(ios_t *s);

/* stream creation */
JL_DLLEXPORT
Expand Down