Skip to content

Commit

Permalink
writeBin() for long vectors {and use local declarations in its C code}.
Browse files Browse the repository at this point in the history
git-svn-id: https://svn.r-project.org/R/trunk@77541 00db46b3-68df-0310-9c12-caf00c1e9a41
  • Loading branch information
maechler committed Dec 7, 2019
1 parent 60c9255 commit 7430cf3
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 33 deletions.
3 changes: 3 additions & 0 deletions doc/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,9 @@
\item \code{sort.list(x)} now works for non-atomic objects \code{x}
and \code{method="auto"} (default) or \code{"radix"} in cases
\code{order(x)} works.
\item Where long vectors are available (i.e., almost everywhere),
\code{writeBin()} now allows long vectors, i.e., of length >= 2^31.
}
}% end{ NEW FEATURES }
Expand Down
61 changes: 28 additions & 33 deletions src/main/connections.c
Original file line number Diff line number Diff line change
Expand Up @@ -4398,47 +4398,43 @@ SEXP attribute_hidden do_readbin(SEXP call, SEXP op, SEXP args, SEXP env)
/* writeBin(object, con, size, swap, useBytes) */
SEXP attribute_hidden do_writebin(SEXP call, SEXP op, SEXP args, SEXP env)
{
SEXP object, ans = R_NilValue;
int i, j, size, swap, len, useBytes;
const char *s;
char *buf;
Rboolean wasopen = TRUE, isRaw = FALSE;
Rconnection con = NULL;
RCNTXT cntxt;

checkArity(op, args);
object = CAR(args);
SEXP object = CAR(args);
if(!isVectorAtomic(object))
error(_("'x' is not an atomic vector type"));

if(TYPEOF(CADR(args)) == RAWSXP) {
isRaw = TRUE;
} else {
Rboolean
isRaw = TYPEOF(CADR(args)) == RAWSXP,
wasopen = isRaw;
Rconnection con = NULL;
if(!isRaw) {
con = getConnection(asInteger(CADR(args)));
if(con->text) error(_("can only write to a binary connection"));
wasopen = con->isopen;
if(!con->canwrite) error(_("cannot write to this connection"));
}

size = asInteger(CADDR(args));
swap = asLogical(CADDDR(args));
int size = asInteger(CADDR(args)),
swap = asLogical(CADDDR(args));
if(swap == NA_LOGICAL)
error(_("invalid '%s' argument"), "swap");
useBytes = asLogical(CAD4R(args));
int useBytes = asLogical(CAD4R(args));
if(useBytes == NA_LOGICAL)
error(_("invalid '%s' argument"), "useBytes");
len = LENGTH(object);
if(len == 0) {
if(isRaw) return allocVector(RAWSXP, 0); else return R_NilValue;
}
/* RAW vectors are limited to 2^31 - 1 bytes */
if((double)len *size > INT_MAX) {
R_xlen_t i, len = XLENGTH(object);
if(len == 0)
return (isRaw) ? allocVector(RAWSXP, 0) : R_NilValue;

#ifndef LONG_VECTOR_SUPPORT
/* without long vectors RAW vectors are limited to 2^31 - 1 bytes */
if(len * (double)size > INT_MAX) {
if(isRaw)
error(_("only 2^31-1 bytes can be written to a raw vector"));
else
error(_("only 2^31-1 bytes can be written in a single writeBin() call"));
}
#endif

RCNTXT cntxt;
if(!wasopen) {
/* Documented behaviour */
char mode[5];
Expand All @@ -4454,7 +4450,9 @@ SEXP attribute_hidden do_writebin(SEXP call, SEXP op, SEXP args, SEXP env)
if(!con->canwrite) error(_("cannot write to this connection"));
}

SEXP ans = R_NilValue;
if(TYPEOF(object) == STRSXP) {
const char *s;
if(isRaw) {
Rbyte *bytes;
size_t np, outlen = 0;
Expand Down Expand Up @@ -4534,7 +4532,8 @@ SEXP attribute_hidden do_writebin(SEXP call, SEXP op, SEXP args, SEXP env)
default:
UNIMPLEMENTED_TYPE("writeBin", object);
}
buf = R_chk_calloc(len, size);
char *buf = R_chk_calloc(len, size);
R_xlen_t j;
switch(TYPEOF(object)) {
case LGLSXP:
case INTSXP:
Expand All @@ -4545,29 +4544,26 @@ SEXP attribute_hidden do_writebin(SEXP call, SEXP op, SEXP args, SEXP env)
#if SIZEOF_LONG == 8
case sizeof(long):
{
long l1;
for (i = 0, j = 0; i < len; i++, j += size) {
l1 = (long) INTEGER(object)[i];
long l1 = (long) INTEGER(object)[i];
memcpy(buf + j, &l1, size);
}
break;
}
#elif SIZEOF_LONG_LONG == 8
case sizeof(_lli_t):
{
_lli_t ll1;
for (i = 0, j = 0; i < len; i++, j += size) {
ll1 = (_lli_t) INTEGER(object)[i];
_lli_t ll1 = (_lli_t) INTEGER(object)[i];
memcpy(buf + j, &ll1, size);
}
break;
}
#endif
case 2:
{
short s1;
for (i = 0, j = 0; i < len; i++, j += size) {
s1 = (short) INTEGER(object)[i];
short s1 = (short) INTEGER(object)[i];
memcpy(buf + j, &s1, size);
}
break;
Expand All @@ -4587,9 +4583,8 @@ SEXP attribute_hidden do_writebin(SEXP call, SEXP op, SEXP args, SEXP env)
break;
case sizeof(float):
{
float f1;
for (i = 0, j = 0; i < len; i++, j += size) {
f1 = (float) REAL(object)[i];
float f1 = (float) REAL(object)[i];
memcpy(buf+j, &f1, size);
}
break;
Expand Down Expand Up @@ -4632,7 +4627,7 @@ SEXP attribute_hidden do_writebin(SEXP call, SEXP op, SEXP args, SEXP env)
}

/* write it now */
if(isRaw) { /* We checked size*len < 2^31-1 above */
if(isRaw) { /* for non-long vectors, we checked size*len < 2^31-1 above */
PROTECT(ans = allocVector(RAWSXP, size*len));
memcpy(RAW(ans), buf, size*len);
} else {
Expand Down Expand Up @@ -5504,7 +5499,7 @@ SEXP attribute_hidden do_url(SEXP call, SEXP op, SEXP args, SEXP env)
warning(_("'raw = FALSE' but '%s' is not a regular file"),
url);
}
#endif
#endif
if (!raw &&
(!strlen(open) || streql(open, "r") || streql(open, "rt"))) {
/* check if this is a compressed file */
Expand Down
27 changes: 27 additions & 0 deletions tests/reg-large.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ availableGB <-
cat("Available (processor aka CPU) memory: ", round(availableGB, 1),
"GB (Giga Bytes)\n")

if(.Machine$sizeof.pointer < 8) {
cat(".Machine :\n"); str(.Machine)
cat("not a 64-bit system -- forget about these tests!\n")
q("no")
}

### Testing readLines() *large* file with embedded nul aka `\0'
##
## takes close to one minute and ~ 10 GB RAM
Expand Down Expand Up @@ -286,6 +292,27 @@ if(availableGB > 14) withAutoprint({ ## seen 11.6 G
## Error in readBin(raw_con, "raw", n = 1e+06) : too large a block specified
})

## writeBin() for long vectors
if(availableGB > 20) withAutoprint({ ## seen 20.9 G
x <- raw(2^31)
writeBin(x, con = nullfile())

con <- rawConnection(raw(0L), "w")
writeBin(x, con = con)
stopifnot(identical(x, rawConnectionValue(con)))

system.time(x <- pi*seq_len(2.1*2^30)) # 25 sec
zzfil <- tempfile("test-large-bin")
zz <- file(zzfil, "wb") ## file size will be 2.5 GB !!!
system.time(z <- writeBin(x, zz)) # 32 sec
stopifnot(is.null(z))
close(zz); zz <- file(zzfil, "rb")
system.time(r <- readBin(zz, double(), n = length(x) + 999)) # 32 sec
system.time(stopifnot(identical(x, r))) # 24 sec
close(zz); rm(r, zz)
})



gc() # NB the "max used"
proc.time() # total [ ~ 40 minutes in full case, 2019-04-12]

0 comments on commit 7430cf3

Please sign in to comment.