Skip to content

Commit

Permalink
More direct fix for #563, since mkCharLen internally calls mkCharLenCE.
Browse files Browse the repository at this point in the history
  • Loading branch information
arunsrinivasan committed Aug 25, 2015
1 parent 8bb3b4c commit e592419
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 14 deletions.
4 changes: 1 addition & 3 deletions R/fread.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
fread <- function(input="",sep="auto",sep2="auto",nrows=-1L,header="auto",na.strings="NA",stringsAsFactors=FALSE,verbose=getOption("datatable.verbose"),autostart=1L,skip=0L,select=NULL,drop=NULL,colClasses=NULL,integer64=getOption("datatable.integer64"),dec=if (sep!=".") "." else ",", check.names=FALSE, encoding="unknown", showProgress=getOption("datatable.showProgress"),data.table=getOption("datatable.fread.datatable")) {
if (!is.character(dec) || length(dec)!=1L || nchar(dec)!=1) stop("dec must be a single character e.g. '.' or ','")
# handle encoding, #563
if (missing(encoding)) {
encoding = NULL
} else if (!encoding %in% c("unknown", "UTF-8", "Latin-1")) {
if (!encoding %in% c("unknown", "UTF-8", "Latin-1")) {
stop("Argument 'encoding' must be 'unknown', 'UTF-8' or 'Latin-1'.")
}
if (getOption("datatable.fread.dec.experiment") && Sys.localeconv()["decimal_point"] != dec) {
Expand Down
15 changes: 4 additions & 11 deletions src/fread.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,13 +429,9 @@ SEXP readfile(SEXP input, SEXP separg, SEXP nrowsarg, SEXP headerarg, SEXP nastr
// https://github.com/wch/r-source/blob/ca5348f0b5e3f3c2b24851d7aff02de5217465eb/src/main/util.c#L1115
// Look for the mkCharLenCE function there to see where this is implemented.
cetype_t ienc;
Rboolean is_no_encoding = TRUE;
if (!isNull(encoding)) {
is_no_encoding = FALSE;
if (!strcmp(CHAR(STRING_ELT(encoding, 0)), "Latin-1")) ienc = CE_LATIN1;
else if (!strcmp(CHAR(STRING_ELT(encoding, 0)), "UTF-8")) ienc = CE_UTF8;
else ienc = CE_NATIVE;
}
if (!strcmp(CHAR(STRING_ELT(encoding, 0)), "Latin-1")) ienc = CE_LATIN1;
else if (!strcmp(CHAR(STRING_ELT(encoding, 0)), "UTF-8")) ienc = CE_UTF8;
else ienc = CE_NATIVE;

// Extra tracing for apparent 32bit Windows problem: https://github.com/Rdatatable/data.table/issues/1111
if (!isInteger(showProgressArg)) error("showProgress is not type integer but type '%s'. Please report.", type2char(TYPEOF(showProgressArg)));
Expand Down Expand Up @@ -1095,10 +1091,7 @@ SEXP readfile(SEXP input, SEXP separg, SEXP nrowsarg, SEXP headerarg, SEXP nastr
SET_VECTOR_ELT(ans, resj, thiscol = coerceVectorSoFar(thiscol, type[j]++, SXP_STR, i, j));
case SXP_STR: case SXP_NULL: case_SXP_STR:
Field(1);
if (type[j]==SXP_STR) {
SET_STRING_ELT(thiscol, i, (is_no_encoding) ?
mkCharLen(fieldStart, fieldLen) : mkCharLenCE(fieldStart, fieldLen, ienc));
}
if (type[j]==SXP_STR) SET_STRING_ELT(thiscol, i, mkCharLenCE(fieldStart, fieldLen, ienc));
}
if (ch<eof && *ch==sep && j<ncol-1) {ch++; continue;} // done, next field
if (j<ncol-1) {
Expand Down

0 comments on commit e592419

Please sign in to comment.