Skip to content

Commit

Permalink
Raise default value of -data_max_factor_levels from 65k to 1M.
Browse files Browse the repository at this point in the history
  • Loading branch information
arnocandel committed Jan 19, 2015
1 parent 362f0de commit 452392e
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 6 deletions.
4 changes: 2 additions & 2 deletions R/h2o-package/R/Wrapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# 3) If user does want to start H2O, but running non-locally, print an error
h2o.init <- function(ip = "127.0.0.1", port = 54321, startH2O = TRUE, forceDL = FALSE, Xmx,
beta = FALSE, assertion = TRUE, license = NULL, nthreads = -2, max_mem_size = NULL, min_mem_size = NULL,
ice_root = NULL, strict_version_check = TRUE, data_max_factor_levels = 65000, many_cols = FALSE, chunk_bytes = 22) {
ice_root = NULL, strict_version_check = TRUE, data_max_factor_levels = 1000000, many_cols = FALSE, chunk_bytes = 22) {
if(!is.character(ip)) stop("ip must be of class character")
if(!is.numeric(port)) stop("port must be of class numeric")
if(!is.logical(startH2O)) stop("startH2O must be of class logical")
Expand Down Expand Up @@ -262,7 +262,7 @@ h2o.clusterStatus <- function(client) {
.h2o.startJar <- function(nthreads = -1, max_memory = NULL,
min_memory = NULL, beta = FALSE,
assertion = TRUE, forceDL = FALSE,
license = NULL, ice_root, max_factor_levels = 65000,
license = NULL, ice_root, max_factor_levels = 1000000,
many_cols = FALSE, chunk_bytes = 22) {
command <- .h2o.checkJava()

Expand Down
4 changes: 2 additions & 2 deletions R/h2o-package/man/h2o.init.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Connects to a running H2O instance and checks the local H2O R package is the cor
h2o.init(ip = "127.0.0.1", port = 54321, startH2O = TRUE, forceDL = FALSE, Xmx,
beta = FALSE, assertion = TRUE, license = NULL,
nthreads = -2, max_mem_size, min_mem_size,
ice_root = NULL, strict_version_check = TRUE, data_max_factor_levels = 65000,
ice_root = NULL, strict_version_check = TRUE, data_max_factor_levels = 1000000,
many_cols = FALSE, chunk_bytes = 22)}

\arguments{
Expand All @@ -24,7 +24,7 @@ h2o.init(ip = "127.0.0.1", port = 54321, startH2O = TRUE, forceDL = FALSE, Xmx,
\item{min_mem_size}{(Optional) A string specifying the minimum size, in bytes, of the memory allocation pool to H2O. This value must be a multiple of 1024 greater than 2MB. Append the letter m or M to indicate megabytes, or g or G to indicate gigabytes. This value is only used when R starts H2O. }
\item{ice_root}{(Optional) A directory specifying where H2O should write log files and spill to disk (if needed). Default is tempdir(). This value is only used when R starts H2O.}
\item{strict_version_check}{(Optional) Setting this to FALSE is unsupported and should only be done when advised by technical support.}
\item{data_max_factor_levels}{(Optional) The limit for the number of factor levels that may appear in a single column. Default is 65,000.}
\item{data_max_factor_levels}{(Optional) The limit for the number of factor levels that may appear in a single column. Default is 1,000,000.}
\item{many_cols}{(Optional) Enables improved handling of high-dimensional datasets. Same as -chunk_bytes 24.}
\item{chunk_bytes}{(Optional) Cannot be used in combination with -many_cols. The log (base 2) of chunk size in bytes. (The default is 22, which leads to a chunk size of 4.0 MB.)}
}
Expand Down
2 changes: 1 addition & 1 deletion hadoop/src/main/java/water/hadoop/h2odriver.java
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ static void usage() {
" [-baseport <starting HTTP port for H2O nodes; default is 54321>]\n" +
" [-many_cols] (improve handling of high-dimensional datasets, same as -chunk_bytes 24)\n" +
" [-chunk_bytes <log (base 2) of chunk size in bytes (e.g., default is 22 for 4MB chunks)>]\n" +
" [-data_max_factor_levels <max. number of factors per column (e.g., default is 65000)>]\n" +
" [-data_max_factor_levels <max. number of factors per column (e.g., default is 1,000,000)>]\n" +
" [-ea]\n" +
" [-verbose:gc]\n" +
" [-XX:+PrintGCDetails]\n" +
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/water/H2O.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public final class H2O {
public static boolean SINGLE_PRECISION = false;

// Max. number of factor levels per column (before flipping all to NAs)
public static int DATA_MAX_FACTOR_LEVELS = 65000;
public static int DATA_MAX_FACTOR_LEVELS = 1000000;

public static int LOG_CHK = 22; // Chunks are 1<<22, or 4Meg

Expand Down

0 comments on commit 452392e

Please sign in to comment.