Skip to content

Commit

Permalink
Update HDFS deletion command, and run GBM before DRF.
Browse files Browse the repository at this point in the history
  • Loading branch information
arnocandel committed Jan 12, 2015
1 parent e8c97e3 commit 28c4990
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions R/examples/manycols.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ library(h2o)
#wget http://h2o-release.s3.amazonaws.com/h2o/h2o-parsemanycols/8/h2o-2.9.0.8.zip
#unzip h2o-2.9.0.8.zip
#cd h2o-2.9.0.8/hadoop
#hadoop fs -rmr myDir
#hadoop fs -rm -r myDir
#hadoop jar h2odriver_hdp2.1.jar water.hadoop.h2odriver -libjars ../h2o.jar -n 8 -mapperXmx 40g -output myDir -baseport 61111 -data_max_factor_levels 65000 -chunk_bits 24

# Connect to the running H2O cluster; the port matches the -baseport value
# passed to the h2odriver command in the comments above.
h2oCluster <- h2o.init(ip = "mr-0xd1", port = 61111)
Expand Down Expand Up @@ -40,10 +40,10 @@ mdl.glm
# Score the `valid` frame with the GLM, then report the F1 measure against
# the actual response column.
# NOTE(review): column 3 of the prediction frame is presumably the
# positive-class probability -- confirm against the H2O predict output layout.
pred.glm <- h2o.predict(mdl.glm, valid)
h2o.performance(pred.glm[, 3], valid[, response], measure = "F1")

# Random Forest
# Fits a random forest on `train`, validated on `valid`, with variable
# importance and class balancing enabled.
# Logical flags are spelled TRUE rather than T: T is an ordinary variable that
# can be reassigned, which would silently change these arguments.
# NOTE(review): class.sampling.factors = c(1, 250) presumably over-samples the
# second class to offset class imbalance -- confirm the class order.
mdl.rf <- h2o.randomForest(
  x = predictors, y = response, data = train, validation = valid,
  type = "BigData", depth = 15,
  importance = TRUE,
  balance.classes = TRUE, class.sampling.factors = c(1, 250)
)
# Auto-print the fitted model at top level.
mdl.rf

# Gradient Boosted Trees
# Fits a GBM on `train`, validated on `valid`, with variable importance and
# class balancing enabled.
# Logical flags are spelled TRUE rather than T: T is an ordinary variable that
# can be reassigned, which would silently change these arguments.
# NOTE(review): class.sampling.factors = c(1, 250) presumably over-samples the
# second class to offset class imbalance -- confirm the class order.
mdl.gbm <- h2o.gbm(
  x = predictors, y = response, data = train, validation = valid,
  importance = TRUE,
  balance.classes = TRUE, class.sampling.factors = c(1, 250)
)
# Auto-print the fitted model at top level.
mdl.gbm

# Random Forest
# Fits a random forest on `train`, validated on `valid`, with variable
# importance and class balancing enabled.
# Logical flags are spelled TRUE rather than T: T is an ordinary variable that
# can be reassigned, which would silently change these arguments.
# NOTE(review): class.sampling.factors = c(1, 250) presumably over-samples the
# second class to offset class imbalance -- confirm the class order.
mdl.rf <- h2o.randomForest(
  x = predictors, y = response, data = train, validation = valid,
  type = "BigData", depth = 15,
  importance = TRUE,
  balance.classes = TRUE, class.sampling.factors = c(1, 250)
)
# Auto-print the fitted model at top level.
mdl.rf

0 comments on commit 28c4990

Please sign in to comment.