Merge pull request apache#1086 from thirdwing/master

[R] misc updates: README and comments for ndsb2
LoneStarAI · Dec 27, 2015 · 2645bac · 2645bac
2 parents ac38dc4 + ff983c7
commit 2645bac
Show file tree

Hide file tree

Showing 6 changed files with 37 additions and 29 deletions.
diff --git a/R-package/R/mxnet_generated.R b/R-package/R/mxnet_generated.R
@@ -243,7 +243,7 @@ mx.io.CSVIter <- function(...) {
 #'     Batch Param: Batch size.
 #' @param round.batch  boolean, optional, default=True
 #'     Batch Param: Use round robin to handle overflow batch.
-#' @param prefetch.buffer  , optional, default=4
+#' @param prefetch.buffer  long (non-negative), optional, default=4
 #'     Backend Param: Number of prefetched parameters
 #' @param rand.crop  boolean, optional, default=False
 #'     Augmentation Param: Whether to random crop on the image
@@ -284,9 +284,11 @@ mx.io.CSVIter <- function(...) {
 #' @param mean.r  float, optional, default=0
 #'     Augmentation Param: Mean value on R channel.
 #' @param mean.g  float, optional, default=0
-#'     Augmentation: Mean value on G channel.
+#'     Augmentation Param: Mean value on G channel.
 #' @param mean.b  float, optional, default=0
-#'     Augmentation: Mean value on B channel.
+#'     Augmentation Param: Mean value on B channel.
+#' @param mean.a  float, optional, default=0
+#'     Augmentation Param: Mean value on Alpha channel.
 #' @param scale  float, optional, default=1
 #'     Augmentation Param: Scale in color space.
 #' @param max.random.contrast  float, optional, default=0
@@ -320,7 +322,7 @@ mx.io.ImageRecordIter <- function(...) {
 #'     partition the data into multiple parts
 #' @param part.index  int, optional, default='0'
 #'     the index of the part will read
-#' @param prefetch.buffer  , optional, default=4
+#' @param prefetch.buffer  long (non-negative), optional, default=4
 #'     Backend Param: Number of prefetched parameters
 #' @return iter The result mx.dataiter
 #' 

diff --git a/R-package/man/mx.io.ImageRecordIter.Rd b/R-package/man/mx.io.ImageRecordIter.Rd
diff --git a/R-package/man/mx.io.MNISTIter.Rd b/R-package/man/mx.io.MNISTIter.Rd
diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win
@@ -1,13 +1,3 @@
-# _*_ mode: makefile; _*_
-PKGROOT=../../
 
-# This file is only used for compilation from github
-# It will be replaced by more formal Rpackage structure
-# Where PKGROOT moved to root directory
-
-.PHONY: all mxnet
-all: $(SHLIB)
-
-
-PKG_CPPFLAGS = -I$(PKGROOT)/include -I$(PKGROOT)/dmlc-core/include
-PKG_LIBS = -L../inst/libs/x64/ -llibmxnet
+PKG_CPPFLAGS = -I../inst/include
+PKG_LIBS =  $(LAPACK_LIBS) $(BLAS_LIBS) -L../inst/libs/x64/ -llibmxnet
diff --git a/example/kaggle-ndsb2/README.md b/example/kaggle-ndsb2/README.md
@@ -53,17 +53,14 @@ Prepare raw data in ```data``` folder. The tree of ```data``` folder is like
 
 2. Run ```python3 Preprocessing.py``` to do preprocessing of data.
 3. After we have the processed data, run ```python3 Train.py``` to generate ```submission.csv```
-
+4. We also provide R code with the same network structure and parameters in ```Train.R```. Right now it uses the pre-processed CSV files generated by ```Preprocessing.py```. We will add the pre-processing R code later.
 
 Note:
 - To run with python2, you need to change ```Train.py, line #139``` to the python2 syntax.
-- To modify network, change ```get_lenet``` function in ```Train.py```
+- To modify network, change ```get_lenet``` function in ```Train.py``` or ```get.lenet``` function in ```Train.R```.
 - We also provide ```local_train```, ```local_test``` file for local parameter tuning.
-- To run on multiple GPU with huge network, or questions about saving network paramter etc, please refer [MXNet docs](https://mxnet.readthedocs.org/en/latest/)
+- To run on multiple GPUs with a huge network, or for questions about saving network parameters etc., please refer to the [MXNet docs](https://mxnet.readthedocs.org/en/latest/)
 
 
 ## About MXNet
-MXNet is a deep learning framework designed for both efficiency and flexibility by DMLC group. Like all other packages in DMLC,  it will fully utilize all the resources to solve the problem under limited resource constraint, with a flexible programming interface. You can use it for all purposes of data science and deep learning tasks with R, Julia, python and more. see
-
-
-
+MXNet is a deep learning framework designed for both efficiency and flexibility by DMLC group. Like all other packages in DMLC, it will fully utilize all the resources to solve the problem under limited resource constraint, with a flexible programming interface. You can use it for all purposes of data science and deep learning tasks with R, Julia, python and more.
diff --git a/example/kaggle-ndsb2/Train.R b/example/kaggle-ndsb2/Train.R
@@ -1,6 +1,12 @@
+# Train.R for Second Annual Data Science Bowl
+# Deep learning model with GPU support
+# Please refer to https://mxnet.readthedocs.org/en/latest/build.html#r-package-installation
+# for installation guide
+
 require(mxnet)
 require(data.table)
 
+##A lenet style net, takes difference of each frame as input.
 get.lenet <- function() {
   source <- mx.symbol.Variable("data")
   source <- (source-128) / 128
@@ -27,15 +33,19 @@ get.lenet <- function() {
     mx.symbol.Pooling(
       net, pool.type = "max", kernel = c(2, 2), stride = c(2, 2)
     )
+  # first fullc
   flatten <- mx.symbol.Flatten(net)
   flatten <- mx.symbol.Dropout(flatten)
   fc1 <- mx.symbol.FullyConnected(data = flatten, num.hidden = 600)
+  # Name the final layer as softmax so it auto matches the naming of data iterator
+  # Otherwise we can also change the provide_data in the data iter
   return(mx.symbol.LogisticRegressionOutput(data = fc1, name = 'softmax'))
 }
 
 network <- get.lenet()
 batch_size <- 32
 
+# CSVIter is used here, since the data can't fit into memory
 data_train <- mx.io.CSVIter(
   data.csv = "./train-64x64-data.csv", data.shape = c(64, 64, 30),
   label.csv = "./train-stytole.csv", label.shape = 600,
@@ -48,6 +58,7 @@ data_validate <- mx.io.CSVIter(
   batch.size = 1
 )
 
+# Custom evaluation metric on CRPS.
 mx.metric.CRPS <- mx.metric.custom("CRPS", function(label, pred) {
   pred <- as.array(pred)
   label <- as.array(label)
@@ -61,6 +72,7 @@ mx.metric.CRPS <- mx.metric.custom("CRPS", function(label, pred) {
   return(sum((label - pred) ^ 2) / length(label))
 })
 
+# Training the systole net
 mx.set.seed(0)
 stytole_model <- mx.model.FeedForward.create(
   X = data_train,
@@ -73,8 +85,10 @@ stytole_model <- mx.model.FeedForward.create(
   eval.metric = mx.metric.CRPS
 )
 
+# Predict systole
 stytole_prob = predict(stytole_model, data_validate)
 
+# Training the diastole net
 network = get.lenet()
 batch_size = 32
 data_train <-
@@ -95,6 +109,7 @@ diastole_model = mx.model.FeedForward.create(
   eval.metric = mx.metric.CRPS
 )
 
+# Predict diastole
 diastole_prob = predict(diastole_model, data_validate)
 
 accumulate_result <- function(validate_lst, prob) {
@@ -109,11 +124,12 @@ diastole_result = as.data.frame(accumulate_result("./validate-label.csv", diasto
 
 train_csv <- read.table("./train-label.csv", sep = ',')
 
+# We have 2 persons missing due to frame selection; use udibr's hist result instead
 doHist <- function(data) {
   res <- rep(0, 600)
   for (i in 1:length(data)) {
     for (j in round(data[i]):600) {
-      res[j] = res[j] + 1 
+      res[j] = res[j] + 1
     }
   }
   return(res / length(data))
@@ -147,7 +163,7 @@ for (i in 1:nrow(res)) {
       res[i, 2:601] <- hDiastole
     } else {
       res[i, 2:601] <- hSystole
-    }    
+    }
   }
 }