Skip to content

Commit

Permalink
Merge pull request apache#1086 from thirdwing/master
Browse files Browse the repository at this point in the history
[R] misc updates: README and comments for ndsb2
  • Loading branch information
tqchen committed Dec 27, 2015
2 parents ac38dc4 + ff983c7 commit 2645bac
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 29 deletions.
10 changes: 6 additions & 4 deletions R-package/R/mxnet_generated.R
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ mx.io.CSVIter <- function(...) {
#' Batch Param: Batch size.
#' @param round.batch boolean, optional, default=True
#' Batch Param: Use round robin to handle overflow batch.
#' @param prefetch.buffer , optional, default=4
#' @param prefetch.buffer long (non-negative), optional, default=4
#' Backend Param: Number of prefetched parameters
#' @param rand.crop boolean, optional, default=False
#' Augmentation Param: Whether to random crop on the image
Expand Down Expand Up @@ -284,9 +284,11 @@ mx.io.CSVIter <- function(...) {
#' @param mean.r float, optional, default=0
#' Augmentation Param: Mean value on R channel.
#' @param mean.g float, optional, default=0
#' Augmentation: Mean value on G channel.
#' Augmentation Param: Mean value on G channel.
#' @param mean.b float, optional, default=0
#' Augmentation: Mean value on B channel.
#' Augmentation Param: Mean value on B channel.
#' @param mean.a float, optional, default=0
#' Augmentation Param: Mean value on Alpha channel.
#' @param scale float, optional, default=1
#' Augmentation Param: Scale in color space.
#' @param max.random.contrast float, optional, default=0
Expand Down Expand Up @@ -320,7 +322,7 @@ mx.io.ImageRecordIter <- function(...) {
#' partition the data into multiple parts
#' @param part.index int, optional, default='0'
#' the index of the part will read
#' @param prefetch.buffer , optional, default=4
#' @param prefetch.buffer long (non-negative), optional, default=4
#' Backend Param: Number of prefetched parameters
#' @return iter The result mx.dataiter
#'
Expand Down
9 changes: 6 additions & 3 deletions R-package/man/mx.io.ImageRecordIter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion R-package/man/mx.io.MNISTIter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 2 additions & 12 deletions R-package/src/Makevars.win
Original file line number Diff line number Diff line change
@@ -1,13 +1,3 @@
# _*_ mode: makefile; _*_
PKGROOT=../../

# This file is only used for compilation from github
# It will be replaced by more formal Rpackage structure
# Where PKGROOT moved to root directory

.PHONY: all mxnet
all: $(SHLIB)


PKG_CPPFLAGS = -I$(PKGROOT)/include -I$(PKGROOT)/dmlc-core/include
PKG_LIBS = -L../inst/libs/x64/ -llibmxnet
PKG_CPPFLAGS = -I../inst/include
PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) -L../inst/libs/x64/ -llibmxnet
11 changes: 4 additions & 7 deletions example/kaggle-ndsb2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,14 @@ Prepare raw data in ```data``` folder. The tree of ```data``` folder is like

2. Run ```python3 Preprocessing.py``` to do preprocessing of data.
3. After we have the processed data, run ```python3 Train.py``` to generate ```submission.csv```

4. We also provide R code with the same network structure and parameters in ```Train.R```. For now it uses the pre-processed CSV files generated by ```Preprocessing.py```. We will add the pre-processing R code later.

Note:
- To run with python2, you need to change ```Train.py, line #139``` to the python2 syntax.
- To modify network, change ```get_lenet``` function in ```Train.py```
- To modify the network, change the ```get_lenet``` function in ```Train.py``` or the ```get.lenet``` function in ```Train.R```.
- We also provide ```local_train```, ```local_test``` file for local parameter tuning.
- To run on multiple GPU with huge network, or questions about saving network paramter etc, please refer [MXNet docs](https://mxnet.readthedocs.org/en/latest/)
- To run on multiple GPUs with a huge network, or for questions about saving network parameters etc., please refer to the [MXNet docs](https://mxnet.readthedocs.org/en/latest/)


## About MXNet
MXNet is a deep learning framework designed for both efficiency and flexibility by DMLC group. Like all other packages in DMLC, it will fully utilize all the resources to solve the problem under limited resource constraint, with a flexible programming interface. You can use it for all purposes of data science and deep learning tasks with R, Julia, python and more. see



MXNet is a deep learning framework designed for both efficiency and flexibility by the DMLC group. Like all other packages in DMLC, it will fully utilize all the resources to solve the problem under limited resource constraints, with a flexible programming interface. You can use it for all purposes of data science and deep learning tasks with R, Julia, Python and more.
20 changes: 18 additions & 2 deletions example/kaggle-ndsb2/Train.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# Train.R for Second Annual Data Science Bowl
# Deep learning model with GPU support
# Please refer to https://mxnet.readthedocs.org/en/latest/build.html#r-package-installation
# for installation guide

require(mxnet)
require(data.table)

## A LeNet-style net that takes the difference of each frame as input.
get.lenet <- function() {
source <- mx.symbol.Variable("data")
source <- (source-128) / 128
Expand All @@ -27,15 +33,19 @@ get.lenet <- function() {
mx.symbol.Pooling(
net, pool.type = "max", kernel = c(2, 2), stride = c(2, 2)
)
# first fullc
flatten <- mx.symbol.Flatten(net)
flatten <- mx.symbol.Dropout(flatten)
fc1 <- mx.symbol.FullyConnected(data = flatten, num.hidden = 600)
# Name the final layer as softmax so it auto matches the naming of data iterator
# Otherwise we can also change the provide_data in the data iter
return(mx.symbol.LogisticRegressionOutput(data = fc1, name = 'softmax'))
}

network <- get.lenet()
batch_size <- 32

# CSVIter is used here, since the data can't fit into memory
data_train <- mx.io.CSVIter(
data.csv = "./train-64x64-data.csv", data.shape = c(64, 64, 30),
label.csv = "./train-stytole.csv", label.shape = 600,
Expand All @@ -48,6 +58,7 @@ data_validate <- mx.io.CSVIter(
batch.size = 1
)

# Custom evaluation metric on CRPS.
mx.metric.CRPS <- mx.metric.custom("CRPS", function(label, pred) {
pred <- as.array(pred)
label <- as.array(label)
Expand All @@ -61,6 +72,7 @@ mx.metric.CRPS <- mx.metric.custom("CRPS", function(label, pred) {
return(sum((label - pred) ^ 2) / length(label))
})

# Training the systole net (the identifiers below spell it "stytole")
mx.set.seed(0)
stytole_model <- mx.model.FeedForward.create(
X = data_train,
Expand All @@ -73,8 +85,10 @@ stytole_model <- mx.model.FeedForward.create(
eval.metric = mx.metric.CRPS
)

# Predict systole
stytole_prob = predict(stytole_model, data_validate)

# Training the diastole net
network = get.lenet()
batch_size = 32
data_train <-
Expand All @@ -95,6 +109,7 @@ diastole_model = mx.model.FeedForward.create(
eval.metric = mx.metric.CRPS
)

# Predict diastole
diastole_prob = predict(diastole_model, data_validate)

accumulate_result <- function(validate_lst, prob) {
Expand All @@ -109,11 +124,12 @@ diastole_result = as.data.frame(accumulate_result("./validate-label.csv", diasto

train_csv <- read.table("./train-label.csv", sep = ',')

# Two persons are missing due to frame selection; use udibr's histogram result for them instead
doHist <- function(data) {
res <- rep(0, 600)
for (i in 1:length(data)) {
for (j in round(data[i]):600) {
res[j] = res[j] + 1
res[j] = res[j] + 1
}
}
return(res / length(data))
Expand Down Expand Up @@ -147,7 +163,7 @@ for (i in 1:nrow(res)) {
res[i, 2:601] <- hDiastole
} else {
res[i, 2:601] <- hSystole
}
}
}
}

Expand Down

0 comments on commit 2645bac

Please sign in to comment.