
Merge pull request #85 from antinucleon/master
Add rrelu, dropout
antinucleon committed Sep 16, 2015
2 parents 43afd11 + 2512e2a commit 82450ad
Showing 6 changed files with 342 additions and 32 deletions.
36 changes: 17 additions & 19 deletions src/operator/cudnn_convolution-inl.h
@@ -55,7 +55,7 @@ class CuDNNConvolutionOp : public Operator {
Init(s, in_data, out_data);
}
Tensor<gpu, 1> workspace = ctx.requested[kTempSpace].get_space<gpu>(
-mshadow::Shape1(workspace_), s);
+mshadow::Shape1(forward_workspace_), s);
CHECK_EQ(cudnnConvolutionForward(s->dnn_handle_,
&alpha,
in_desc_,
Expand All @@ -65,7 +65,7 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
algo_,
workspace.dptr_,
-workspace_size_,
+forward_workspace_byte_,
&beta,
out_desc_,
out.dptr_), CUDNN_STATUS_SUCCESS);
@@ -106,7 +106,7 @@ class CuDNNConvolutionOp : public Operator {
Tensor<gpu, 4> data = in_data[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 4> gdata = in_grad[kData].get<gpu, 4, real_t>(s);
Tensor<gpu, 1> workspace = ctx.requested[kTempSpace].get_space<gpu>(
-mshadow::Shape1(workspace_), s);
+mshadow::Shape1(backward_workspace_), s);
if (!param_.no_bias) {
Tensor<gpu, 1> gbias = in_grad[kBias].get<gpu, 1, real_t>(s);
CHECK_EQ(cudnnConvolutionBackwardBias(s->dnn_handle_,
Expand All @@ -126,7 +126,7 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
back_algo_w_,
workspace.dptr_,
-workspace_size_,
+backward_workspace_byte_,
&beta,
filter_desc_,
gwmat.dptr_), CUDNN_STATUS_SUCCESS);
Expand All @@ -139,7 +139,7 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
back_algo_,
workspace.dptr_,
-workspace_size_,
+backward_workspace_byte_,
&beta,
in_desc_,
gdata.dptr_), CUDNN_STATUS_SUCCESS);
Expand All @@ -155,7 +155,7 @@ class CuDNNConvolutionOp : public Operator {
CHECK_EQ(out_data.size(), 1);
if (!init_cudnn_) {
init_cudnn_ = true;
-size_t workspace = static_cast<size_t>(param_.workspace * sizeof(real_t));
+size_t workspace_byte = static_cast<size_t>(param_.workspace * sizeof(real_t));
size_t back_size = 0;
size_t back_size_w = 0;
Tensor<gpu, 4> data = in_data[kData].get<gpu, 4, real_t>(s);
@@ -210,23 +210,23 @@ class CuDNNConvolutionOp : public Operator {
conv_desc_,
out_desc_,
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
-workspace,
+workspace_byte,
&algo_), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnGetConvolutionBackwardFilterAlgorithm(s->dnn_handle_,
in_desc_,
out_desc_,
conv_desc_,
filter_desc_,
CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
-workspace,
+workspace_byte,
&back_algo_w_), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnGetConvolutionBackwardDataAlgorithm(s->dnn_handle_,
filter_desc_,
out_desc_,
conv_desc_,
in_desc_,
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
-workspace,
+workspace_byte,
&back_algo_), CUDNN_STATUS_SUCCESS);
CHECK_EQ(cudnnGetConvolutionBackwardDataWorkspaceSize(s->dnn_handle_,
filter_desc_,
Expand All @@ -242,26 +242,24 @@ class CuDNNConvolutionOp : public Operator {
filter_desc_,
back_algo_w_,
&back_size_w), CUDNN_STATUS_SUCCESS);
-back_size = std::max(back_size, back_size_w);
+backward_workspace_byte_ = std::max(back_size, back_size_w);
CHECK_EQ(cudnnGetConvolutionForwardWorkspaceSize(s->dnn_handle_,
in_desc_,
filter_desc_,
conv_desc_,
out_desc_,
algo_,
-&workspace), CUDNN_STATUS_SUCCESS);
-workspace = std::max(workspace, back_size);
-CHECK_GE(param_.workspace * sizeof(real_t), workspace + sizeof(real_t))
-  << "\nMinimum workspace: " << workspace << "\n"
-  << "Given: " << param_.workspace * sizeof(real_t);
-workspace_ = workspace / sizeof(real_t) + 1;
-workspace_size_ = workspace_ * sizeof(real_t);
+&forward_workspace_byte_), CUDNN_STATUS_SUCCESS);
+forward_workspace_ = forward_workspace_byte_ / sizeof(real_t) + 1;
+backward_workspace_ = backward_workspace_byte_ / sizeof(real_t) + 1;
}
}

bool init_cudnn_;
-size_t workspace_;
-size_t workspace_size_;
+size_t forward_workspace_;
+size_t backward_workspace_;
+size_t forward_workspace_byte_;
+size_t backward_workspace_byte_;
cudnnDataType_t dtype_;
cudnnTensorDescriptor_t in_desc_;
cudnnTensorDescriptor_t out_desc_;
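Taken together, the cudnn_convolution-inl.h changes stop sharing a single workspace size between the forward and backward passes: the forward and backward byte requirements are queried separately, stored in forward_workspace_byte_ and backward_workspace_byte_, and converted to real_t element counts with a one-element round-up. The old CHECK_GE against param_.workspace is dropped, so the temp space is sized purely from cuDNN's reported requirements. Below is a minimal standalone sketch of that sizing pattern; the placeholder byte counts stand in for the cudnnGet*WorkspaceSize results and are not from the diff.

#include <algorithm>
#include <cstddef>
#include <cstdio>

typedef float real_t;  // matches mshadow's default real_t

int main() {
  // Placeholder byte counts; in the real code these come from
  // cudnnGetConvolutionForwardWorkspaceSize and the two backward queries.
  size_t forward_workspace_byte = 4096;
  size_t back_size = 8192;    // backward-data workspace bytes
  size_t back_size_w = 6144;  // backward-filter workspace bytes

  // The two backward passes share one buffer, so keep the larger requirement.
  size_t backward_workspace_byte = std::max(back_size, back_size_w);

  // Convert bytes to a count of real_t elements, rounding up by one element,
  // mirroring forward_workspace_ = forward_workspace_byte_ / sizeof(real_t) + 1.
  size_t forward_workspace = forward_workspace_byte / sizeof(real_t) + 1;
  size_t backward_workspace = backward_workspace_byte / sizeof(real_t) + 1;

  std::printf("forward: %zu elems, backward: %zu elems\n",
              forward_workspace, backward_workspace);
  return 0;
}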
192 changes: 192 additions & 0 deletions src/operator/dropout-inl.h
@@ -0,0 +1,192 @@
/*!
* Copyright (c) 2015 by Contributors
* \file dropout-inl.h
* \brief
* \author Bing Xu
*/

#ifndef MXNET_OPERATOR_DROPOUT_INL_H_
#define MXNET_OPERATOR_DROPOUT_INL_H_
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <mxnet/operator.h>
#include <map>
#include <vector>
#include <string>
#include <utility>
#include "./operator_common.h"
#include "./mshadow_op.h"

enum DropoutOpInputs {kData};
enum DropoutOpOutputs {kOut, kMask};
enum DropoutOpForwardResource {kRandom};

namespace mxnet {
namespace op {

struct DropoutParam : public dmlc::Parameter<DropoutParam> {
float p;
DMLC_DECLARE_PARAMETER(DropoutParam) {
DMLC_DECLARE_FIELD(p).set_default(0.5)
.set_range(0, 1)
.describe("Fraction of the input that gets dropped out at training time");
}
}; // struct DropoutParam

template<typename xpu>
class DropoutOp : public Operator {
public:
explicit DropoutOp(DropoutParam param) {
this->pkeep_ = 1.0f - param.p;
}

virtual void Forward(const OpContext &ctx,
const std::vector<TBlob> &in_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &out_data,
const std::vector<TBlob> &aux_states) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(in_data.size(), 1);
if (ctx.is_train) {
CHECK_EQ(out_data.size(), 2);
}
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 4> data, out, mask;
if (in_data[kData].ndim() == 2) {
uint32_t ds[] = {in_data[kData].shape_[0], in_data[kData].shape_[1], 1, 1};
TShape dshape(ds, ds + 4);
data = in_data[kData].get_with_shape<xpu, 4, real_t>(dshape, s);
out = out_data[kOut].get_with_shape<xpu, 4, real_t>(dshape, s);
if (ctx.is_train) {
mask = out_data[kMask].get_with_shape<xpu, 4, real_t>(dshape, s);
}
} else {
data = in_data[kData].get<xpu, 4, real_t>(s);
out = out_data[kOut].get<xpu, 4, real_t>(s);
if (ctx.is_train) {
mask = out_data[kMask].get<xpu, 4, real_t>(s);
}
}
if (ctx.is_train) {
Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu>(s);
mask = F<mshadow_op::threshold>(prnd->uniform(mask.shape_), pkeep_) * (1.0f / pkeep_);
Assign(out, req[kOut], data * mask);
} else {
Assign(out, req[kOut], data + 0.0f);
}
}

virtual void Backward(const OpContext &ctx,
const std::vector<TBlob> &out_grad,
const std::vector<TBlob> &in_data,
const std::vector<TBlob> &out_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &in_grad,
const std::vector<TBlob> &aux_states) {
using namespace mshadow;
using namespace mshadow::expr;
CHECK_EQ(out_grad.size(), 1);
CHECK_EQ(in_grad.size(), 1);
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 4> grad, gdata, mask;
if (out_grad[kOut].ndim() == 2) {
uint32_t ds[] = {out_grad[kOut].shape_[0], out_grad[kOut].shape_[1], 1, 1};
TShape dshape(ds, ds + 4);
gdata = in_grad[kData].get_with_shape<xpu, 4, real_t>(dshape, s);
grad = out_grad[kOut].get_with_shape<xpu, 4, real_t>(dshape, s);
mask = out_data[kMask].get_with_shape<xpu, 4, real_t>(dshape, s);
} else {
grad = out_grad[kOut].get<xpu, 4, real_t>(s);
gdata = in_grad[kData].get<xpu, 4, real_t>(s);
mask = out_data[kMask].get<xpu, 4, real_t>(s);
}
Assign(gdata, req[kData], grad * mask);
}

private:
real_t pkeep_;
}; // class DropoutOp


template<typename xpu>
Operator *CreateOp(DropoutParam param);

#if DMLC_USE_CXX11
class DropoutProp : public OperatorProperty {
public:
void Init(const std::vector<std::pair<std::string, std::string> >& kwargs) override {
param_.Init(kwargs);
}

bool InferShape(std::vector<TShape> *in_shape,
std::vector<TShape> *out_shape,
std::vector<TShape> *aux_shape) const override {
using namespace mshadow;
CHECK_EQ(in_shape->size(), 1);
const TShape &dshape = in_shape->at(0);
if (dshape.ndim() == 0) return false;
out_shape->clear();
out_shape->push_back(dshape);
out_shape->push_back(dshape);
return true;
}

OperatorProperty* Copy() const override {
auto ptr = new DropoutProp();
ptr->param_ = param_;
return ptr;
}

std::string TypeString() const override {
return "Dropout";
}

std::vector<int> DeclareBackwardDependency(
const std::vector<int> &out_grad,
const std::vector<int> &in_data,
const std::vector<int> &out_data) const override {
return {out_grad[kOut], out_data[kMask]};
}

std::vector<std::pair<int, void*> > BackwardInplaceOption(
const std::vector<int> &out_grad,
const std::vector<int> &in_data,
const std::vector<int> &out_data,
const std::vector<void*> &in_grad) const override {
return {{out_grad[kOut], in_grad[kData]}};
}

std::vector<std::pair<int, void*> > ForwardInplaceOption(
const std::vector<int> &in_data,
const std::vector<void*> &out_data) const override {
return {{in_data[kData], out_data[kOut]}};
}

std::vector<ResourceRequest> ForwardResource(
const std::vector<TShape> &in_shape) const override {
return {ResourceRequest::kRandom};
}

int NumVisibleOutputs() const override {
return 1;
}

int NumOutputs() const override {
return 2;
}

std::vector<std::string> ListOutputs() const override {
return {"output", "mask"};
}

Operator* CreateOperator(Context ctx) const;

private:
DropoutParam param_;
}; // class DropoutProp
#endif // DMLC_USE_CXX11
} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_DROPOUT_INL_H_
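The training branch of Forward above is inverted dropout: prnd->uniform fills the mask buffer with draws in [0, 1), mshadow_op::threshold maps each draw to 1 when it is below pkeep_ and to 0 otherwise, and the surviving entries are scaled by 1/pkeep_ so the expected value of data * mask equals data and the inference path can simply copy the input. Backward then multiplies the incoming gradient by the saved mask. A self-contained sketch of the same idea on a flat array (plain C++ with std::mt19937 standing in for mshadow's Random; none of these names come from the diff):

#include <cstdio>
#include <random>
#include <vector>

// Illustrative inverted dropout on a flat buffer: keep each element with
// probability pkeep and scale kept elements by 1/pkeep so the expectation
// of the output matches the input.
std::vector<float> DropoutForward(const std::vector<float> &data, float pkeep,
                                  std::vector<float> *mask, std::mt19937 *rng) {
  std::uniform_real_distribution<float> uniform(0.0f, 1.0f);
  std::vector<float> out(data.size());
  mask->resize(data.size());
  for (size_t i = 0; i < data.size(); ++i) {
    // Mirrors threshold(uniform, pkeep_) * (1.0f / pkeep_) in the diff.
    (*mask)[i] = (uniform(*rng) < pkeep) ? (1.0f / pkeep) : 0.0f;
    out[i] = data[i] * (*mask)[i];
  }
  return out;
}

// Backward: the input gradient is the incoming gradient times the saved mask.
std::vector<float> DropoutBackward(const std::vector<float> &grad,
                                   const std::vector<float> &mask) {
  std::vector<float> gdata(grad.size());
  for (size_t i = 0; i < grad.size(); ++i) gdata[i] = grad[i] * mask[i];
  return gdata;
}

int main() {
  std::mt19937 rng(0);
  std::vector<float> mask;
  std::vector<float> data = {1.0f, 2.0f, 3.0f, 4.0f};
  std::vector<float> out = DropoutForward(data, 0.5f, &mask, &rng);
  for (float v : out) std::printf("%g ", v);  // each entry is 0 or 2x the input
  std::printf("\n");
  return 0;
}

Note that the mask is only an internal output: NumOutputs() returns 2 but NumVisibleOutputs() returns 1, and DeclareBackwardDependency keeps just out_grad[kOut] and out_data[kMask] alive for the backward pass.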

32 changes: 32 additions & 0 deletions src/operator/dropout.cc
@@ -0,0 +1,32 @@
/*!
* Copyright (c) 2015 by Contributors
* \file dropout.cc
* \brief
* \author Bing Xu
*/

#include "./dropout-inl.h"

namespace mxnet {
namespace op {
template<>
Operator *CreateOp<cpu>(DropoutParam param) {
return new DropoutOp<cpu>(param);
}

// DO_BIND_DISPATCH comes from operator_common.h
Operator *DropoutProp::CreateOperator(Context ctx) const {
DO_BIND_DISPATCH(CreateOp, param_);
}

DMLC_REGISTER_PARAMETER(DropoutParam);

MXNET_REGISTER_OP_PROPERTY(Dropout, DropoutProp)
.describe("Apply dropout to input")
.add_argument("data", "Symbol", "Input data to dropout.")
.add_arguments(DropoutParam::__FIELDS__());

} // namespace op
} // namespace mxnet
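CreateOperator in dropout.cc delegates to DO_BIND_DISPATCH (from operator_common.h, as the comment above notes), which selects the device-specific CreateOp specialization for the context the operator is bound with: CreateOp<cpu> here and CreateOp<gpu> from dropout.cu below. A rough sketch of that dispatch pattern with stand-in types; this is not the real macro or the mxnet headers.

#include <cstdio>
#include <stdexcept>

// Stand-in device tags and context; the real ones come from mshadow and mxnet.
struct cpu { static const int kDevMask = 1 << 0; };
struct gpu { static const int kDevMask = 1 << 1; };
struct Context { int dev_mask; };
struct Operator { virtual ~Operator() {} };
struct DropoutParam { float p; };

// One specialization per device, as in dropout.cc and dropout.cu.
template <typename xpu>
Operator *CreateOp(DropoutParam param);

template <>
Operator *CreateOp<cpu>(DropoutParam param) { return new Operator(); }
template <>
Operator *CreateOp<gpu>(DropoutParam param) { return new Operator(); }

// Roughly what a DO_BIND_DISPATCH-style helper does: call the specialization
// that matches the device mask of the given context.
Operator *CreateOperator(const Context &ctx, DropoutParam param) {
  if (ctx.dev_mask == cpu::kDevMask) return CreateOp<cpu>(param);
  if (ctx.dev_mask == gpu::kDevMask) return CreateOp<gpu>(param);
  throw std::invalid_argument("unknown device");
}

int main() {
  Context ctx;
  ctx.dev_mask = cpu::kDevMask;
  DropoutParam param;
  param.p = 0.5f;
  Operator *op = CreateOperator(ctx, param);
  std::printf("created op for cpu context: %p\n", static_cast<void *>(op));
  delete op;
  return 0;
}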


19 changes: 19 additions & 0 deletions src/operator/dropout.cu
@@ -0,0 +1,19 @@
/*!
* Copyright (c) 2015 by Contributors
* \file dropout.cu
* \brief
* \author Bing Xu
*/

#include "./dropout-inl.h"

namespace mxnet {
namespace op {
template<>
Operator *CreateOp<gpu>(DropoutParam param) {
return new DropoutOp<gpu>(param);
}
} // namespace op
} // namespace mxnet

