A few fixes (apache#163)
* Fix batch norm GPU kernel; register random operators on GPU

* Register the sparse random op on GPU, too
eric-haibin-lin authored Aug 11, 2017
1 parent ceca9b6 commit 1c60a05
Showing 4 changed files with 47 additions and 24 deletions.
4 changes: 2 additions & 2 deletions src/operator/batch_norm.cu
@@ -283,7 +283,7 @@ __global__ void BatchNormalizationUpdateOutputKernel(
}

// Write normalized and update the output
const AccReal gamma = weight.numElements() > 0
const AccReal gamma = ((flags & FIX_GAMMA_FLAG) == 0 && weight.numElements() > 0)
? ScalarConvert<DType, AccReal>::to(weight[plane])
: ScalarConvert<int, AccReal>::to(1);
const AccReal beta = bias.numElements() > 0 ? ScalarConvert<DType, AccReal>::to(bias[plane])
@@ -332,7 +332,7 @@ static __global__ void BatchNormalizationBackwardKernel(
invstd = VARIANCE_TO_INVSTD(tensors.runningVar[plane], eps);
}

const AccReal weightVal = tensors.weight.numElements() > 0 ?
const AccReal weightVal = ((flags & FIX_GAMMA_FLAG) == 0 && tensors.weight.numElements() > 0) ?
ScalarConvert<DType, AccReal>::to(tensors.weight[plane]) : AccReal(1);
const AccReal norm = AccReal(1) / N;

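Both hunks above make the non-CuDNN GPU kernels honor FIX_GAMMA_FLAG: when the flag is set, gamma is treated as 1 in the forward output kernel and in the backward kernel instead of being read from the weight tensor. A minimal sketch of what this is meant to guarantee from the Python side, assuming a CUDA build and that cudnn_off=True routes BatchNorm to these kernels (shapes and values are illustrative only, not from the patch):

import mxnet as mx
import numpy as np

ctx = mx.gpu(0)
x = mx.nd.array(np.random.normal(size=(2, 3, 4, 4)), ctx=ctx)
gamma = mx.nd.full((3,), 5.0, ctx=ctx)        # deliberately not 1
ones = mx.nd.ones((3,), ctx=ctx)
beta = mx.nd.zeros((3,), ctx=ctx)
mean, var = mx.nd.zeros((3,), ctx=ctx), mx.nd.ones((3,), ctx=ctx)

# With fix_gamma=True the fixed kernel should ignore `gamma` and behave as if it were 1.
fixed = mx.nd.BatchNorm(x, gamma, beta, mean, var, fix_gamma=True,
                        use_global_stats=True, cudnn_off=True)
unit = mx.nd.BatchNorm(x, ones, beta, mean, var, fix_gamma=False,
                       use_global_stats=True, cudnn_off=True)
print(np.allclose(fixed.asnumpy(), unit.asnumpy(), atol=1e-5))   # expect True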
60 changes: 40 additions & 20 deletions src/operator/random/sample_op.cu
@@ -28,21 +28,20 @@ namespace op {

// GPU versions of uniform and normal distribution.
template<>
void SampleUniform_<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
void SampleUniformDnsImpl<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const OpReqType& req,
TBlob* output) {
using namespace mxnet::op;
using namespace mshadow::expr;
typedef gpu xpu;
mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
const SampleUniformParam& param = nnvm::get<SampleUniformParam>(attrs.parsed);
mshadow::Random<xpu, float> *prnd = ctx.requested[0].get_random<xpu, float>(s);
if (outputs[0].type_flag_ != mshadow::kFloat32) {
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
if (output->type_flag_ != mshadow::kFloat32) {
MSHADOW_REAL_TYPE_SWITCH(output->type_flag_, DType, {
// Not float32: use workspace and copy to output
mshadow::Tensor<xpu, 2, DType> out = outputs[0].FlatTo2D<xpu, DType>(s);
mshadow::Tensor<xpu, 2, DType> out = output->FlatTo2D<xpu, DType>(s);
mshadow::Tensor<xpu, 1, float> workspace =
ctx.requested[1].get_space_typed<xpu, 1, float>
(mshadow::Shape1(out.shape_.Size()), s);
@@ -51,27 +50,36 @@ void SampleUniform_<gpu>(const nnvm::NodeAttrs& attrs,
});
} else {
// float32: write directly into output
mshadow::Tensor<xpu, 2, float> out = outputs[0].FlatTo2D<xpu, float>(s);
mshadow::Tensor<xpu, 2, float> out = output->FlatTo2D<xpu, float>(s);
prnd->SampleUniform(&out, param.low, param.high);
}
}

template<>
void SampleNormal_<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
void SampleUniform_<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
TBlob out = outputs[0];
SampleUniformDnsImpl<gpu>(attrs, ctx, req[0], &out);
}

template<>
void SampleNormalDnsImpl<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const OpReqType& req,
TBlob* output) {
using namespace mxnet::op;
using namespace mshadow::expr;
typedef gpu xpu;
mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
const SampleNormalParam& param = nnvm::get<SampleNormalParam>(attrs.parsed);
mshadow::Random<xpu, float> *prnd = ctx.requested[0].get_random<xpu, float>(s);
if (outputs[0].type_flag_ != mshadow::kFloat32) {
MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
if (output->type_flag_ != mshadow::kFloat32) {
MSHADOW_REAL_TYPE_SWITCH(output->type_flag_, DType, {
// Not float32: use workspace and copy to output
mshadow::Tensor<xpu, 2, DType> out = outputs[0].FlatTo2D<xpu, DType>(s);
mshadow::Tensor<xpu, 2, DType> out = output->FlatTo2D<xpu, DType>(s);
mshadow::Tensor<xpu, 1, float> workspace =
ctx.requested[1].get_space_typed<xpu, 1, float>
(mshadow::Shape1(out.shape_.Size()), s);
@@ -80,16 +88,28 @@ void SampleNormal_<gpu>(const nnvm::NodeAttrs& attrs,
});
} else {
// float32: write directly into output
mshadow::Tensor<xpu, 2, float> out = outputs[0].FlatTo2D<xpu, float>(s);
mshadow::Tensor<xpu, 2, float> out = output->FlatTo2D<xpu, float>(s);
prnd->SampleGaussian(&out, param.loc, param.scale);
}
}

template<>
void SampleNormal_<gpu>(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
TBlob out = outputs[0];
SampleNormalDnsImpl<gpu>(attrs, ctx, req[0], &out);
}

NNVM_REGISTER_OP(random_uniform)
.set_attr<FCompute>("FCompute<gpu>", SampleUniform_<gpu>);
.set_attr<FCompute>("FCompute<gpu>", SampleUniform_<gpu>)
.set_attr<FComputeEx>("FComputeEx<gpu>", SampleUniformEx_<gpu>);

NNVM_REGISTER_OP(random_normal)
.set_attr<FCompute>("FCompute<gpu>", SampleNormal_<gpu>);
.set_attr<FCompute>("FCompute<gpu>", SampleNormal_<gpu>)
.set_attr<FComputeEx>("FComputeEx<gpu>", SampleNormalEx_<gpu>);

} // namespace op
} // namespace mxnet
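The GPU samplers are refactored along the same lines as the CPU side: the dense sampling body moves into SampleUniformDnsImpl<gpu> / SampleNormalDnsImpl<gpu>, which take a single output TBlob, the original FCompute entry points become thin wrappers around outputs[0], and the new FComputeEx<gpu> registrations (SampleUniformEx_ / SampleNormalEx_) let random_uniform and random_normal also fill non-default-storage outputs on the GPU. A rough usage sketch from Python, assuming a CUDA build and that mx.nd.zeros accepts stype= in this branch:

import mxnet as mx

ctx = mx.gpu(0)
shape = (100, 100)
dns = mx.nd.zeros(shape, ctx=ctx)                        # default storage
rsp = mx.nd.zeros(shape, ctx=ctx, stype='row_sparse')    # sparse storage

# Both calls now dispatch on GPU: FCompute for the dense output,
# FComputeEx for the row_sparse output.
mx.nd.random_uniform(low=0, high=1, shape=shape, out=dns)
mx.nd.random_uniform(low=0, high=1, shape=shape, out=rsp)
print(rsp.stype, rsp.asnumpy().mean())                   # stype preserved, mean ~0.5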
3 changes: 1 addition & 2 deletions tests/python/unittest/test_operator.py
@@ -867,7 +867,6 @@ def check_batchnorm_training(stype):
rolling_mean = np.random.uniform(size=s)
rolling_std = np.random.uniform(size=s)

stype = 'row_sparse'
data = mx.symbol.Variable('data', stype=stype)
in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(gamma).tostype(stype),
mx.nd.array(beta).tostype(stype)]
@@ -935,7 +934,7 @@ def check_batchnorm_training(stype):
test = mx.symbol.BatchNorm(data, fix_gamma=False, use_global_stats=True, axis=chaxis)
check_numeric_gradient(test, in_location, xmean_std, numeric_eps=1e-2, rtol=0.2, atol=0.01)

stypes = ['row_sparse', 'csr', 'default']
stypes = ['row_sparse', 'default']
for stype in stypes:
check_batchnorm_training(stype)

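The deleted line in check_batchnorm_training re-bound the stype parameter to 'row_sparse', so whatever storage type the loop at the bottom passed in was silently ignored; with it gone, the loop really varies the storage type, and 'csr' is dropped from the list of tested types. A toy illustration of the shadowing problem (not MXNet code):

def check(stype):
    stype = 'row_sparse'      # leftover override: the argument is discarded
    return stype

assert check('default') == 'row_sparse'   # the caller's choice never reaches the test body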
4 changes: 4 additions & 0 deletions tests/python/unittest/test_sparse_ndarray.py
@@ -352,6 +352,10 @@ def test_sparse_nd_output_fallback():
assert(np.sum(out.asnumpy()) != 0)

def test_sparse_nd_random():
""" test sparse random operator on cpu """
# gpu random operator doesn't use fixed seed
    if default_context().device_type == 'gpu':
return
shape = (100, 100)
fns = [mx.nd.random_uniform, mx.nd.random_normal, mx.nd.random_gamma]
for fn in fns:
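The early return makes test_sparse_nd_random CPU-only: the test depends on a fixed seed producing identical draws into a dense and a row_sparse output, and the GPU samplers do not honor the fixed seed that way. A hedged sketch of the comparison pattern the CPU path relies on (the rest of the test body is truncated above, so the details here are assumptions):

import mxnet as mx

shape = (100, 100)
dns = mx.nd.zeros(shape=shape)
rsp = mx.nd.zeros(shape=shape, stype='row_sparse')

mx.random.seed(0)
mx.nd.random_uniform(shape=shape, out=dns)
mx.random.seed(0)
mx.nd.random_uniform(shape=shape, out=rsp)
assert (dns.asnumpy() == rsp.asnumpy()).all()   # identical only when the seed is honored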
