diff --git a/src/operator/nn/cudnn/cudnn_convolution-inl.h b/src/operator/nn/cudnn/cudnn_convolution-inl.h
index 056f93b5237c..c5beb8a9c575 100644
--- a/src/operator/nn/cudnn/cudnn_convolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_convolution-inl.h
@@ -212,16 +212,6 @@ class CuDNNConvolutionOp {
     typename DataType<DType>::ScaleType alpha = 1.0f;
     typename DataType<DType>::ScaleType beta = 0.0f;
     typename DataType<DType>::ScaleType beta_add = 1.0f;
-    if (!param_.no_bias && (req[conv::kBias] != kNullOp)) {
-      Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
-      CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
-                                              &alpha,
-                                              out_desc_,
-                                              grad_ptr,
-                                              req[conv::kBias] == kAddTo ? &beta_add : &beta,
-                                              bias_desc_,
-                                              gbias.dptr_));
-    }
     if (req[conv::kWeight] != kNullOp) {
       CHECK_EQ(add_to_weight_, req[conv::kWeight] == kAddTo);
       CUDNN_CALL(cudnnConvolutionBackwardFilter(s->dnn_handle_,
@@ -238,6 +228,16 @@ class CuDNNConvolutionOp {
                                                 filter_desc_,
                                                 gwmat_ptr));
     }
+    if (!param_.no_bias && (req[conv::kBias] != kNullOp)) {
+      Tensor<gpu, 1, DType> gbias = in_grad[conv::kBias].get<gpu, 1, DType>(s);
+      CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
+                                              &alpha,
+                                              out_desc_,
+                                              grad_ptr,
+                                              req[conv::kBias] == kAddTo ? &beta_add : &beta,
+                                              bias_desc_,
+                                              gbias.dptr_));
+    }
     if (req[conv::kData] != kNullOp) {
       CUDNN_CALL(cudnnConvolutionBackwardData(s_dgrad.GetStream()->dnn_handle_,
                                               &alpha,
@@ -459,13 +459,14 @@ class CuDNNConvolutionOp {
     if (!param_.no_bias) {
       mxnet::TShape bias = in_shape[conv::kBias];
+      int bias_dim = static_cast<int>(bias[0]);
       std::vector<int> bias_shape = {1,
-                                     static_cast<int>(bias[0]),
+                                     bias_dim,
                                      1,
                                      1};
-      std::vector<int> bias_stride = {static_cast<int>(bias[0]), 1, 1, 1};
+      std::vector<int> bias_stride = {bias_dim, 1, bias_dim, bias_dim};
       if (param_.kernel.ndim() == 3) {
         bias_shape.push_back(1);
-        bias_stride.push_back(1);
+        bias_stride.push_back(bias_dim);
       }
       CUDNN_CALL(cudnnSetTensorNdDescriptor(bias_desc_,
                                             dtype_,
diff --git a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h
index b701883366ec..4f025113a45e 100644
--- a/src/operator/nn/cudnn/cudnn_deconvolution-inl.h
+++ b/src/operator/nn/cudnn/cudnn_deconvolution-inl.h
@@ -201,16 +201,6 @@ class CuDNNDeconvolutionOp {
         req[deconv::kData] == kAddTo ? 1.0f : 0.0f;
     typename DataType<DType>::ScaleType weight_beta =
         req[deconv::kWeight] == kAddTo ? 1.0f : 0.0f;
-    if (!param_.no_bias && (req[deconv::kBias] != kNullOp)) {
-      Tensor<gpu, 1, DType> gbias = in_grad[deconv::kBias].get<gpu, 1, DType>(s);
-      CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
-                                              &alpha,
-                                              out_desc_,
-                                              grad_ptr + out_offset_ * g,
-                                              &bias_beta,
-                                              bias_desc_,
-                                              gbias.dptr_ + bias_offset_ * g));
-    }
     if (req[deconv::kWeight] != kNullOp) {
       CHECK_EQ(add_to_weight_, req[deconv::kWeight] == kAddTo);
       CUDNN_CALL(cudnnConvolutionBackwardFilter(
@@ -228,6 +218,16 @@ class CuDNNDeconvolutionOp {
           filter_desc_,
           gwmat_ptr + weight_offset_ * g));
     }
+    if (!param_.no_bias && (req[deconv::kBias] != kNullOp)) {
+      Tensor<gpu, 1, DType> gbias = in_grad[deconv::kBias].get<gpu, 1, DType>(s);
+      CUDNN_CALL(cudnnConvolutionBackwardBias(s->dnn_handle_,
+                                              &alpha,
+                                              out_desc_,
+                                              grad_ptr + out_offset_ * g,
+                                              &bias_beta,
+                                              bias_desc_,
+                                              gbias.dptr_ + bias_offset_ * g));
+    }
     if (req[deconv::kData] != kNullOp) {
       CUDNN_CALL(cudnnConvolutionForward(s->dnn_handle_,
                                          &alpha,
@@ -460,13 +460,14 @@ class CuDNNDeconvolutionOp {
     if (!param_.no_bias) {
       mxnet::TShape bias = in_shape[deconv::kBias];
       bias_offset_ = bias[0] / param_.num_group;
+      int bias_dim = static_cast<int>(bias_offset_);
       std::vector<int> bias_shape = {1,
-                                     static_cast<int>(bias[0] / param_.num_group),
+                                     bias_dim,
                                      1,
                                      1};
-      std::vector<int> bias_stride = {static_cast<int>(bias_offset_), 1, 1, 1};
+      std::vector<int> bias_stride = {bias_dim, 1, bias_dim, bias_dim};
       if (param_.kernel.ndim() == 3) {
         bias_shape.push_back(1);
-        bias_stride.push_back(1);
+        bias_stride.push_back(bias_dim);
       }
       CUDNN_CALL(cudnnSetTensorNdDescriptor(bias_desc_,
                                             dtype_,