diff --git a/src/operator/optimizer_op-inl.h b/src/operator/optimizer_op-inl.h index 2df574c46909..291d98013d8a 100644 --- a/src/operator/optimizer_op-inl.h +++ b/src/operator/optimizer_op-inl.h @@ -225,10 +225,10 @@ struct MultiSGDKernelParam { template struct MultiSGDKernel { template - MSHADOW_XINLINE static void Map(int i, const MultiSGDKernelParam& param, + MSHADOW_XINLINE static void Map(index_t i, const MultiSGDKernelParam& param, const OpReqType req) { for (int index = 0; index < param.count; ++index) { - if ((size_t)i < param.sizes[index]) { + if (i < static_cast(param.sizes[index])) { MPDType w = has_mixed_precision ? param.weights32[index][i] : MPDType(param.weights[index][i]); MPDType mom = has_momentum ? param.mom[index][i] : MPDType(0); @@ -381,7 +381,7 @@ inline void MultiSGDMomUpdate(const nnvm::NodeAttrs& attrs, struct SGDKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* weight_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* weight_data, const DType* grad_data, const DType param_clip_gradient, const DType param_lr, const DType param_wd, const DType param_rescale_grad, const OpReqType req) { @@ -429,9 +429,9 @@ struct SGDDnsRspKernel { // IType is row sparse idx type // i is the ith element in row sparse gradient template - MSHADOW_XINLINE static void Map(int i, const index_t row_length, DType* out, const DType* weight, - const IType* grad_idx, const DType *grad_val, - const DType clip_gradient, const DType lr, + MSHADOW_XINLINE static void Map(index_t i, const index_t row_length, DType* out, + const DType* weight, const IType* grad_idx, + const DType *grad_val, const DType clip_gradient, const DType lr, const DType wd, const DType rescale_grad) { using nnvm::dim_t; using namespace mshadow_op; @@ -457,9 +457,9 @@ struct SGDDnsRspKernel { // IType is row sparse idx type // i is the ith row in row sparse gradient template - MSHADOW_XINLINE static void Map(int i, const index_t row_length, DType* out, const DType* weight, - const IType* grad_idx, const DType *grad_val, - const DType clip_gradient, const DType lr, + MSHADOW_XINLINE static void Map(index_t i, const index_t row_length, DType* out, + const DType* weight, const IType* grad_idx, + const DType *grad_val, const DType clip_gradient, const DType lr, const DType wd, const DType rescale_grad) { for (index_t j = 0; j < row_length; j++) { index_t data_i = grad_idx[i] * row_length + j; @@ -600,10 +600,11 @@ struct SGDMomParam : public dmlc::Parameter { struct SGDMomKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, DType* mom_data, const DType* weight_data, - const DType* grad_data, const DType param_clip_gradient, const DType param_momentum, - const DType param_lr, const DType param_wd, const DType param_rescale_grad, - const OpReqType req) { + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* mom_data, + const DType* weight_data, const DType* grad_data, + const DType param_clip_gradient, const DType param_momentum, + const DType param_lr, const DType param_wd, + const DType param_rescale_grad, const OpReqType req) { if (param_clip_gradient >= 0.0f) { mom_data[i] = param_momentum*mom_data[i] - param_lr*param_wd*weight_data[i] @@ -654,7 +655,7 @@ inline bool MP_InferType(const nnvm::NodeAttrs& attrs, struct MP_SGDKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* weight_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* weight_data, const DType* grad_data, float* weight32, const float param_clip_gradient, const float param_lr, const float param_wd, const float param_rescale_grad, const OpReqType req) { @@ -698,7 +699,7 @@ inline void MP_SGDUpdate(const nnvm::NodeAttrs& attrs, struct MP_SGDMomKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, float* mom_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, float* mom_data, const DType* weight_data, const DType* grad_data, float* weight32, const float param_clip_gradient, const float param_momentum, const float param_lr, const float param_wd, const float param_rescale_grad, const OpReqType req) { @@ -749,7 +750,7 @@ struct SGDMomDnsRspDnsKernel; template struct SGDMomDnsRspDnsKernel { template - MSHADOW_XINLINE static void Map(int i, index_t row_length, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, index_t row_length, DType* out_data, DType* mom_data, const DType* weight_data, const IType* grad_idx, const DType* grad_data, const DType clip_gradient, const DType momentum, const DType lr, const DType wd, const DType rescale_grad) { @@ -776,7 +777,7 @@ struct SGDMomDnsRspDnsKernel { template struct SGDMomDnsRspDnsKernel { template - MSHADOW_XINLINE static void Map(int i, index_t row_length, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, index_t row_length, DType* out_data, DType* mom_data, const DType* weight_data, const IType* grad_idx, const DType* grad_data, const DType clip_gradient, const DType momentum, const DType lr, const DType wd, const DType rescale_grad) { @@ -1060,7 +1061,7 @@ struct NAGMomParam : public dmlc::Parameter { struct NAGMomKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, DType* mom_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* mom_data, const DType* weight_data, const DType* grad_data, const DType param_clip_gradient, const DType param_momentum, const DType param_lr, const DType param_wd, @@ -1107,7 +1108,7 @@ inline void NAGMomUpdate(const nnvm::NodeAttrs& attrs, struct MP_NAGMomKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, float* mom_data, const DType* weight_data, const DType* grad_data, float* weight32, const float param_clip_gradient, @@ -1204,7 +1205,7 @@ struct FTMLParam : public dmlc::Parameter { struct FTMLKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out, DType* weight, DType* grad, + MSHADOW_XINLINE static void Map(index_t i, DType* out, DType* weight, DType* grad, DType* d, DType* v, DType* z, const DType lr, const DType beta1, const DType beta2, const DType epsilon, const DType t, const DType wd, const DType rescale_grad, const DType clip_grad, @@ -1291,7 +1292,7 @@ struct AdamParam : public dmlc::Parameter { struct AdamUpdateKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* mean_data, DType* var_data, const DType* weight_data, const DType* grad_data, const DType clip_gradient, const DType rescale_grad, const DType beta1, const DType beta2, @@ -1350,7 +1351,7 @@ struct AdamDnsRspDnsKernel; template struct AdamDnsRspDnsKernel { template - MSHADOW_XINLINE static void Map(int i, const nnvm::dim_t row_length, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, const nnvm::dim_t row_length, DType* out_data, DType* mean_data, DType* var_data, const DType* weight_data, const IType* grad_idx, const DType* grad_data, const DType clip_gradient, const DType beta1, const DType beta2, const DType lr, const DType wd, const DType epsilon, const DType rescale_grad) { @@ -1383,7 +1384,7 @@ struct AdamDnsRspDnsKernel { template struct AdamDnsRspDnsKernel { template - MSHADOW_XINLINE static void Map(int i, const nnvm::dim_t row_length, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, const nnvm::dim_t row_length, DType* out_data, DType* mean_data, DType* var_data, const DType* weight_data, const IType* grad_idx, const DType* grad_data, const DType clip_gradient, const DType beta1, const DType beta2, const DType lr, const DType wd, const DType epsilon, const DType rescale_grad) { @@ -1620,7 +1621,7 @@ struct LambUpdatePhaseTwoParam : public dmlc::Parameter struct LambUpdatePhaseOneKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* mean_data, DType* var_data, const DType* weight_data, const DType* grad_data, const DType clip_gradient, const DType rescale_grad, const DType beta1, const DType beta1_t, const DType beta2, const DType beta2_t, @@ -1704,7 +1705,7 @@ inline bool LambUpdatePhaseTwoShape(const nnvm::NodeAttrs& attrs, struct LambUpdatePhaseTwoKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* weight_data, const DType* g, const DType* r1, const DType* r2, DType lr, const DType lower_bound, @@ -1771,7 +1772,7 @@ inline bool MPLambPhaseOneType(const nnvm::NodeAttrs& attrs, struct MPLambUpdatePhaseOneKernel { template - MSHADOW_XINLINE static void Map(int i, float* out_data, + MSHADOW_XINLINE static void Map(index_t i, float* out_data, float* mean_data, float* var_data, const DType* weight_data, const DType* grad_data, const float* weight32_data, const float clip_gradient, const float rescale_grad, @@ -1861,7 +1862,7 @@ inline bool MPLambUpdatePhaseTwoShape(const nnvm::NodeAttrs& attrs, struct MPLambUpdatePhaseTwoKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* weight_data, const float* g, const float* r1, const float* r2, const float* weight32_data, float lr, const float lower_bound, @@ -1952,7 +1953,7 @@ struct RMSPropAlexParam : public dmlc::Parameter { struct RMSPropAlexUpdateKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* state_n_data, DType* state_g_data, DType* delta_data, const DType* weight_data, const DType* grad_data, const DType clip_gradient, const DType rescale_grad, @@ -2051,7 +2052,7 @@ struct RMSPropParam : public dmlc::Parameter { struct RMSPropUpdateKernel { template - MSHADOW_XINLINE static void Map(int i, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* state_n_data, const DType* weight_data, const DType* grad_data, const DType clip_gradient, const DType rescale_grad, @@ -2132,7 +2133,7 @@ struct FtrlParam : public dmlc::Parameter { struct FtrlUpdateKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* n_data, DType* z_data, const DType* weight_data, const DType* grad_data, const DType clip_gradient, const DType rescale_grad, const DType beta, const DType lamda1, @@ -2185,7 +2186,7 @@ inline void FtrlUpdate(const nnvm::NodeAttrs& attrs, template struct FtrlDnsRspDnsKernel { template - MSHADOW_XINLINE static void Map(int i, const nnvm::dim_t row_length, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, const nnvm::dim_t row_length, DType* out_data, DType* z_data, DType* n_data, const DType* weight_data, const IType* grad_idx, const DType* grad_data, const DType clip_gradient, const DType lamda1, const DType beta, const DType lr, const DType wd, const DType rescale_grad) { @@ -2343,7 +2344,7 @@ struct SignSGDParam : public dmlc::Parameter { struct SignSGDKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* weight_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* weight_data, const DType* grad_data, const DType param_clip_gradient, const DType param_lr, const DType param_wd, const DType param_rescale_grad, const OpReqType req) { @@ -2411,10 +2412,12 @@ struct SignumParam : public dmlc::Parameter { struct SignumKernel { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, DType* mom_data, const DType* weight_data, - const DType* grad_data, const DType param_clip_gradient, const DType param_momentum, - const DType param_lr, const DType param_wd, const DType param_rescale_grad, - const DType param_wd_lh, const OpReqType req) { + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, DType* mom_data, + const DType* weight_data, const DType* grad_data, + const DType param_clip_gradient, const DType param_momentum, + const DType param_lr, const DType param_wd, + const DType param_rescale_grad, const DType param_wd_lh, + const OpReqType req) { if (param_clip_gradient >= 0.0f) { mom_data[i] = param_momentum*mom_data[i] - (1-param_momentum)*param_wd*weight_data[i] @@ -2506,7 +2509,7 @@ struct AdagradDnsRspDnsKernel; template<> struct AdagradDnsRspDnsKernel { template - MSHADOW_XINLINE static void Map(int i, index_t row_length, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, index_t row_length, DType* out_data, DType* state_data, const DType* weight_data, const IType* grad_idx, const DType* grad_data, const DType clip_gradient, const DType epsilon, const DType lr, const DType rescale_grad) { @@ -2533,7 +2536,7 @@ struct AdagradDnsRspDnsKernel { template<> struct AdagradDnsRspDnsKernel { template - MSHADOW_XINLINE static void Map(int i, index_t row_length, DType* out_data, + MSHADOW_XINLINE static void Map(index_t i, index_t row_length, DType* out_data, DType* state_data, const DType* weight_data, const IType* grad_idx, const DType* grad_data, const DType clip_gradient, const DType epsilon, const DType lr, const DType rescale_grad) { diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h index 4486b0dcd712..dcbd53aac69b 100644 --- a/src/operator/tensor/elemwise_unary_op.h +++ b/src/operator/tensor/elemwise_unary_op.h @@ -495,7 +495,7 @@ struct HardSigmoidParam : public dmlc::Parameter { template struct hard_sigmoid_forward { template - MSHADOW_XINLINE static void Map(int i, DType* out_data, const DType* in_data, + MSHADOW_XINLINE static void Map(index_t i, DType* out_data, const DType* in_data, const real_t alpha, const real_t beta) { DType result = DType(alpha * in_data[i] + beta); result = (DType(1) < result) ? DType(1) : result; @@ -507,7 +507,7 @@ struct hard_sigmoid_forward { template struct hard_sigmoid_backward { template - MSHADOW_XINLINE static void Map(int i, DType* in_grad, const DType* in_data, + MSHADOW_XINLINE static void Map(index_t i, DType* in_grad, const DType* in_data, const DType* out_grad, const real_t alpha, const real_t beta) { DType out_val = DType(alpha) * in_data[i] + DType(beta); DType grad = (out_val > DType(0) && out_val < DType(1)) ?