From 1409be24d522e75507f33175fe3ebfecd679d156 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng
Date: Sun, 11 Jun 2023 23:53:31 -0400
Subject: [PATCH] ProdEnvMatAMixOp: move filter_ftype out of nsamples loop

Signed-off-by: Jinzhe Zeng
---
 source/op/prod_env_mat_multi_device.cc | 42 +++++++++++++-------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc
index 9a516dde35..9e4e1753e1 100644
--- a/source/op/prod_env_mat_multi_device.cc
+++ b/source/op/prod_env_mat_multi_device.cc
@@ -1135,6 +1135,12 @@ class ProdEnvMatAMixOp : public OpKernel {
                     context->allocate_output(context_output_index++,
                                              nmask_shape,
                                              &nmask_tensor));
+    Tensor fake_type_tensor;  // all zeros
+    TensorShape fake_type_shape;
+    fake_type_shape.AddDim(nsamples * nall);
+    OP_REQUIRES_OK(context, context->allocate_temp(DT_INT32, fake_type_shape,
+                                                   &fake_type_tensor));
+
     FPTYPE* p_em = descrpt_tensor->flat<FPTYPE>().data();
     FPTYPE* p_em_deriv = descrpt_deriv_tensor->flat<FPTYPE>().data();
     FPTYPE* p_rij = rij_tensor->flat<FPTYPE>().data();
@@ -1146,6 +1152,20 @@ class ProdEnvMatAMixOp : public OpKernel {
     const FPTYPE* avg = avg_tensor.flat<FPTYPE>().data();
     const FPTYPE* std = std_tensor.flat<FPTYPE>().data();
     const int* p_type = type_tensor.flat<int>().data();
+    int* p_f_type = fake_type_tensor.flat<int>().data();
+
+    if (device == "GPU") {
+#if GOOGLE_CUDA
+      deepmd::filter_ftype_gpu_cuda(p_f_type, p_type, nsamples * nall);
+#endif
+#if TENSORFLOW_USE_ROCM
+      deepmd::filter_ftype_gpu_rocm(p_f_type, p_type, nsamples * nall);
+#endif
+    } else if (device == "CPU") {
+      for (int ii = 0; ii < nsamples * nall; ii++) {
+        p_f_type[ii] = (p_type[ii] < 0) ? -1 : 0;
+      }
+    }
 
     // loop over samples
     for (int_64 ff = 0; ff < nsamples; ++ff) {
@@ -1158,6 +1178,7 @@ class ProdEnvMatAMixOp : public OpKernel {
       const FPTYPE* coord = p_coord + ff * nall * 3;
       const FPTYPE* box = p_box + ff * 9;
       const int* type = p_type + ff * nall;
+      const int* f_type = p_f_type + ff * nall;
 
       if (device == "GPU") {
 #if GOOGLE_CUDA
@@ -1171,13 +1192,6 @@ class ProdEnvMatAMixOp : public OpKernel {
         int frame_nall = nall;
         int mesh_tensor_size = static_cast<int>(mesh_tensor.NumElements());
         std::vector<Tensor> tensor_list(7);
-        Tensor fake_type;  // all zeros
-        TensorShape fake_type_shape;
-        fake_type_shape.AddDim(nall);
-        OP_REQUIRES_OK(context, context->allocate_temp(
-                                    DT_INT32, fake_type_shape, &fake_type));
-        deepmd::filter_ftype_gpu_cuda(fake_type.flat<int>().data(), type, nall);
-        const int* f_type = fake_type.flat<int>().data();
         // prepare coord and nlist
         _prepare_coord_nlist_gpu<FPTYPE>(
             context, &tensor_list[0], &coord, coord_cpy, &f_type, type_cpy,
@@ -1222,13 +1236,6 @@ class ProdEnvMatAMixOp : public OpKernel {
         int frame_nall = nall;
         int mesh_tensor_size = static_cast<int>(mesh_tensor.NumElements());
         std::vector<Tensor> tensor_list(7);
-        Tensor fake_type;  // all zeros
-        TensorShape fake_type_shape;
-        fake_type_shape.AddDim(nall);
-        OP_REQUIRES_OK(context, context->allocate_temp(
-                                    DT_INT32, fake_type_shape, &fake_type));
-        deepmd::filter_ftype_gpu_rocm(fake_type.flat<int>().data(), type, nall);
-        const int* f_type = fake_type.flat<int>().data();
         // prepare coord and nlist
         _prepare_coord_nlist_gpu_rocm<FPTYPE>(
             context, &tensor_list[0], &coord, coord_cpy, &f_type, type_cpy,
@@ -1271,13 +1278,6 @@ class ProdEnvMatAMixOp : public OpKernel {
         std::vector<FPTYPE> coord_cpy;
         std::vector<int> type_cpy;
         int frame_nall = nall;
-        std::vector<int> fake_type(nall, 0);
-        for (int ii = 0; ii < nall; ii++) {
-          if (type[ii] < 0) {
-            fake_type[ii] = -1;
-          }
-        }
-        const int* f_type = &fake_type[0];
         // prepare coord and nlist
         _prepare_coord_nlist_cpu<FPTYPE>(
             context, &coord, coord_cpy, &f_type, type_cpy, idx_mapping, inlist,
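
Note on the change (not part of the patch): the hoisted filter simply maps every
atom type to a "fake" type that is -1 for virtual/padding atoms (type < 0) and 0
otherwise, and it is now computed once over the flattened nsamples * nall buffer
before the sample loop instead of once per frame inside it. Below is a minimal
standalone C++ sketch of that mapping only; the helper name filter_ftype_cpu is
made up for illustration, while the patched op inlines this loop on CPU and calls
deepmd::filter_ftype_gpu_cuda / deepmd::filter_ftype_gpu_rocm on GPU.

// Minimal sketch, assuming only the mapping semantics shown in the CPU branch.
#include <cstdio>
#include <vector>

// Hypothetical helper mirroring the CPU branch of the patch.
static void filter_ftype_cpu(int* f_type, const int* type, int n) {
  for (int ii = 0; ii < n; ii++) {
    f_type[ii] = (type[ii] < 0) ? -1 : 0;  // -1 marks virtual/padding atoms
  }
}

int main() {
  // Two frames (nsamples = 2) of four atoms each (nall = 4); -1 is padding.
  const int nsamples = 2, nall = 4;
  std::vector<int> type = {0, 1, 1, -1, 2, 0, -1, -1};
  std::vector<int> f_type(nsamples * nall);

  // One pass over the flattened buffer, as the patch now does before the loop;
  // each frame ff then reads its slice at f_type.data() + ff * nall.
  filter_ftype_cpu(f_type.data(), type.data(), nsamples * nall);

  for (int ii = 0; ii < nsamples * nall; ii++) {
    std::printf("%d ", f_type[ii]);  // prints: 0 0 0 -1 0 0 -1 -1
  }
  std::printf("\n");
  return 0;
}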