[AutoParallel] Support vector and optional<vector> InferSPMD input and output. #58573

Merged

Conversation

@GhostScreaming (Contributor) commented on Nov 1, 2023

PR types

New features

PR changes

Others

Description

Pcard-73145

Support std::vector&lt;Tensor&gt; and paddle::optional&lt;std::vector&lt;Tensor&gt;&gt; as InferSPMD inputs and outputs.

Example generated code (linear_interp):

  // Kernel Dispatch Body
  // Auto Parallel condition
  if (run_auto_parallel) {
    // 1. InferSpmd (Infer DistAttr of Inputs&Outputs)
    auto meta_dist_input_x = MakeDistMetaTensor(*x.impl());
    auto meta_dist_input_out_size = out_size ? MakeDistMetaTensor(*(*out_size).impl()) : phi::distributed::DistMetaTensor();
    std::vector<phi::distributed::DistMetaTensor> meta_dist_input_size_tensor;
    if (size_tensor) {
      for (auto& e : *size_tensor) {
        meta_dist_input_size_tensor.push_back(MakeDistMetaTensor(*e.impl()));
      }
    }
    auto meta_dist_input_scale_tensor = scale_tensor ? MakeDistMetaTensor(*(*scale_tensor).impl()) : phi::distributed::DistMetaTensor();
    auto spmd_info = phi::distributed::VariadicReplicatedInferSpmdDynamic(meta_dist_input_x, meta_dist_input_out_size, meta_dist_input_size_tensor, meta_dist_input_scale_tensor);

    // 2. Create API Output & Prepare Dist and Dense Output
    Tensor api_output;

    auto dist_out = SetKernelDistOutput(&api_output);
    auto dense_out = dist_out->unsafe_mutable_value();
    if (!rank_is_in_current_mesh) {
      *dense_out = phi::DenseTensor(
            std::make_shared<phi::Allocation>(nullptr, 0, phi::distributed::GetDefaultPlace()),
            phi::DenseTensorMeta());
    }

    // 3. Infer DistTensor's Global Shape
    phi::MetaTensor meta_dist_out(dist_out);
    phi::MetaTensor meta_dist_out_size = out_size ? MakeMetaTensor(*(*out_size).impl()) : phi::MetaTensor();

    std::vector<phi::MetaTensor> size_tensor_meta_vec_tmp;
    if (size_tensor) {
      for (auto tmp : *size_tensor) {
        size_tensor_meta_vec_tmp.emplace_back(MakeMetaTensor(*tmp.impl()));
      }
    }
    std::vector<const phi::MetaTensor*> size_tensor_meta_ptr_vec_tmp(size_tensor_meta_vec_tmp.size());
    for (size_t i = 0; i < size_tensor_meta_ptr_vec_tmp.size(); ++i) {
      size_tensor_meta_ptr_vec_tmp[i] = &size_tensor_meta_vec_tmp[i];
    }
    paddle::optional<std::vector<const phi::MetaTensor*>> size_tensor_meta_ptr_vec =
        size_tensor ? paddle::make_optional<std::vector<const phi::MetaTensor*>>(size_tensor_meta_ptr_vec_tmp) : paddle::none;

    phi::MetaTensor meta_dist_scale_tensor = scale_tensor ? MakeMetaTensor(*(*scale_tensor).impl()) : phi::MetaTensor();

    phi::InterpolateInferMeta(MakeMetaTensor(*x.impl()), meta_dist_out_size, size_tensor_meta_ptr_vec, meta_dist_scale_tensor, data_layout, out_d, out_h, out_w, scale, interp_method, align_corners, align_mode, &meta_dist_out);


    if (rank_is_in_current_mesh) {
      // 4. Select Kernel
      VLOG(6) << "linear_interp API dist branch: kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
      auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
          "linear_interp", {kernel_backend, kernel_layout, kernel_data_type});
      const auto& kernel = kernel_result.kernel;
      VLOG(6) << "linear_interp kernel: " << kernel;
      auto* dev_ctx = GetDeviceContextByBackend(kernel_result.has_fallback_cpu ? Backend::CPU : kernel_backend);

      // 5. Reshard Input
      auto dist_input_x = ReshardApiInputToKernelInput(dev_ctx, x, spmd_info.first[0]);
      auto dist_input_out_size = ReshardApiInputToKernelInput(dev_ctx, out_size, spmd_info.first[1]);
      auto dist_input_size_tensor = ReshardApiInputToKernelInput(dev_ctx, size_tensor, spmd_info.first[2]);
      auto dist_input_scale_tensor = ReshardApiInputToKernelInput(dev_ctx, scale_tensor, spmd_info.first[3]);

      // 6. PrepareData (DataTransform & Prepare Dense Input)
      dist_input_x = PrepareDataForDistTensor(dist_input_x, GetKernelInputArgDef(kernel.InputAt(0), kernel_backend), {}, kernel_result.is_stride_kernel);
      auto input_x = &dist_input_x->value();

      dist_input_out_size = PrepareDataForDistTensor(dist_input_out_size, GetKernelInputArgDef(kernel.InputAt(1), kernel_backend), {true}, kernel_result.is_stride_kernel);
      paddle::optional<phi::DenseTensor> input_out_size = dist_input_out_size ? paddle::make_optional<phi::DenseTensor>((*dist_input_out_size)->value()) : paddle::none;

      auto dist_input_size_tensor_vec = PrepareDataForDistTensor(dist_input_size_tensor, GetKernelInputArgDef(kernel.InputAt(2), kernel_backend), {true}, kernel_result.is_stride_kernel);
      std::vector<const phi::DenseTensor*> dense_input_size_tensor_vec;
      if (size_tensor) {
        for (auto tmp : *dist_input_size_tensor_vec) {
          dense_input_size_tensor_vec.emplace_back(&tmp->value());
        }
      }
      paddle::optional<std::vector<const phi::DenseTensor*>> input_size_tensor(dense_input_size_tensor_vec);
      std::vector<phi::MetaTensor> dense_input_size_tensor_meta_vec = MakeMetaTensor(dense_input_size_tensor_vec);
      std::vector<const phi::MetaTensor*> dense_input_size_tensor_meta_ptr_vec_tmp(dense_input_size_tensor_meta_vec.size());
      for (size_t i = 0; i < dense_input_size_tensor_meta_ptr_vec_tmp.size(); ++i) {
        dense_input_size_tensor_meta_ptr_vec_tmp[i] = &dense_input_size_tensor_meta_vec[i];
      }
      paddle::optional<std::vector<const phi::MetaTensor*>> dense_input_size_tensor_meta_ptr_vec =
          size_tensor ? paddle::make_optional<std::vector<const phi::MetaTensor*>>(dense_input_size_tensor_meta_ptr_vec_tmp) : paddle::none;

      dist_input_scale_tensor = PrepareDataForDistTensor(dist_input_scale_tensor, GetKernelInputArgDef(kernel.InputAt(3), kernel_backend), {true}, kernel_result.is_stride_kernel);
      paddle::optional<phi::DenseTensor> input_scale_tensor = dist_input_scale_tensor ? paddle::make_optional<phi::DenseTensor>((*dist_input_scale_tensor)->value()) : paddle::none;

      // 7. Infer Local DenseTensor Meta
      phi::MetaTensor meta_dense_out(dense_out);
      phi::InterpolateInferMeta(MakeMetaTensor(*input_x), MakeMetaTensor(input_out_size), dense_input_size_tensor_meta_ptr_vec, MakeMetaTensor(input_scale_tensor), data_layout, out_d, out_h, out_w, scale, interp_method, align_corners, align_mode, &meta_dense_out);

      // 8. DenseTensor Kernel Call
      using kernel_signature = void(*)(const phi::DeviceContext&, const phi::DenseTensor&, const paddle::optional<phi::DenseTensor>&, const paddle::optional<std::vector<const phi::DenseTensor*>>&, const paddle::optional<phi::DenseTensor>&, const std::string&, int, int, int, const std::vector<float>&, const std::string&, bool, int, phi::DenseTensor*);
      auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
      (*kernel_fn)(*dev_ctx, *input_x, input_out_size, input_size_tensor, input_scale_tensor, data_layout, out_d, out_h, out_w, scale, interp_method, align_corners, align_mode, dense_out);

      dist_out->unsafe_set_dims(dense_out->dims());

      // 9. Reshard Partial Output to Replicated (Temporary)
      // API `linear_interp` does not need to support ReshardOutput now.

    }

    // 10. Set Output Dist Attr For Default Impl
    auto current_process_mesh = paddle::holds_alternative<phi::distributed::TensorDistAttr>(spmd_info.first[0]) ?
               paddle::get<0>(spmd_info.first[0]).process_mesh() : paddle::get<1>(spmd_info.first[0]).at(0).process_mesh();
    SetReplicatedDistAttrForOutput(dist_out, current_process_mesh);

    // 11. Return
    return api_output;
  }
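
The generated code above repeats one pattern for the optional vector input size_tensor: unwrap the paddle::optional, flatten the contained tensors into a plain vector of metadata objects, and then rebuild the optional-ness as an optional vector of const pointers matching the InferMeta/kernel signatures. Below is a minimal, standard-library-only sketch of that pattern. Tensor, MetaInfo, make_meta, collect_meta, and as_optional_ptrs are hypothetical stand-ins for illustration, not Paddle APIs; the real code uses paddle::optional, phi::distributed::DistMetaTensor, phi::MetaTensor, and phi::DenseTensor.

  // Minimal sketch of the optional<vector> unwrapping pattern shown above.
  // All names here are illustrative stand-ins, not Paddle types.
  #include <iostream>
  #include <optional>
  #include <vector>

  struct Tensor { int id; };     // stand-in for paddle::Tensor
  struct MetaInfo { int id; };   // stand-in for a (dist-)meta tensor

  MetaInfo make_meta(const Tensor& t) { return MetaInfo{t.id}; }

  // Flatten an optional vector of tensors into a plain vector of metadata,
  // mirroring step 1 of the generated code (empty when the input is absent).
  std::vector<MetaInfo> collect_meta(const std::optional<std::vector<Tensor>>& in) {
    std::vector<MetaInfo> metas;
    if (in) {
      for (const auto& t : *in) {
        metas.push_back(make_meta(t));
      }
    }
    return metas;
  }

  // Rebuild the optional-ness as an optional vector of const pointers,
  // mirroring the MetaTensor* vectors prepared for InferMeta in steps 3 and 7.
  std::optional<std::vector<const MetaInfo*>> as_optional_ptrs(
      const std::optional<std::vector<Tensor>>& in,
      const std::vector<MetaInfo>& metas) {
    if (!in) return std::nullopt;
    std::vector<const MetaInfo*> ptrs(metas.size());
    for (size_t i = 0; i < metas.size(); ++i) {
      ptrs[i] = &metas[i];
    }
    return ptrs;
  }

  int main() {
    std::optional<std::vector<Tensor>> size_tensor =
        std::vector<Tensor>{{1}, {2}, {3}};
    auto metas = collect_meta(size_tensor);
    auto meta_ptrs = as_optional_ptrs(size_tensor, metas);
    std::cout << "metas: " << metas.size()
              << ", engaged: " << std::boolalpha << meta_ptrs.has_value() << "\n";
    return 0;
  }

The pointer vector is kept separate from the owning metadata vector because the InferMeta signatures in the generated code take const std::vector<const phi::MetaTensor*>& rather than owning containers, so the owning vector has to outlive the pointers handed to InferMeta and the kernel call.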

@paddle-bot added the "contributor" (External developers) label on Nov 1, 2023
@paddle-bot removed the "contributor" (External developers) label on Nov 3, 2023
@LiYuRio (Contributor) left a comment

LGTM

@GhostScreaming merged commit 130c891 into PaddlePaddle:develop on Nov 6, 2023
zeroRains pushed a commit to zeroRains/Paddle that referenced this pull request Nov 8, 2023
…d output. (PaddlePaddle#58573)

* [AutoParallel] Support vector and optional<vector> InferSPMD input and output.

* Fix some problems.

* Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into support_vector_inferspmd

* Fix conflicts.

* Polish code.

* Polish code.

* Polish code.
danleifeng pushed a commit to danleifeng/Paddle that referenced this pull request Nov 14, 2023
…d output. (PaddlePaddle#58573)

* [AutoParallel] Support vector and optional<vector> InferSPMD input and output.

* Fix some problems.

* Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into support_vector_inferspmd

* Fix conflicts.

* Polish code.

* Polish code.

* Polish code.
SecretXV pushed a commit to SecretXV/Paddle that referenced this pull request Nov 28, 2023
…d output. (PaddlePaddle#58573)

* [AutoParallel] Support vector and optional<vector> InferSPMD input and output.

* Fix some problems.

* Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into support_vector_inferspmd

* Fix conflicts.

* Polish code.

* Polish code.

* Polish code.