merged develop

jim19930609 committed Apr 13, 2022
2 parents 6ba6a13 + 1e56ca8 commit ca255d9
Showing 210 changed files with 8,884 additions and 1,878 deletions.
48 changes: 32 additions & 16 deletions cmake/external/xpu.cmake
@@ -7,46 +7,62 @@ SET(XPU_PROJECT "extern_xpu")
 SET(XPU_API_LIB_NAME "libxpuapi.so")
 SET(XPU_RT_LIB_NAME "libxpurt.so")
 
+if(NOT DEFINED XPU_BASE_URL)
+  SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
+  SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220411")
+else()
+  SET(XPU_BASE_URL "${XPU_BASE_URL}")
+endif()
+
+# ubuntu and centos: use output by XDNN API team
+if(NOT DEFINED XPU_XDNN_BASE_URL)
+  SET(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
+  SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220412")
+else()
+  SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
+endif()
+
 IF(WITH_AARCH64)
   SET(XPU_XRE_DIR_NAME "xre-kylin_aarch64")
   SET(XPU_XDNN_DIR_NAME "xdnn-kylin_aarch64")
   SET(XPU_XCCL_DIR_NAME "xccl-kylin_aarch64")
+  SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 ELSEIF(WITH_SUNWAY)
   SET(XPU_XRE_DIR_NAME "xre-deepin_sw6_64")
   SET(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64")
   SET(XPU_XCCL_DIR_NAME "xccl-deepin_sw6_64")
+  SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 ELSEIF(WITH_BDCENTOS)
   SET(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64")
-  SET(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64")
+  SET(XPU_XDNN_DIR_NAME "XDNN-bdcentos_x86_64")
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+  # ubuntu and centos: use output by XDNN API team
+  SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 ELSEIF(WITH_UBUNTU)
   SET(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
-  SET(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
+  SET(XPU_XDNN_DIR_NAME "XDNN-ubuntu_x86_64")
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+  # ubuntu and centos: use output by XDNN API team
+  SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 ELSEIF(WITH_CENTOS)
   SET(XPU_XRE_DIR_NAME "xre-centos7_x86_64")
-  SET(XPU_XDNN_DIR_NAME "xdnn-centos7_x86_64")
+  SET(XPU_XDNN_DIR_NAME "XDNN-bdcentos_x86_64")
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
-ELSE ()
+  # ubuntu and centos: use output by XDNN API team
+  SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
+ELSE()
   SET(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
-  SET(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
+  SET(XPU_XDNN_DIR_NAME "XDNN-bdcentos_x86_64")
   SET(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64")
+  # default: use output by XDNN API team
+  SET(XPU_XDNN_URL "${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 ENDIF()
 
-if(NOT DEFINED XPU_BASE_URL)
-  SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
-  SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220408")
-else()
-  SET(XPU_BASE_URL "${XPU_BASE_URL}")
-endif()
-
 SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
-SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
-SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
+SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220411/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_PACK_DEPENCE_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh" CACHE STRING "" FORCE)
 
 SET(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu")
 SET(XPU_DOWNLOAD_DIR "${SNAPPY_PREFIX_DIR}/src/${XPU_PROJECT}")
 SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
 SET(XPU_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include")
4 changes: 2 additions & 2 deletions cmake/xpu_kp.cmake
@@ -128,7 +128,7 @@ macro(compile_kernel COMPILE_ARGS)
     COMMAND
     ${CMAKE_COMMAND} -E make_directory kernel_build
     COMMAND
-    cp ${kernel_path}/${kernel_name}.kps kernel_build/${kernel_name}.xpu -rf
+    ${CMAKE_COMMAND} -E copy ${kernel_path}/${kernel_name}.kps kernel_build/${kernel_name}.xpu
     COMMAND
     ${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=1 ${OPT_LEVEL} -fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS} ${XPU_CXX_INCLUDES}
     -I. -o kernel_build/${kernel_name}.bin.o.sec kernel_build/${kernel_name}.xpu
@@ -151,7 +151,7 @@ macro(compile_kernel COMPILE_ARGS)
     COMMAND
     ${CMAKE_COMMAND} -E make_directory kernel_build
     COMMAND
-    cp ${kernel_path}/${kernel_name}.kps kernel_build/${kernel_name}.xpu -rf
+    ${CMAKE_COMMAND} -E copy ${kernel_path}/${kernel_name}.kps kernel_build/${kernel_name}.xpu
     COMMAND
     ${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=1 ${OPT_LEVEL} -fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS} ${XPU_CXX_INCLUDES}
     -I. -o kernel_build/${kernel_name}.host.o kernel_build/${kernel_name}.xpu
15 changes: 6 additions & 9 deletions paddle/fluid/distributed/collective/Common.cc
@@ -17,11 +17,11 @@
 namespace paddle {
 namespace distributed {
 
-std::vector<Place> GetPlaceList(const std::vector<Tensor>& tensors) {
+std::vector<Place> GetPlaceList(const std::vector<phi::DenseTensor>& tensors) {
   std::vector<Place> places;
   places.reserve(tensors.size());
   for (auto& tensor : tensors) {
-    places.push_back(tensor.inner_place());
+    places.push_back(tensor.place());
   }
   return places;
 }
@@ -40,14 +40,11 @@ std::string GetKeyFromPlaces(const std::vector<Place>& places) {
   return placeList;
 }
 
-static bool CheckTensorsInPlace(const std::vector<Tensor>& tensors,
-                                const PlaceType type) {
+bool CheckTensorsInCudaPlace(const std::vector<phi::DenseTensor>& tensors) {
   return std::all_of(tensors.cbegin(), tensors.cend(),
-                     [&](const Tensor& t) { return t.place() == type; });
-}
-
-bool CheckTensorsInCudaPlace(const std::vector<Tensor>& tensors) {
-  return CheckTensorsInPlace(tensors, PlaceType::kGPU);
+                     [&](const phi::DenseTensor& t) {
+                       return platform::is_gpu_place(t.place());
+                     });
 }
 
 }  // namespace distributed
8 changes: 4 additions & 4 deletions paddle/fluid/distributed/collective/Common.h
@@ -16,18 +16,18 @@
 
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/api/include/api.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/dense_tensor.h"
 namespace paddle {
 namespace distributed {
 
-using Tensor = paddle::experimental::Tensor;
-
 using Place = paddle::platform::Place;
 // Get the list of devices from list of tensors
-std::vector<Place> GetPlaceList(const std::vector<Tensor>& tensors);
+std::vector<Place> GetPlaceList(const std::vector<phi::DenseTensor>& tensors);
 // Get the deviceList String from the list of devices
 std::string GetKeyFromPlaces(const std::vector<Place>& places);
 
-bool CheckTensorsInCudaPlace(const std::vector<Tensor>& tensors);
+bool CheckTensorsInCudaPlace(const std::vector<phi::DenseTensor>& tensors);
 
 }  // namespace distributed
 }  // namespace paddle
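
Note: the Common.cc and Common.h hunks above move the collective helpers from paddle::experimental::Tensor to phi::DenseTensor, and fold the old two-step CheckTensorsInPlace/CheckTensorsInCudaPlace pair into a single std::all_of predicate. The sketch below is a minimal, self-contained illustration of that predicate pattern; StubTensor and the Place enum are hypothetical stand-ins for phi::DenseTensor and platform::Place, not Paddle code.

// Minimal sketch, assuming stand-in types; not the Paddle implementation.
#include <algorithm>
#include <cassert>
#include <vector>

enum class Place { kCPU, kGPU };  // stand-in for platform::Place

struct StubTensor {  // stand-in for phi::DenseTensor
  Place place;
};

// Mirrors the refactored helper: true iff every tensor sits on a GPU place.
bool CheckTensorsInCudaPlace(const std::vector<StubTensor>& tensors) {
  return std::all_of(tensors.cbegin(), tensors.cend(),
                     [](const StubTensor& t) { return t.place == Place::kGPU; });
}

int main() {
  std::vector<StubTensor> gpu_batch{{Place::kGPU}, {Place::kGPU}};
  std::vector<StubTensor> mixed_batch{{Place::kGPU}, {Place::kCPU}};
  assert(CheckTensorsInCudaPlace(gpu_batch));
  assert(!CheckTensorsInCudaPlace(mixed_batch));
  return 0;
}

Folding the check into the lambda also drops the comparison against the PlaceType enum from the old experimental Tensor API; the new code asks platform::is_gpu_place(t.place()) directly.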
3 changes: 2 additions & 1 deletion paddle/fluid/distributed/collective/ProcessGroup.cc
@@ -17,7 +17,8 @@
 namespace paddle {
 namespace distributed {
 
-ProcessGroup::Task::Task(int rank, const std::vector<Tensor>& inputTensors,
+ProcessGroup::Task::Task(int rank,
+                         const std::vector<phi::DenseTensor>& inputTensors,
                          CommType comm_type)
     : rank_(rank), comm_type_(comm_type) {}
 
37 changes: 17 additions & 20 deletions paddle/fluid/distributed/collective/ProcessGroup.h
@@ -54,7 +54,7 @@ class ProcessGroup {
  public:
   class Task {
    public:
-    Task(int rank, const std::vector<Tensor>& inputTensors,
+    Task(int rank, const std::vector<phi::DenseTensor>& inputTensors,
         CommType opType = CommType::UNKNOWN);
 
     virtual ~Task();
@@ -79,68 +79,65 @@ class ProcessGroup {
   virtual const std::string GetBackendName() const = 0;
 
   virtual std::shared_ptr<ProcessGroup::Task> AllReduce(
-      std::vector<Tensor>& /* tensors */,
+      std::vector<phi::DenseTensor>& /* input tensors */,   // NOLINT
+      std::vector<phi::DenseTensor>& /* output tensors */,  // NOLINT
       const AllreduceOptions& = AllreduceOptions()) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "ProcessGroup%s does not support allreduce", GetBackendName()));
   }
 
   virtual std::shared_ptr<ProcessGroup::Task> Broadcast(
-      std::vector<Tensor>& /* tensors */,
+      std::vector<phi::DenseTensor>& /* input tensors */,   // NOLINT
+      std::vector<phi::DenseTensor>& /* output tensors */,  // NOLINT
       const BroadcastOptions& = BroadcastOptions()) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "ProcessGroup%s does not support broadcast", GetBackendName()));
   }
 
-  virtual void Broadcast(const phi::DenseTensor* in, phi::DenseTensor* out) {
-    PADDLE_THROW(platform::errors::Fatal(
-        "ProcessGroup%s does not support broadcast for static mode runtime",
-        GetBackendName()));
-  }
-
   virtual std::shared_ptr<ProcessGroup::Task> Barrier(
       const BarrierOptions& = BarrierOptions()) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "ProcessGroup%s does not support barrier", GetBackendName()));
   }
 
   virtual std::shared_ptr<ProcessGroup::Task> Send(
-      std::vector<Tensor>& tensors /* tensors */, int dst_rank) {  // NOLINT
+      std::vector<phi::DenseTensor>&, int) {  // NOLINT
     PADDLE_THROW(platform::errors::InvalidArgument(
         "ProcessGroup%s does not support send", GetBackendName()));
   }
 
   virtual std::shared_ptr<ProcessGroup::Task> Recv(
-      std::vector<Tensor>& tensors /* tensors */, int src_rank) {  // NOLINT
+      std::vector<phi::DenseTensor>& tensors, int) {  // NOLINT
     PADDLE_THROW(platform::errors::InvalidArgument(
         "ProcessGroup%s does not support receive", GetBackendName()));
   }
 
   virtual std::shared_ptr<ProcessGroup::Task> AllGather(
-      std::vector<Tensor>& in_tensors /* tensors */,     // NOLINT
-      std::vector<Tensor>& out_tensors /* tensors */) {  // NOLINT
+      std::vector<phi::DenseTensor>&,    // NOLINT
+      std::vector<phi::DenseTensor>&) {  // NOLINT
     PADDLE_THROW(platform::errors::InvalidArgument(
         "ProcessGroup%s does not support AllGather", GetBackendName()));
   }
 
   virtual std::shared_ptr<ProcessGroup::Task> AllToAll(
-      std::vector<Tensor>& in /* tensors */,     // NOLINT
-      std::vector<Tensor>& out /* tensors */) {  // NOLINT
+      std::vector<phi::DenseTensor>&,    // NOLINT
+      std::vector<phi::DenseTensor>&) {  // NOLINT
     PADDLE_THROW(platform::errors::InvalidArgument(
         "ProcessGroup%s does not support AllToAll", GetBackendName()));
   }
 
   virtual std::shared_ptr<ProcessGroup::Task> Reduce(
-      std::vector<Tensor>& tensors /* tensors */,  // NOLINT
-      const ReduceOptions& opts) {                 // NOLINT
+      std::vector<phi::DenseTensor>&,  // NOLINT
+      std::vector<phi::DenseTensor>&,  // NOLINT
+      const ReduceOptions& opts) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "ProcessGroup%s does not support Reduce", GetBackendName()));
   }
 
   virtual std::shared_ptr<ProcessGroup::Task> Scatter(
-      std::vector<Tensor>& in_tensors /* tensors */,   // NOLINT
-      std::vector<Tensor>& out_tensors /* tensors */,  // NOLINT
-      const ScatterOptions&) {                         // NOLINT
+      std::vector<phi::DenseTensor>&,  // NOLINT
+      std::vector<phi::DenseTensor>&,  // NOLINT
+      const ScatterOptions&) {         // NOLINT
     PADDLE_THROW(platform::errors::InvalidArgument(
         "ProcessGroup%s does not support Scatter", GetBackendName()));
   }
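
Note: the hunks above change every collective in the ProcessGroup interface to take separate input and output std::vector<phi::DenseTensor> arguments and remove the static-mode Broadcast overload; collectives a backend does not implement still throw from the base class. The following is a minimal, self-contained sketch of that dispatch pattern under hypothetical stand-in types (StubDenseTensor, StubProcessGroup, LocalProcessGroup); it illustrates the shape of the API, not the real Paddle classes.

// Minimal sketch of the virtual-dispatch pattern above; stand-in types only.
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct StubDenseTensor {  // stand-in for phi::DenseTensor
  std::vector<float> data;
};

class StubProcessGroup {  // stand-in for paddle::distributed::ProcessGroup
 public:
  virtual ~StubProcessGroup() = default;
  virtual std::string GetBackendName() const = 0;

  // Default body mirrors the PADDLE_THROW pattern: collectives a backend
  // does not override fail loudly at runtime.
  virtual void AllReduce(std::vector<StubDenseTensor>& inputs,
                         std::vector<StubDenseTensor>& outputs) {
    throw std::runtime_error("ProcessGroup" + GetBackendName() +
                             " does not support allreduce");
  }
};

// A backend overrides only the collectives it supports; this toy
// single-rank backend treats allreduce as an identity copy.
class LocalProcessGroup : public StubProcessGroup {
 public:
  std::string GetBackendName() const override { return "LOCAL"; }
  void AllReduce(std::vector<StubDenseTensor>& inputs,
                 std::vector<StubDenseTensor>& outputs) override {
    outputs = inputs;  // one participant, so the "sum" is the input itself
  }
};

int main() {
  LocalProcessGroup pg;
  std::vector<StubDenseTensor> in{{{1.0f, 2.0f}}};
  std::vector<StubDenseTensor> out;
  pg.AllReduce(in, out);
  std::cout << out[0].data[1] << "\n";  // prints 2
  return 0;
}

In the real tree, concrete backends such as ProcessGroupNCCL supply the actual communication in these overrides; keeping the base-class throw makes an unsupported collective a loud runtime error rather than a silent no-op.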

1 comment on commit ca255d9


@paddle-bot-old (bot) commented on ca255d9, Apr 13, 2022


🕵️ CI failures summary

🔍 PR: #41668 Commit ID: ca255d9 contains failed CI.

🔹 Failed: PR-CI-Mac-Python3

Unknown Failed
Unknown Failed

🔹 Failed: PR-CI-Windows

Unknown Failed
Unknown Failed
