Skip to content

Commit

Permalink
paddle_musa v2.6.0 release initialization (#64265)
Browse files Browse the repository at this point in the history
* Revert "fix rpc_sync and rpc_async doc;test=develop (#64107)"

This reverts commit 1319992.

* Revert "[Dy2St][2.6] Disable `test_sentiment` on release/2.6 (#63197)"

This reverts commit 9013831.

* Revert "Revert "fix security (#62626) (#62683)" (#62890)"

This reverts commit 89a60d7.

* Revert "Enhance several unit tests (#62477) (#62776)"

This reverts commit 0348f3f.

* Revert "[Fix_ci] set PLUGIN_TAG release/2.6 (#62731)"

This reverts commit 97ffa07.

* Revert "fix security (#62626) (#62683)"

This reverts commit 6a73547.

* Revert "add more capi to support stride (#62716)"

This reverts commit 683a141.

* Revert "[XPU] default no autotune (#62636)"

This reverts commit fde63d1.

* Revert "[DCU] fix dcu compile failure (#62573)"

This reverts commit d527fb5.

* Revert "[AutoParallel] Adjust time restriction for test_semi_auto_parallel_hybrid_strategy.py (#62278)"

This reverts commit fbf852d.

* Revert "disable llm_int8 ut (#62282)"

This reverts commit e816529.

* Revert "fix openssl-cpu compile bug (#62079) (#62224)"

This reverts commit 59c61db.

* Revert "[CINN] Add IntrinsicOps into ir_codes_collector (#60556) (#62245)"

This reverts commit 773ea41.

* Revert "rm graph_reindex_test (#62057)"

This reverts commit 521dc70.

* Revert "fix (#61923) (#62186)"

This reverts commit d077553.

* Revert "fix cpups training bug:executor trainer use_ps_gpu value;test=develop (#62111)"

This reverts commit d804975.

* Revert "[cherry-pick 2.6] Fix bug of put_along_axis/take_along_axis (#62065)"

This reverts commit 3a083c3.

* Revert "[Cherry-pick] Fix indexing shape bug and Optimize (#62117)"

This reverts commit 609f55e.

* Revert "cherry pick: reduce log for type promotion. (#62116)"

This reverts commit f4d9adf.

* Revert "fix test_communicator_half_async random core;test=develop (#62092)"

This reverts commit dba9992.

* Revert "fix the unqiue op that generate the wrong the inreverse result (#62104)"

This reverts commit b89066a.

* Revert "[Cherry-pick] Fix Paddle-TRT UT fails (#61605)"

This reverts commit 867ab0d.

* Revert "fix se (#61640) (#61702)"

This reverts commit c0f4a49.

* Revert "fix dataloaer for toolkit (#61867) (#61994)"

This reverts commit b50e906.

* Revert "[Cherry-Pick] Fix CacheKV Quant Bug (#61966)"

This reverts commit 04ac1c0.

* Revert "[Paddle-TRT] fix solve (#61806)"

This reverts commit df0155f.

* Revert "fix launch when elastic run (#61847) (#61878)"

This reverts commit f09d9d8.

* Revert "Support Fake GroupWise Quant (#61900)"

This reverts commit 2175de0.

* Revert "repeat_interleave support bf16 dtype (#61854) (#61899)"

This reverts commit 96c2aaf.

* Revert "[security] refine _get_program_cache_key (#61827) (#61896)"

This reverts commit b6a38d0.

* Revert "merge (#61866)"

This reverts commit 39010bf.

* Revert "fix doc style (#61688)"

This reverts commit 12e5c97.

* Revert "fix layer_norm decompose dtyte bugs, polish codes (#61631)"

This reverts commit e5a85b6.

* Revert "remove _wget (#61356) (#61569)"

This reverts commit 9250f66.

* Revert "cinn(py-dsl): skip eval string in python-dsl (#61380) (#61586)"

This reverts commit a37f6fb.

* Revert "Fix unique (#60840) (#61044)"

This reverts commit 3452e61.

* Revert "[CherryPick] Fix issue 60092 (#61427)"

This reverts commit f025385.

* Revert "[cherry-pick] adapt c_embedding to phi namespace for custom devices (#60774) (#61045)"

This reverts commit 0ccb9cb.

* Revert "check eval for security (#61389)"

This reverts commit 60325a1.

* Revert "[Security] fix download security problem (#61162) (#61388)"

This reverts commit 5f3bbeb.

* Revert "[Security] fix security problem for run_cmd (#61285) (#61398)"

This reverts commit 9cd0c91.

* Revert "[Security] fix security problem for prune_by_memory_estimation (#61382)"

This reverts commit af9b8c5.

* Revert "Fix CVE-2024-0521 (#61032) (#61287)"

This reverts commit f99d4f2.

* Revert "fix _decompress security problem (#61294) (#61337)"

This reverts commit 0227a0d.

* Revert "[Security] fix draw security problem (#61161) (#61338)"

This reverts commit aeaa0ca.

* Revert "fix qat tests (#61211) (#61284)"

This reverts commit ff119d0.

* Revert "fix core dump when fallback gather_nd_grad and MemoryAllocateHost (#61067)"

This reverts commit ac1702b.

* Revert "[cherry-pick] This PR enable offset of generator for custom device. (#60616) (#60772)"

This reverts commit 0f732a5.

* Revert "[Cherry-pick] fix set_value with scalar grad (#60930)"

This reverts commit 1aa5f4b.

* Revert "[Dy2St][2.6] Increase `test_transformer` and `test_mobile_net` ut time (#60829) (#60875)"

This reverts commit d788e9b.

* Revert "[Dy2St][2.6] Disable `test_transformer` on `release/2.6` and update README (#60786)"

This reverts commit e738f49.

* Revert "fix bug of ci (#59926) (#60785)"

This reverts commit 7b0d2e9.

* Revert "[Dy2St][2.6] Disable `test_grad` on release/2.6 (#60662)"

This reverts commit e50f43e.

* Revert "[cherry-pick]update pdsa-2023-019 (#60649)"

This reverts commit ccdf528.

* Revert "[cherry-pick]fix fleetutil get_online_pass_interval bug3 (#60620)"

This reverts commit bbc13eb.

* Revert "fix fused_rope diff (#60217) (#60593)"

This reverts commit 97b65c7.

* Revert "fix fleetutil get_online_pass_interval bug2; test=develop (#60545)"

This reverts commit ae2e588.

* Revert "update 2023 security advisory, test=document_fix (#60532)"

This reverts commit 83ce809.

* Revert "add chunk allocator posix_memalign return value check (#60208) (#60495)"

This reverts commit b065877.

* Revert "tile (#60261)"

This reverts commit 203754e.

* Revert "[Cherry-pick] fix weight quant kernel bug when n div 64 != 0 (#60184)"

This reverts commit 20d3558.

* Revert "[Dy2St] Disable `test_bert` on CPU (#60173) (#60324)"

This reverts commit a4cd847.

* Revert "fix windows bug for common lib (#60308)"

This reverts commit 1b696a1.

* update to v2.6.0

* enable WITH_DISTRIBUTE in CMakeLists.txt and port the related source files from CUDA to MUSA

* fix some bugs when WITH_DISTRIBUTE is enabled

* remove the unneeded cout in ../paddle/phi/backends/gpu/musa/musa_info.cc and set the compute capability to 9.9 for unit tests
  • Loading branch information
hanhaowen-mt authored May 13, 2024
1 parent 1319992 commit 6caf5d5
Show file tree
Hide file tree
Showing 915 changed files with 11,920 additions and 8,842 deletions.
8 changes: 0 additions & 8 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,3 @@
path = third_party/cccl
url = https://github.com/NVIDIA/cccl.git
ignore = dirty
[submodule "third_party/cryptopp"]
path = third_party/cryptopp
url = https://github.com/weidai11/cryptopp.git
ignore = dirty
[submodule "third_party/cryptopp-cmake"]
path = third_party/cryptopp-cmake
url = https://github.com/noloader/cryptopp-cmake.git
ignore = dirty
71 changes: 68 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,14 @@ if(NOT CMAKE_BUILD_TYPE)
endif()

project(paddle CXX C)

# set(CMAKE_VERBOSE_MAKEFILE ON)
# enable language CUDA
# TODO(Shibo Tao): remove find_package(CUDA) completely.
find_package(CUDA QUIET)
find_package(MKL CONFIG QUIET)
option(WITH_ONEMKL "Compile PaddlePaddle with oneMKL" OFF)
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" OFF)
option(WITH_MUSA "Compile PaddlePaddle with MUSA" ON)
option(WITH_MPI "Compile PaddlePaddle with MPI" OFF)
option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF)
option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
Expand Down Expand Up @@ -89,6 +90,9 @@ endif()
# The CUDA and ROCM backends are mutually exclusive: stop configuring when
# both have been requested.
if(WITH_ROCM AND WITH_GPU)
  message(FATAL_ERROR "Error when compile CUDA and ROCM at the same time")
endif()
# The CUDA and MUSA backends are mutually exclusive: stop configuring when
# both have been requested.
if(WITH_MUSA AND WITH_GPU)
  message(FATAL_ERROR "Error when compile CUDA and MUSA at the same time")
endif()

if(WITH_GPU AND NOT APPLE)
enable_language(CUDA)
Expand Down Expand Up @@ -252,7 +256,7 @@ option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
option(WITH_MULTINODE_TESTING "Test multinode apis and ops" OFF)
option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND})
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_DISTRIBUTE "Compile with distributed support" ON)
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization and inference-lib generation"
ON)
Expand Down Expand Up @@ -285,6 +289,7 @@ option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF)
option(WITH_CINN "Compile PaddlePaddle with CINN" OFF)
option(WITH_NCCL "Compile PaddlePaddle with NCCL support" ON)
option(WITH_RCCL "Compile PaddlePaddle with RCCL support" ON)
option(WITH_MCCL "Compile PaddlePaddle with MCCL support" ON)
option(WITH_XPU_BKCL "Compile PaddlePaddle with BAIDU KUNLUN XPU BKCL" OFF)
option(WITH_CRYPTO "Compile PaddlePaddle with crypto support" ON)
option(WITH_ARM "Compile PaddlePaddle with arm support" OFF)
Expand Down Expand Up @@ -352,6 +357,7 @@ endif()
if(LINUX
AND NOT WITH_CUSTOM_DEVICE
AND NOT WITH_GPU
AND NOT WITH_MUSA
AND NOT WITH_ROCM
AND NOT WITH_XPU
AND NOT WITH_XPU_KP
Expand Down Expand Up @@ -404,6 +410,14 @@ if(NOT WITH_GPU AND WITH_NCCL)
CACHE STRING "Disable NCCL when compiling without GPU" FORCE)
endif()

# MCCL support is tied to the MUSA backend: when MUSA is disabled but MCCL is
# still requested, warn and force the WITH_MCCL cache entry to OFF.
if(WITH_MCCL AND NOT WITH_MUSA)
  message(
    WARNING "Disable MCCL when compiling without MUSA. Force WITH_MCCL=OFF.")
  # WITH_MCCL is declared with option(), i.e. as a BOOL cache entry. Keep that
  # type when overriding it so the entry is not re-typed as a STRING.
  set(WITH_MCCL
      OFF
      CACHE BOOL "Disable MCCL when compiling without MUSA" FORCE)
endif()

if(NOT WITH_GPU AND WITH_CUDNN_DSO)
message(
WARNING
Expand Down Expand Up @@ -461,6 +475,19 @@ else()
endif()
endif()

# With MCCL enabled, define PADDLE_WITH_MCCL and load the mccl CMake module.
# A MUSA build without MCCL still configures, but the user is warned that
# only a single card can be used.
if(WITH_MCCL)
  add_definitions("-DPADDLE_WITH_MCCL")
  include(mccl)
elseif(WITH_MUSA)
  message(
    WARNING
      "If the environment is multi-card, the WITH_MCCL option needs to be turned on, otherwise only a single card can be used."
  )
endif()


if(WITH_BRPC_RDMA)
message(STATUS "Use brpc with rdma.")
if(NOT WITH_DISTRIBUTE)
Expand All @@ -486,6 +513,11 @@ if(WITH_ROCM)
include(cupti)
endif()

# Load the `musa` and `mudnn` CMake modules when building with MUSA.
if(WITH_MUSA)
  include(musa)
  include(mudnn)
endif()

# Load the `xpu_kp` CMake module when WITH_XPU_KP is enabled.
if(WITH_XPU_KP)
  include(xpu_kp)
endif()
Expand All @@ -498,6 +530,14 @@ if(NOT WITH_ROCM AND WITH_RCCL)
CACHE STRING "Disable RCCL when compiling without ROCM" FORCE)
endif()

# MCCL support is tied to the MUSA backend: when MUSA is disabled but MCCL is
# still requested, warn and force the WITH_MCCL cache entry to OFF.
# NOTE(review): an identical check already appears earlier in this file, so
# this copy looks redundant -- confirm before removing it.
if(WITH_MCCL AND NOT WITH_MUSA)
  message(
    WARNING "Disable MCCL when compiling without MUSA. Force WITH_MCCL=OFF.")
  # WITH_MCCL is declared with option(), i.e. as a BOOL cache entry. Keep that
  # type when overriding it so the entry is not re-typed as a STRING.
  set(WITH_MCCL
      OFF
      CACHE BOOL "Disable MCCL when compiling without MUSA" FORCE)
endif()

if(WITH_RCCL)
add_definitions("-DPADDLE_WITH_RCCL")
include(rccl)
Expand All @@ -510,6 +550,18 @@ else()
endif()
endif()

# With MCCL enabled, define PADDLE_WITH_MCCL and load the mccl CMake module.
# A MUSA build without MCCL still configures, but the user is warned that
# only a single card can be used.
# NOTE(review): the same block already appears earlier in this file, so the
# definition and the include are applied twice -- confirm this is intended.
if(WITH_MCCL)
  add_definitions("-DPADDLE_WITH_MCCL")
  include(mccl)
elseif(WITH_MUSA)
  message(
    WARNING
      "If the environment is multi-card, the WITH_MCCL option needs to be turned on, otherwise only a single card can be used."
  )
endif()

# When both HETERPS and PSLIB are enabled, append
# -D_GLIBCXX_USE_CXX11_ABI=0 to the global C++ flags.
if(WITH_PSLIB AND WITH_HETERPS)
  string(APPEND CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=0")
endif()
Expand Down Expand Up @@ -560,6 +612,13 @@ if(WITH_RPC)
OFF
CACHE BOOL "Disable WITH_RPC when compiling with ROCM" FORCE)
endif()
# RPC is switched off for MUSA builds: warn, then force the WITH_RPC cache
# entry to OFF.
if(WITH_RPC AND WITH_MUSA)
  message(
    WARNING "Disable WITH_RPC when compiling with MUSA. Force WITH_RPC=OFF.")
  set(WITH_RPC
      OFF
      CACHE BOOL "Disable WITH_RPC when compiling with MUSA" FORCE)
endif()
if(WITH_XPU AND WITH_RPC)
message(
WARNING "Disable WITH_RPC when compiling with XPU. Force WITH_RPC=OFF.")
Expand Down Expand Up @@ -631,6 +690,12 @@ include(configure) # add paddle env configuration

include_directories("${PADDLE_SOURCE_DIR}")

# Distributed support needs the OpenSSL headers and libraries.
# OpenSSL install tutorial: https://www.howtoforge.com/tutorial/how-to-install-openssl-from-source-on-linux/
# The install prefix is a cache variable instead of a hard-coded path. It
# defaults to the previously hard-coded location, so existing setups keep
# working; override it with -DPADDLE_OPENSSL_ROOT=<prefix> at configure time.
set(PADDLE_OPENSSL_ROOT
    "/usr/local/ssl"
    CACHE PATH "OpenSSL install prefix used for distributed support")
include_directories("${PADDLE_OPENSSL_ROOT}/include")
link_directories("${PADDLE_OPENSSL_ROOT}/lib64")


if(WITH_NV_JETSON)
set(WITH_ARM
ON
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ PaddlePaddle is originated from industrial practices with dedication and commitm

## Installation

### Latest PaddlePaddle Release: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
### Latest PaddlePaddle Release: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)

Our vision is to enable deep learning for everyone via PaddlePaddle.
Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
Expand Down
4 changes: 2 additions & 2 deletions README_cn.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@

## 安装

### PaddlePaddle 最新版本: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
### PaddlePaddle最新版本: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)

跟进 PaddlePaddle 最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)

### 安装最新稳定版本:
```
Expand Down
2 changes: 1 addition & 1 deletion README_ja.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ PaddlePaddle は、工業化に対するコミットメントを持つ工業的

## インストール

### PaddlePaddle の最新リリース: [v2.6](https://github.com/PaddlePaddle/Paddle/tree/release/2.6)
### PaddlePaddle の最新リリース: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)

私たちのビジョンは、PaddlePaddle を通じて、誰もが深層学習を行えるようにすることです。
PaddlePaddle の最新機能を追跡するために、私たちの[リリースのお知らせ](https://github.com/PaddlePaddle/Paddle/releases)を参照してください。
Expand Down
13 changes: 13 additions & 0 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,19 @@ elseif(WITH_ROCM)
# Abort configuration when the detected MIOpen version is below 2090, which
# the error message reports as "MIOPEN >= 2.9".
# The expansion is quoted so that an empty or undefined MIOPEN_VERSION still
# forms a well-formed if() expression instead of a malformed one.
if("${MIOPEN_VERSION}" VERSION_LESS 2090)
  message(FATAL_ERROR "Paddle needs MIOPEN >= 2.9 to compile")
endif()
elseif(WITH_MUSA)
# Compile definitions added for every target when building with MUSA.
add_definitions(-DPADDLE_WITH_MUSA)
add_definitions(-DEIGEN_USE_GPU)
add_definitions(-DEIGEN_USE_MUSA)
# Define PADDLE_WITH_MUPTI only when MUPTI was found; otherwise just report it.
if(MUPTI_FOUND)
# NOTE(review): the guard tests MUPTI_FOUND but the include path comes from
# CUPTI_INCLUDE_DIR -- confirm which variable the MUPTI lookup actually sets.
include_directories(${CUPTI_INCLUDE_DIR})
add_definitions(-DPADDLE_WITH_MUPTI)
else()
message(STATUS "Cannot find MUPTI, GPU Profiling is incorrect.")
endif()
# muDNN is mandatory for MUSA builds: abort configuration when it is missing.
if(NOT MUDNN_FOUND)
message(FATAL_ERROR "Paddle needs mudnn to compile")
endif()
else()
add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
Expand Down
6 changes: 5 additions & 1 deletion cmake/cupti.cmake
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
if(NOT WITH_GPU AND NOT WITH_ROCM)
if(NOT WITH_GPU AND NOT WITH_ROCM AND NOT WITH_MUSA)
return()
endif()

if(WITH_ROCM)
set(CUPTI_ROOT
"${ROCM_PATH}/cuda/extras/CUPTI"
CACHE PATH "CUPTI ROOT")
elseif(WITH_MUSA)
set(CUPTI_ROOT
"/usr/local/musa"
CACHE PATH "CUPTI ROOT")
else()
set(CUPTI_ROOT
"/usr"
Expand Down
18 changes: 9 additions & 9 deletions cmake/external/cryptopp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,12 @@

include(ExternalProject)

set(CRYPTOPP_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/cryptopp)
set(CRYPTOPP_CMAKE_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/cryptopp-cmake)
set(CRYPTOPP_PREFIX_DIR ${THIRD_PARTY_PATH}/cryptopp)
set(CRYPTOPP_INSTALL_DIR ${THIRD_PARTY_PATH}/install/cryptopp)
set(CRYPTOPP_INCLUDE_DIR
"${CRYPTOPP_INSTALL_DIR}/include"
CACHE PATH "cryptopp include directory." FORCE)
set(CRYPTOPP_REPOSITORY ${GIT_URL}/weidai11/cryptopp.git)
set(CRYPTOPP_TAG CRYPTOPP_8_2_0)

if(WIN32)
Expand Down Expand Up @@ -64,16 +63,17 @@ include_directories(${CRYPTOPP_INCLUDE_DIR})
ExternalProject_Add(
extern_cryptopp
${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE}
GIT_REPOSITORY ${CRYPTOPP_REPOSITORY}
GIT_TAG ${CRYPTOPP_TAG}
PREFIX ${CRYPTOPP_PREFIX_DIR}
SOURCE_DIR ${CRYPTOPP_SOURCE_DIR}
UPDATE_COMMAND ""
PATCH_COMMAND
COMMAND ${CMAKE_COMMAND} -E copy "${CRYPTOPP_CMAKE_SOURCE_DIR}/CMakeLists.txt"
"<SOURCE_DIR>/CMakeLists.txt"
COMMAND
${CMAKE_COMMAND} -E copy
"${CRYPTOPP_CMAKE_SOURCE_DIR}/cryptopp-config.cmake"
"<SOURCE_DIR>/cryptopp-config.cmake"
COMMAND ${CMAKE_COMMAND} -E remove_directory "<SOURCE_DIR>/cmake/"
COMMAND git clone ${GIT_URL}/noloader/cryptopp-cmake "<SOURCE_DIR>/cmake"
COMMAND cd "<SOURCE_DIR>/cmake" && git checkout tags/${CRYPTOPP_TAG} -b
${CRYPTOPP_TAG}
COMMAND ${CMAKE_COMMAND} -E copy_directory "<SOURCE_DIR>/cmake/"
"<SOURCE_DIR>/"
COMMAND ${CRYPTOPP_PATCH_COMMAND}
INSTALL_DIR ${CRYPTOPP_INSTALL_DIR}
CMAKE_ARGS ${CRYPTOPP_CMAKE_ARGS}
Expand Down
70 changes: 70 additions & 0 deletions cmake/external/eigen.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,76 @@ if(CMAKE_COMPILER_IS_GNUCC)
${EIGEN_PATCH_COMMAND} && patch -Nd
${SOURCE_DIR}/Eigen/src/Core/arch/SSE/ < ${complex_header})
endif()
if(WITH_MUSA)
  # Extra Eigen patches applied for MUSA builds. Every entry has the form
  #   "<directory below the Eigen SOURCE_DIR>|<patch file in patches/eigen>"
  # The entries are processed top to bottom, which keeps the patches in the
  # same order in which they were previously listed one by one.
  set(eigen_musa_patches
      "Eigen/src/Core/util/|Eigen_src_Core_util_ConfigureVectorization.h.patch"
      "Eigen/src/Core/util/|Eigen_src_Core_util_Macros.h.patch"
      "Eigen/src/Core/util/|Eigen_src_Core_util_Meta.h.patch"
      "unsupported/Eigen/CXX11/|unsupported_Eigen_CXX11_Tensor.patch"
      "unsupported/Eigen/CXX11/src/Tensor/|unsupported_Eigen_CXX11_src_Tensor_TensorContractionGpu.h.patch"
      "unsupported/Eigen/CXX11/src/Tensor/|unsupported_Eigen_CXX11_src_Tensor_TensorDeviceDefault.h.patch"
      "unsupported/Eigen/CXX11/src/Tensor/|unsupported_Eigen_CXX11_src_Tensor_TensorGpuHipCudaDefines.h.patch"
      "unsupported/Eigen/CXX11/src/Tensor/|unsupported_Eigen_CXX11_src_Tensor_TensorReduction.h.patch"
      "Eigen/|Eigen_CORE.patch"
      "unsupported/Eigen/CXX11/src/Tensor/|unsupported_Eigen_CXX11_src_Tensor_TensorDeviceGpu.h.patch"
  )

  foreach(eigen_musa_patch IN LISTS eigen_musa_patches)
    # Split "<dir>|<patch>" into its two fields.
    string(REPLACE "|" ";" eigen_musa_patch_fields "${eigen_musa_patch}")
    list(GET eigen_musa_patch_fields 0 eigen_musa_patch_dir)
    list(GET eigen_musa_patch_fields 1 eigen_musa_patch_name)

    # The patch file is fed to `patch` through a shell redirection, so it is
    # converted to the platform's native path form first.
    file(TO_NATIVE_PATH
         "${PADDLE_SOURCE_DIR}/patches/eigen/${eigen_musa_patch_name}"
         eigen_musa_patch_file)

    # Chain one more `&& patch -Nd <dir> < <file>` step onto the command that
    # has been accumulated so far.
    set(EIGEN_PATCH_COMMAND
        ${EIGEN_PATCH_COMMAND} && patch -Nd
        ${SOURCE_DIR}/${eigen_musa_patch_dir} < ${eigen_musa_patch_file})
  endforeach()
endif()
endif()

set(EIGEN_INCLUDE_DIR ${SOURCE_DIR})
Expand Down
5 changes: 5 additions & 0 deletions cmake/flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,11 @@ if(WITH_GPU)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}")
endif()

# For MUSA builds, append the shared GPU flags (SAFE_GPU_COMMON_FLAGS) to
# CMAKE_MUSA_FLAGS.
if(WITH_MUSA)
  string(APPEND CMAKE_MUSA_FLAGS " ${SAFE_GPU_COMMON_FLAGS}")
endif()


# For ROCM builds, append the shared GPU flags (SAFE_GPU_COMMON_FLAGS) to
# HIP_HIPCC_FLAGS.
if(WITH_ROCM)
  string(APPEND HIP_HIPCC_FLAGS " ${SAFE_GPU_COMMON_FLAGS}")
endif()
Expand Down
Loading

0 comments on commit 6caf5d5

Please sign in to comment.