
Commit b9d874f: fix conflict
zeroRains committed Mar 29, 2024
2 parents: fb315d2 + 351ed7d
Showing 442 changed files with 14,260 additions and 4,060 deletions.
12 changes: 8 additions & 4 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -1,9 +1,13 @@
 <!-- TemplateReference: https://github.com/PaddlePaddle/Paddle/wiki/PULL-REQUEST-TEMPLATE--REFERENCE -->
 <!-- Demo: https://github.com/PaddlePaddle/Paddle/pull/24810 -->
-### PR types
-<!-- One of [ New features | Bug fixes | Function optimization | Performance optimization | Breaking changes | Others ] -->
 
-### PR changes
-<!-- One of [ OPs | APIs | Docs | Others ] -->
+### PR Category
+<!-- One of [ User Experience | Execute Infrastructure | Operator Mechanism | CINN | Custom Device | Performance Optimization | Distributed Strategy | Parameter Server | Communication Library | Auto Parallel | Inference | Environment Adaptation | Others ] -->
+
+
+### PR Types
+<!-- One of [ New features | Bug fixes | Improvements | Performance | BC Breaking | Deprecations | Docs | Devs | Not User Facing | Security | Others ] -->
+
+
 ### Description
 <!-- Describe what you’ve done -->
5 changes: 4 additions & 1 deletion CMakeLists.txt
@@ -142,7 +142,10 @@ endif()
 if(WIN32)
   option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
   message("Build static library of PHI")
-  set(CMAKE_SUPPRESS_REGENERATION ON)
+  # (Note xuxinyi04): If CMAKE_SUPPRESS_REGENERATION is OFF, which is default, then CMake adds a
+  # special target on which all other targets depend that checks the build system and optionally
+  # re-runs CMake to regenerate the build system when the target specification source changes.
+  set(CMAKE_SUPPRESS_REGENERATION OFF)
   set(CMAKE_STATIC_LIBRARY_PREFIX lib)
   set(WITH_SHARED_PHI
       OFF
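
Editor's note: a minimal standalone sketch of what this variable controls, for context only (not part of the commit; with Visual Studio generators the regeneration check is commonly exposed as a ZERO_CHECK target):

cmake_minimum_required(VERSION 3.15)
project(regen_demo LANGUAGES CXX)
# Leaving CMAKE_SUPPRESS_REGENERATION at its default (OFF) keeps the helper
# target that re-runs CMake whenever CMakeLists.txt changes; setting it ON
# drops that target, so a stale build system is no longer refreshed.
set(CMAKE_SUPPRESS_REGENERATION OFF)
add_executable(demo main.cpp)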
4 changes: 2 additions & 2 deletions cmake/coveralls.cmake
@@ -60,8 +60,8 @@ endfunction()
 
 if(WITH_COVERAGE)
   if(WITH_INCREMENTAL_COVERAGE)
-    # if *.h changed, generate coverage report totaly.
-    # if pybind.cc changed, generate coverage report totaly.
+    # if *.h changed, generate coverage report totally.
+    # if pybind.cc changed, generate coverage report totally.
     # Because if pybind.cc add '-g -O0 -fprofile-arcs -ftest-coverage' only, some testcase will fail.
     if((NOT ("$ENV{PADDLE_GIT_DIFF_H_FILE}" STREQUAL ""))
        OR ("$ENV{PADDLE_GIT_DIFF_CC_FILE}" MATCHES "pybind.cc"))
2 changes: 1 addition & 1 deletion cmake/coverallsGcovJsons.cmake
@@ -248,7 +248,7 @@ foreach(GCOV_FILE ${GCOV_FILES})
   # Instead of trying to parse the source from the
   # gcov file, simply read the file contents from the source file.
   # (Parsing it from the gcov is hard because C-code uses ; in many places
-  # which also happens to be the same as the CMake list delimeter).
+  # which also happens to be the same as the CMake list delimiter).
   file(READ ${GCOV_SRC_PATH} GCOV_FILE_SOURCE)
 
   string(REPLACE "\\" "\\\\" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
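
Editor's note: the delimiter clash this comment describes can be reproduced in a few lines of CMake (a hypothetical sketch, not from this commit): any string containing ';' is treated as a list, so C source pulled into a list variable gets split mid-statement.

set(SRC_LINE "int a = 1; int b = 2")
foreach(fragment IN LISTS SRC_LINE)
  # Prints "int a = 1" and then " int b = 2": the statement splits at ';'.
  message(STATUS "fragment: [${fragment}]")
endforeach()
# file(READ ...) side-steps this by keeping the file contents as one string.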
2 changes: 1 addition & 1 deletion cmake/experiments/cuda_module_loading_lazy.cmake
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 # this file contains experimental build options for lazy cuda module loading
-# cuda moduel lazy loading is supported by CUDA 11.7+
+# cuda module lazy loading is supported by CUDA 11.7+
 # this experiment option makes Paddle supports lazy loading before CUDA 11.7.
 
 if(LINUX)
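
Editor's note: as a rough illustration of how such an experiment flag might be wired up (names below are hypothetical, not Paddle's actual implementation; on CUDA 11.7+ the driver natively honours the CUDA_MODULE_LOADING environment variable):

option(EXP_CUDA_MODULE_LOADING_LAZY "enable lazy CUDA module loading" OFF)
if(EXP_CUDA_MODULE_LOADING_LAZY)
  if(NOT LINUX)
    message(FATAL_ERROR "lazy CUDA module loading requires Linux")
  endif()
  # On CUDA 11.7+ lazy loading can simply be requested at run time by
  # exporting CUDA_MODULE_LOADING=LAZY; older toolkits need extra build
  # support, which is what an experiment option like this would patch in.
  message(STATUS "experimental lazy CUDA module loading enabled")
endif()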
2 changes: 1 addition & 1 deletion cmake/external/eigen.cmake
@@ -25,7 +25,7 @@ if(WIN32)
 elseif(LINUX)
   if(WITH_ROCM)
     # For HIPCC Eigen::internal::device::numeric_limits is not EIGEN_DEVICE_FUNC
-    # which will cause compiler error of using __host__ funciont
+    # which will cause compiler error of using __host__ function
     # in __host__ __device__
     file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/Meta.h native_src)
     file(TO_NATIVE_PATH ${SOURCE_DIR}/Eigen/src/Core/util/Meta.h native_dst)
41 changes: 28 additions & 13 deletions cmake/external/lapack.cmake
@@ -48,19 +48,34 @@ elseif(WIN32)
   set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran-3.dll")
   set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.dll")
   set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.dll")
-else()
-  set(LAPACK_FILE
-      "lapack_mac_v3.10.0.20210628.tar.gz"
-      CACHE STRING "" FORCE)
-  set(LAPACK_URL
-      "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_FILE}"
-      CACHE STRING "" FORCE)
-  set(LAPACK_URL_MD5 427aecf8dee8523de3566ca8e47944d7)
-  set(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath.0.dylib")
-  set(GNU_RT_LIB_2 "${LAPACK_LIB_DIR}/libgcc_s.1.dylib")
-  set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran.5.dylib")
-  set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.3.dylib")
-  set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.3.dylib")
+else() # MacOS
+  if(APPLE AND WITH_ARM)
+    set(LAPACK_FILE
+        "lapack_mac_arm64_v0.3.26.tar.gz"
+        CACHE STRING "" FORCE)
+    set(LAPACK_URL
+        "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_FILE}"
+        CACHE STRING "" FORCE)
+    set(LAPACK_URL_MD5 3f6412105ae2b7465e5ee90c8673e6d4)
+    set(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath.0.dylib")
+    set(GNU_RT_LIB_2 "${LAPACK_LIB_DIR}/libgcc_s.1.dylib")
+    set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran.5.dylib")
+    set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.dylib")
+    set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.dylib")
+  else()
+    set(LAPACK_FILE
+        "lapack_mac_v3.10.0.20210628.tar.gz"
+        CACHE STRING "" FORCE)
+    set(LAPACK_URL
+        "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_FILE}"
+        CACHE STRING "" FORCE)
+    set(LAPACK_URL_MD5 427aecf8dee8523de3566ca8e47944d7)
+    set(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath.0.dylib")
+    set(GNU_RT_LIB_2 "${LAPACK_LIB_DIR}/libgcc_s.1.dylib")
+    set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran.5.dylib")
+    set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.3.dylib")
+    set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.3.dylib")
+  endif()
 endif()
 
 function(download_lapack)
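
Editor's note: a minimal sketch of the download-and-verify pattern such a helper typically implements, under the assumption that the real download_lapack body differs (illustrative only):

function(download_lapack)
  # Fetch the archive chosen above and verify it against the pinned MD5.
  file(
    DOWNLOAD "${LAPACK_URL}" "${CMAKE_BINARY_DIR}/${LAPACK_FILE}"
    EXPECTED_MD5 ${LAPACK_URL_MD5}
    STATUS download_status)
  list(GET download_status 0 status_code)
  if(NOT status_code EQUAL 0)
    message(FATAL_ERROR "failed to download ${LAPACK_URL}")
  endif()
  # Unpack next to the library paths configured above.
  file(ARCHIVE_EXTRACT INPUT "${CMAKE_BINARY_DIR}/${LAPACK_FILE}"
       DESTINATION "${LAPACK_LIB_DIR}")
endfunction()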
2 changes: 1 addition & 1 deletion cmake/external/python.cmake
@@ -16,7 +16,7 @@ include(python_module)
 
 check_py_version(${PY_VERSION})
 
-# Find Python with mnimum PY_VERSION specified or will raise error!
+# Find Python with minimum PY_VERSION specified or will raise error!
 find_package(PythonInterp ${PY_VERSION} REQUIRED)
 find_package(PythonLibs ${PY_VERSION} REQUIRED)
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
@@ -29,7 +29,7 @@ if(NOT DEFINED XPU_BASE_DATE)
   set(XPU_BASE_DATE "20240104")
 endif()
 if(NOT DEFINED XPU_XHPC_BASE_DATE)
-  set(XPU_XHPC_BASE_DATE "20240315")
+  set(XPU_XHPC_BASE_DATE "20240328")
 endif()
 set(XPU_XCCL_BASE_VERSION "1.1.8.1")
 if(NOT DEFINED XPU_XFT_BASE_VERSION)
2 changes: 1 addition & 1 deletion cmake/operators.cmake
@@ -494,7 +494,7 @@ function(op_library TARGET)
     if(NOT ${op_name} EQUAL "")
       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CPU);\n")
       # why change TARGET here?
-      # when building padle with on_infer, the REGISTER_OPERATOR(*_grad) will be removed before compiling (see details in remove_grad_op_and_kernel.py)
+      # when building paddle with on_infer, the REGISTER_OPERATOR(*_grad) will be removed before compiling (see details in remove_grad_op_and_kernel.py)
       # in elementwise_op.cc, it will find REGISTER_OPERATOR(grad_add) and set TARGET to grad_add
       # and, in the following "mkldnn" part, it will add USE_OP_DEVICE_KERNEL(grad_add, MKLDNN) to pybind.h
       # however, grad_add has no mkldnn kernel.
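
Editor's note: the guard this comment describes might look like the following sketch (hypothetical; the actual logic in op_library is more involved and is not shown in this hunk):

# Skip the MKLDNN pybind registration for grad_add, which has no MKLDNN
# kernel even though TARGET was rewritten to grad_add earlier.
if(WITH_MKLDNN AND NOT "${TARGET}" STREQUAL "grad_add")
  file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n")
endif()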
2 changes: 1 addition & 1 deletion cmake/simd.cmake
@@ -1,5 +1,5 @@
 # This file is use to check all support level of AVX on your machine
-# so that PaddlePaddle can unleash the vectorization power of muticore.
+# so that PaddlePaddle can unleash the vectorization power of multicore.
 
 include(CheckCXXSourceRuns)
 include(CheckCXXSourceCompiles)
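
Editor's note: an illustration of how such detection works with the modules included above (a sketch; the result variable name AVX_FOUND is an assumption, not necessarily Paddle's):

include(CheckCXXSourceRuns)
set(CMAKE_REQUIRED_FLAGS "-mavx")
check_cxx_source_runs(
  "
#include <immintrin.h>
int main() {
  __m256 a = _mm256_set1_ps(1.0f);
  __m256 b = _mm256_add_ps(a, a);  // requires AVX support at run time
  return _mm256_cvtss_f32(b) == 2.0f ? 0 : 1;
}"
  AVX_FOUND)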
4 changes: 2 additions & 2 deletions paddle/cinn/adt/adapter_dynamic_tensor.h
@@ -18,13 +18,13 @@
 #include "paddle/cinn/adt/adt.h"
 #include "paddle/cinn/adt/dim_expr.h"
 #include "paddle/cinn/adt/symbolic_dim.h"
-#include "paddle/cinn/hlir/framework/pir/group.h"
+#include "paddle/cinn/hlir/framework/pir/op_lowering_group.h"
 
 namespace cinn::adt::adapter {
 
 struct DynamicTensor final {
   ::pir::Value node_data;
-  const hlir::framework::pir::Group* group;
+  const hlir::framework::pir::OpLoweringGroup* group;
 
   bool operator==(const DynamicTensor& other) const {
     return this->node_data == other.node_data;
34 changes: 19 additions & 15 deletions paddle/cinn/adt/generate_map_expr.cc
@@ -109,8 +109,9 @@ bool HasDynamicShape(const ::pir::Value& tensor) {
   return false;
 }
 
-List<Arg> MakeOpStmtInputList(const ::pir::Operation* op,
-                              const hlir::framework::pir::Group* group) {
+List<Arg> MakeOpStmtInputList(
+    const ::pir::Operation* op,
+    const hlir::framework::pir::OpLoweringGroup* group) {
   List<Arg> ret{};
 
   VisitEachInputTensor(op, [&](const ::pir::Value& tensor) {
@@ -131,8 +132,9 @@ void VisitEachOutputTensor(const ::pir::Operation* op, const DoEachT& DoEach) {
   }
 }
 
-List<Arg> MakeOpStmtOutputList(const ::pir::Operation* op,
-                               const hlir::framework::pir::Group* group) {
+List<Arg> MakeOpStmtOutputList(
+    const ::pir::Operation* op,
+    const hlir::framework::pir::OpLoweringGroup* group) {
   List<Arg> ret{};
 
   VisitEachOutputTensor(op, [&](const ::pir::Value& tensor) {
@@ -147,9 +149,10 @@ List<Arg> MakeOpStmtOutputList(const ::pir::Operation* op,
 }
 
 template <typename DoEachT>
-void VisitEachOpStmt(const std::shared_ptr<hlir::framework::pir::Group>& group,
-                     const DoEachT& DoEach) {
-  for (const auto* op : group->CollectOps()) {
+void VisitEachOpStmt(
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& group,
+    const DoEachT& DoEach) {
+  for (const auto* op : group->ops()) {
     DoEach(OpStmt{MakeOp(op),
                   MakeOpStmtInputList(op, group.get()),
                   MakeOpStmtOutputList(op, group.get())});
@@ -187,7 +190,7 @@ void CollectRewrittenOpStmts(const OpStmt& op_stmt, List<OpStmt>* ret) {
 }
 
 List<OpStmt> MakeOpStmts(
-    const std::shared_ptr<hlir::framework::pir::Group>& group) {
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& group) {
   List<OpStmt> ret{};
 
   VisitEachOpStmt(group, [&](const auto& op_stmt) {
@@ -223,7 +226,7 @@ std::shared_ptr<IGroup> MakeIGroup(const AnchorGroup& igroup_spec) {
 }
 
 std::vector<std::shared_ptr<IGroup>> GenerateIGroups(
-    const std::shared_ptr<hlir::framework::pir::Group>& group) {
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& group) {
   std::vector<std::shared_ptr<IGroup>> ret{};
 
   List<OpStmt> op_stmts = MakeOpStmts(group);
@@ -237,7 +240,7 @@ std::vector<std::shared_ptr<IGroup>> GenerateIGroups(
 }
 
 std::shared_ptr<KGroup> GenerateKGroups(
-    const std::shared_ptr<hlir::framework::pir::Group>& group,
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& group,
     const std::vector<std::shared_ptr<IGroup>>& igroups) {
   CHECK_EQ(igroups.size(), 1);
   return std::make_shared<KGroup>(group, igroups);
@@ -352,15 +355,15 @@ Tensor GetAnchorTensor(const std::shared_ptr<IGroup>& igroup) {
 }
 
 template <typename DoEachT>
-void VisitInputTensor(const hlir::framework::pir::Group& group,
+void VisitInputTensor(const hlir::framework::pir::OpLoweringGroup& group,
                       const DoEachT& DoEach) {
   for (const ::pir::Value& node_data : group.GetInputOpValues()) {
     DoEach(node_data);
   }
 }
 
 template <typename DoEachT>
-void VisitOutputTensor(const hlir::framework::pir::Group& group,
+void VisitOutputTensor(const hlir::framework::pir::OpLoweringGroup& group,
                        const DoEachT& DoEach) {
   for (const ::pir::Value& node_data : group.GetOutputOpValues()) {
     DoEach(node_data);
@@ -444,7 +447,7 @@ MapExpr GenerateMapExpr(const std::shared_ptr<KGroup>& kgroup) {
 }  // namespace
 
 MapExpr GenerateMapExpr(
-    const std::shared_ptr<hlir::framework::pir::Group>& group) {
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& group) {
   const auto& igroups = GenerateIGroups(group);
 
   const auto& kgroup = GenerateKGroups(group, igroups);
@@ -453,13 +456,14 @@ MapExpr GenerateMapExpr(
 }
 
 void TryGenerateMapExprFromGroup(
-    const std::shared_ptr<hlir::framework::pir::Group>& fusion_group) {
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>&
+        fusion_group) {
   if (!FLAGS_cinn_enable_map_expr) {
     return;
   }
   const auto& map_expr = GenerateMapExpr(fusion_group);
   VLOG(4) << "Generate MapExpr: \n"
-          << ToTxtString(map_expr, fusion_group->group_id);
+          << ToTxtString(map_expr, fusion_group->group_id());
   fusion_group->set_map_expr_ctx(std::make_shared<MapExprCtx>(map_expr));
 }
7 changes: 3 additions & 4 deletions paddle/cinn/adt/generate_map_expr.h
@@ -20,17 +20,16 @@
 
 namespace cinn::hlir::framework::pir {
 
-struct Group;
-using GroupList = std::vector<std::shared_ptr<Group>>;
+struct OpLoweringGroup;
 
 }  // namespace cinn::hlir::framework::pir
 
 namespace cinn::adt {
 
 MapExpr GenerateMapExpr(
-    const std::shared_ptr<cinn::hlir::framework::pir::Group>& group);
+    const std::shared_ptr<cinn::hlir::framework::pir::OpLoweringGroup>& group);
 
 void TryGenerateMapExprFromGroup(
-    const std::shared_ptr<hlir::framework::pir::Group>& fusion_group);
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& fusion_group);
 
 }  // namespace cinn::adt
8 changes: 4 additions & 4 deletions paddle/cinn/adt/kgroup.h
@@ -21,7 +21,7 @@
 
 namespace cinn::hlir::framework::pir {
 
-struct Group;
+struct OpLoweringGroup;
 
 }  // namespace cinn::hlir::framework::pir
 
@@ -39,11 +39,11 @@ using cinn::adt::LoopDescriptors;
 class KGroup final {
  public:
   explicit KGroup(
-      const std::shared_ptr<hlir::framework::pir::Group>& cinn_group,
+      const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& cinn_group,
       const std::vector<std::shared_ptr<IGroup>>& igroups)
       : cinn_group_(cinn_group), igroups_(igroups) {}
 
-  std::shared_ptr<hlir::framework::pir::Group> cinn_group() const {
+  std::shared_ptr<hlir::framework::pir::OpLoweringGroup> cinn_group() const {
     return CHECK_NOTNULL(cinn_group_.lock());
   }
 
@@ -58,7 +58,7 @@ class KGroup final {
       const std::shared_ptr<IGroup>& igroup) const;
 
  private:
-  std::weak_ptr<hlir::framework::pir::Group> cinn_group_;
+  std::weak_ptr<hlir::framework::pir::OpLoweringGroup> cinn_group_;
   // NOTE: Use single igroup temporarily. Actually KGroup contains
   // multiple IGroups
   std::vector<std::shared_ptr<IGroup>> igroups_;
(Diff listing truncated; the remaining files of the 442 changed are not shown.)