
Commit b9d874f: fix conflict
zeroRains committed Mar 29, 2024
2 parents: fb315d2 + 351ed7d
Showing 442 changed files with 14,260 additions and 4,060 deletions.
12 changes: 8 additions & 4 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -1,9 +1,13 @@
 <!-- TemplateReference: https://github.com/PaddlePaddle/Paddle/wiki/PULL-REQUEST-TEMPLATE--REFERENCE -->
 <!-- Demo: https://github.com/PaddlePaddle/Paddle/pull/24810 -->
-### PR types
-<!-- One of [ New features | Bug fixes | Function optimization | Performance optimization | Breaking changes | Others ] -->
 
-### PR changes
-<!-- One of [ OPs | APIs | Docs | Others ] -->
+### PR Category
+<!-- One of [ User Experience | Execute Infrastructure | Operator Mechanism | CINN | Custom Device | Performance Optimization | Distributed Strategy | Parameter Server | Communication Library | Auto Parallel | Inference | Environment Adaptation | Others ] -->
+
+
+### PR Types
+<!-- One of [ New features | Bug fixes | Improvements | Performance | BC Breaking | Deprecations | Docs | Devs | Not User Facing | Security | Others ] -->
+
+
 ### Description
 <!-- Describe what you’ve done -->
5 changes: 4 additions & 1 deletion CMakeLists.txt
@@ -142,7 +142,10 @@ endif()
 if(WIN32)
   option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
   message("Build static library of PHI")
-  set(CMAKE_SUPPRESS_REGENERATION ON)
+  # (Note xuxinyi04): If CMAKE_SUPPRESS_REGENERATION is OFF, which is default, then CMake adds a
+  # special target on which all other targets depend that checks the build system and optionally
+  # re-runs CMake to regenerate the build system when the target specification source changes.
+  set(CMAKE_SUPPRESS_REGENERATION OFF)
   set(CMAKE_STATIC_LIBRARY_PREFIX lib)
   set(WITH_SHARED_PHI
       OFF
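
Editor's note: a minimal standalone sketch of what this variable controls, for context only (not part of the commit; with Visual Studio generators the regeneration check is commonly exposed as a ZERO_CHECK target):

cmake_minimum_required(VERSION 3.15)
project(regen_demo LANGUAGES CXX)
# Leaving CMAKE_SUPPRESS_REGENERATION at its default (OFF) keeps the helper
# target that re-runs CMake whenever CMakeLists.txt changes; setting it ON
# drops that target, so a stale build system is no longer refreshed.
set(CMAKE_SUPPRESS_REGENERATION OFF)
add_executable(demo main.cpp)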
4 changes: 2 additions & 2 deletions cmake/coveralls.cmake
@@ -60,8 +60,8 @@ endfunction()
 
 if(WITH_COVERAGE)
   if(WITH_INCREMENTAL_COVERAGE)
-    # if *.h changed, generate coverage report totaly.
-    # if pybind.cc changed, generate coverage report totaly.
+    # if *.h changed, generate coverage report totally.
+    # if pybind.cc changed, generate coverage report totally.
     # Because if pybind.cc add '-g -O0 -fprofile-arcs -ftest-coverage' only, some testcase will fail.
     if((NOT ("$ENV{PADDLE_GIT_DIFF_H_FILE}" STREQUAL ""))
        OR ("$ENV{PADDLE_GIT_DIFF_CC_FILE}" MATCHES "pybind.cc"))
2 changes: 1 addition & 1 deletion cmake/coverallsGcovJsons.cmake
@@ -248,7 +248,7 @@ foreach(GCOV_FILE ${GCOV_FILES})
   # Instead of trying to parse the source from the
   # gcov file, simply read the file contents from the source file.
   # (Parsing it from the gcov is hard because C-code uses ; in many places
-  # which also happens to be the same as the CMake list delimeter).
+  # which also happens to be the same as the CMake list delimiter).
   file(READ ${GCOV_SRC_PATH} GCOV_FILE_SOURCE)
 
   string(REPLACE "\\" "\\\\" GCOV_FILE_SOURCE "${GCOV_FILE_SOURCE}")
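
Editor's note: the delimiter clash this comment describes can be reproduced in a few lines of CMake (a hypothetical sketch, not from this commit): any string containing ';' is treated as a list, so C source pulled into a list variable gets split mid-statement.

set(SRC_LINE "int a = 1; int b = 2")
foreach(fragment IN LISTS SRC_LINE)
  # Prints "int a = 1" and then " int b = 2": the statement splits at ';'.
  message(STATUS "fragment: [${fragment}]")
endforeach()
# file(READ ...) side-steps this by keeping the file contents as one string.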
2 changes: 1 addition & 1 deletion cmake/experiments/cuda_module_loading_lazy.cmake
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 # this file contains experimental build options for lazy cuda module loading
-# cuda moduel lazy loading is supported by CUDA 11.7+
+# cuda module lazy loading is supported by CUDA 11.7+
 # this experiment option makes Paddle supports lazy loading before CUDA 11.7.
 
 if(LINUX)
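
Editor's note: as a rough illustration of how such an experiment flag might be wired up (names below are hypothetical, not Paddle's actual implementation; on CUDA 11.7+ the driver natively honours the CUDA_MODULE_LOADING environment variable):

option(EXP_CUDA_MODULE_LOADING_LAZY "enable lazy CUDA module loading" OFF)
if(EXP_CUDA_MODULE_LOADING_LAZY)
  if(NOT LINUX)
    message(FATAL_ERROR "lazy CUDA module loading requires Linux")
  endif()
  # On CUDA 11.7+ lazy loading can simply be requested at run time by
  # exporting CUDA_MODULE_LOADING=LAZY; older toolkits need extra build
  # support, which is what an experiment option like this would patch in.
  message(STATUS "experimental lazy CUDA module loading enabled")
endif()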
2 changes: 1 addition & 1 deletion cmake/external/eigen.cmake
@@ -25,7 +25,7 @@ if(WIN32)
 elseif(LINUX)
   if(WITH_ROCM)
     # For HIPCC Eigen::internal::device::numeric_limits is not EIGEN_DEVICE_FUNC
-    # which will cause compiler error of using __host__ funciont
+    # which will cause compiler error of using __host__ function
     # in __host__ __device__
     file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/eigen/Meta.h native_src)
     file(TO_NATIVE_PATH ${SOURCE_DIR}/Eigen/src/Core/util/Meta.h native_dst)
41 changes: 28 additions & 13 deletions cmake/external/lapack.cmake
@@ -48,19 +48,34 @@ elseif(WIN32)
   set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran-3.dll")
   set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.dll")
   set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.dll")
-else()
-  set(LAPACK_FILE
-      "lapack_mac_v3.10.0.20210628.tar.gz"
-      CACHE STRING "" FORCE)
-  set(LAPACK_URL
-      "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_FILE}"
-      CACHE STRING "" FORCE)
-  set(LAPACK_URL_MD5 427aecf8dee8523de3566ca8e47944d7)
-  set(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath.0.dylib")
-  set(GNU_RT_LIB_2 "${LAPACK_LIB_DIR}/libgcc_s.1.dylib")
-  set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran.5.dylib")
-  set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.3.dylib")
-  set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.3.dylib")
+else() # MacOS
+  if(APPLE AND WITH_ARM)
+    set(LAPACK_FILE
+        "lapack_mac_arm64_v0.3.26.tar.gz"
+        CACHE STRING "" FORCE)
+    set(LAPACK_URL
+        "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_FILE}"
+        CACHE STRING "" FORCE)
+    set(LAPACK_URL_MD5 3f6412105ae2b7465e5ee90c8673e6d4)
+    set(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath.0.dylib")
+    set(GNU_RT_LIB_2 "${LAPACK_LIB_DIR}/libgcc_s.1.dylib")
+    set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran.5.dylib")
+    set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.dylib")
+    set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.dylib")
+  else()
+    set(LAPACK_FILE
+        "lapack_mac_v3.10.0.20210628.tar.gz"
+        CACHE STRING "" FORCE)
+    set(LAPACK_URL
+        "https://paddlepaddledeps.bj.bcebos.com/${LAPACK_FILE}"
+        CACHE STRING "" FORCE)
+    set(LAPACK_URL_MD5 427aecf8dee8523de3566ca8e47944d7)
+    set(GNU_RT_LIB_1 "${LAPACK_LIB_DIR}/libquadmath.0.dylib")
+    set(GNU_RT_LIB_2 "${LAPACK_LIB_DIR}/libgcc_s.1.dylib")
+    set(GFORTRAN_LIB "${LAPACK_LIB_DIR}/libgfortran.5.dylib")
+    set(BLAS_LIB "${LAPACK_LIB_DIR}/libblas.3.dylib")
+    set(LAPACK_LIB "${LAPACK_LIB_DIR}/liblapack.3.dylib")
+  endif()
 endif()
 
 function(download_lapack)
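
Editor's note: a minimal sketch of the download-and-verify pattern such a helper typically implements, under the assumption that the real download_lapack body differs (illustrative only):

function(download_lapack)
  # Fetch the archive chosen above and verify it against the pinned MD5.
  file(
    DOWNLOAD "${LAPACK_URL}" "${CMAKE_BINARY_DIR}/${LAPACK_FILE}"
    EXPECTED_MD5 ${LAPACK_URL_MD5}
    STATUS download_status)
  list(GET download_status 0 status_code)
  if(NOT status_code EQUAL 0)
    message(FATAL_ERROR "failed to download ${LAPACK_URL}")
  endif()
  # Unpack next to the library paths configured above.
  file(ARCHIVE_EXTRACT INPUT "${CMAKE_BINARY_DIR}/${LAPACK_FILE}"
       DESTINATION "${LAPACK_LIB_DIR}")
endfunction()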
2 changes: 1 addition & 1 deletion cmake/external/python.cmake
@@ -16,7 +16,7 @@ include(python_module)
 
 check_py_version(${PY_VERSION})
 
-# Find Python with mnimum PY_VERSION specified or will raise error!
+# Find Python with minimum PY_VERSION specified or will raise error!
 find_package(PythonInterp ${PY_VERSION} REQUIRED)
 find_package(PythonLibs ${PY_VERSION} REQUIRED)
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
@@ -29,7 +29,7 @@ if(NOT DEFINED XPU_BASE_DATE)
   set(XPU_BASE_DATE "20240104")
 endif()
 if(NOT DEFINED XPU_XHPC_BASE_DATE)
-  set(XPU_XHPC_BASE_DATE "20240315")
+  set(XPU_XHPC_BASE_DATE "20240328")
 endif()
 set(XPU_XCCL_BASE_VERSION "1.1.8.1")
 if(NOT DEFINED XPU_XFT_BASE_VERSION)
2 changes: 1 addition & 1 deletion cmake/operators.cmake
@@ -494,7 +494,7 @@ function(op_library TARGET)
     if(NOT ${op_name} EQUAL "")
       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CPU);\n")
       # why change TARGET here?
-      # when building padle with on_infer, the REGISTER_OPERATOR(*_grad) will be removed before compiling (see details in remove_grad_op_and_kernel.py)
+      # when building paddle with on_infer, the REGISTER_OPERATOR(*_grad) will be removed before compiling (see details in remove_grad_op_and_kernel.py)
       # in elementwise_op.cc, it will find REGISTER_OPERATOR(grad_add) and set TARGET to grad_add
       # and, in the following "mkldnn" part, it will add USE_OP_DEVICE_KERNEL(grad_add, MKLDNN) to pybind.h
       # however, grad_add has no mkldnn kernel.
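
Editor's note: the guard this comment describes might look like the following sketch (hypothetical; the actual logic in op_library is more involved and is not shown in this hunk):

# Skip the MKLDNN pybind registration for grad_add, which has no MKLDNN
# kernel even though TARGET was rewritten to grad_add earlier.
if(WITH_MKLDNN AND NOT "${TARGET}" STREQUAL "grad_add")
  file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n")
endif()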
2 changes: 1 addition & 1 deletion cmake/simd.cmake
@@ -1,5 +1,5 @@
 # This file is use to check all support level of AVX on your machine
-# so that PaddlePaddle can unleash the vectorization power of muticore.
+# so that PaddlePaddle can unleash the vectorization power of multicore.
 
 include(CheckCXXSourceRuns)
 include(CheckCXXSourceCompiles)
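
Editor's note: an illustration of how such detection works with the modules included above (a sketch; the result variable name AVX_FOUND is an assumption, not necessarily Paddle's):

include(CheckCXXSourceRuns)
set(CMAKE_REQUIRED_FLAGS "-mavx")
check_cxx_source_runs(
  "
#include <immintrin.h>
int main() {
  __m256 a = _mm256_set1_ps(1.0f);
  __m256 b = _mm256_add_ps(a, a);  // requires AVX support at run time
  return _mm256_cvtss_f32(b) == 2.0f ? 0 : 1;
}"
  AVX_FOUND)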
4 changes: 2 additions & 2 deletions paddle/cinn/adt/adapter_dynamic_tensor.h
@@ -18,13 +18,13 @@
 #include "paddle/cinn/adt/adt.h"
 #include "paddle/cinn/adt/dim_expr.h"
 #include "paddle/cinn/adt/symbolic_dim.h"
-#include "paddle/cinn/hlir/framework/pir/group.h"
+#include "paddle/cinn/hlir/framework/pir/op_lowering_group.h"
 
 namespace cinn::adt::adapter {
 
 struct DynamicTensor final {
   ::pir::Value node_data;
-  const hlir::framework::pir::Group* group;
+  const hlir::framework::pir::OpLoweringGroup* group;
 
   bool operator==(const DynamicTensor& other) const {
     return this->node_data == other.node_data;
34 changes: 19 additions & 15 deletions paddle/cinn/adt/generate_map_expr.cc
@@ -109,8 +109,9 @@ bool HasDynamicShape(const ::pir::Value& tensor) {
   return false;
 }
 
-List<Arg> MakeOpStmtInputList(const ::pir::Operation* op,
-                              const hlir::framework::pir::Group* group) {
+List<Arg> MakeOpStmtInputList(
+    const ::pir::Operation* op,
+    const hlir::framework::pir::OpLoweringGroup* group) {
   List<Arg> ret{};
 
   VisitEachInputTensor(op, [&](const ::pir::Value& tensor) {
@@ -131,8 +132,9 @@ void VisitEachOutputTensor(const ::pir::Operation* op, const DoEachT& DoEach) {
   }
 }
 
-List<Arg> MakeOpStmtOutputList(const ::pir::Operation* op,
-                               const hlir::framework::pir::Group* group) {
+List<Arg> MakeOpStmtOutputList(
+    const ::pir::Operation* op,
+    const hlir::framework::pir::OpLoweringGroup* group) {
   List<Arg> ret{};
 
   VisitEachOutputTensor(op, [&](const ::pir::Value& tensor) {
@@ -147,9 +149,10 @@ List<Arg> MakeOpStmtOutputList(const ::pir::Operation* op,
 }
 
 template <typename DoEachT>
-void VisitEachOpStmt(const std::shared_ptr<hlir::framework::pir::Group>& group,
-                     const DoEachT& DoEach) {
-  for (const auto* op : group->CollectOps()) {
+void VisitEachOpStmt(
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& group,
+    const DoEachT& DoEach) {
+  for (const auto* op : group->ops()) {
     DoEach(OpStmt{MakeOp(op),
                   MakeOpStmtInputList(op, group.get()),
                   MakeOpStmtOutputList(op, group.get())});
@@ -187,7 +190,7 @@ void CollectRewrittenOpStmts(const OpStmt& op_stmt, List<OpStmt>* ret) {
 }
 
 List<OpStmt> MakeOpStmts(
-    const std::shared_ptr<hlir::framework::pir::Group>& group) {
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& group) {
   List<OpStmt> ret{};
 
   VisitEachOpStmt(group, [&](const auto& op_stmt) {
@@ -223,7 +226,7 @@ std::shared_ptr<IGroup> MakeIGroup(const AnchorGroup& igroup_spec) {
 }
 
 std::vector<std::shared_ptr<IGroup>> GenerateIGroups(
-    const std::shared_ptr<hlir::framework::pir::Group>& group) {
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& group) {
   std::vector<std::shared_ptr<IGroup>> ret{};
 
   List<OpStmt> op_stmts = MakeOpStmts(group);
@@ -237,7 +240,7 @@ std::vector<std::shared_ptr<IGroup>> GenerateIGroups(
 }
 
 std::shared_ptr<KGroup> GenerateKGroups(
-    const std::shared_ptr<hlir::framework::pir::Group>& group,
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& group,
     const std::vector<std::shared_ptr<IGroup>>& igroups) {
   CHECK_EQ(igroups.size(), 1);
   return std::make_shared<KGroup>(group, igroups);
@@ -352,15 +355,15 @@ Tensor GetAnchorTensor(const std::shared_ptr<IGroup>& igroup) {
 }
 
 template <typename DoEachT>
-void VisitInputTensor(const hlir::framework::pir::Group& group,
+void VisitInputTensor(const hlir::framework::pir::OpLoweringGroup& group,
                       const DoEachT& DoEach) {
   for (const ::pir::Value& node_data : group.GetInputOpValues()) {
     DoEach(node_data);
   }
 }
 
 template <typename DoEachT>
-void VisitOutputTensor(const hlir::framework::pir::Group& group,
+void VisitOutputTensor(const hlir::framework::pir::OpLoweringGroup& group,
                        const DoEachT& DoEach) {
   for (const ::pir::Value& node_data : group.GetOutputOpValues()) {
     DoEach(node_data);
@@ -444,7 +447,7 @@ MapExpr GenerateMapExpr(const std::shared_ptr<KGroup>& kgroup) {
 }  // namespace
 
 MapExpr GenerateMapExpr(
-    const std::shared_ptr<hlir::framework::pir::Group>& group) {
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& group) {
   const auto& igroups = GenerateIGroups(group);
 
   const auto& kgroup = GenerateKGroups(group, igroups);
@@ -453,13 +456,14 @@ MapExpr GenerateMapExpr(
 }
 
 void TryGenerateMapExprFromGroup(
-    const std::shared_ptr<hlir::framework::pir::Group>& fusion_group) {
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>&
+        fusion_group) {
   if (!FLAGS_cinn_enable_map_expr) {
     return;
   }
   const auto& map_expr = GenerateMapExpr(fusion_group);
   VLOG(4) << "Generate MapExpr: \n"
-          << ToTxtString(map_expr, fusion_group->group_id);
+          << ToTxtString(map_expr, fusion_group->group_id());
   fusion_group->set_map_expr_ctx(std::make_shared<MapExprCtx>(map_expr));
 }
7 changes: 3 additions & 4 deletions paddle/cinn/adt/generate_map_expr.h
@@ -20,17 +20,16 @@
 
 namespace cinn::hlir::framework::pir {
 
-struct Group;
-using GroupList = std::vector<std::shared_ptr<Group>>;
+struct OpLoweringGroup;
 
 }  // namespace cinn::hlir::framework::pir
 
 namespace cinn::adt {
 
 MapExpr GenerateMapExpr(
-    const std::shared_ptr<cinn::hlir::framework::pir::Group>& group);
+    const std::shared_ptr<cinn::hlir::framework::pir::OpLoweringGroup>& group);
 
 void TryGenerateMapExprFromGroup(
-    const std::shared_ptr<hlir::framework::pir::Group>& fusion_group);
+    const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& fusion_group);
 
 }  // namespace cinn::adt
8 changes: 4 additions & 4 deletions paddle/cinn/adt/kgroup.h
@@ -21,7 +21,7 @@
 
 namespace cinn::hlir::framework::pir {
 
-struct Group;
+struct OpLoweringGroup;
 
 }  // namespace cinn::hlir::framework::pir
 
@@ -39,11 +39,11 @@ using cinn::adt::LoopDescriptors;
 class KGroup final {
  public:
   explicit KGroup(
-      const std::shared_ptr<hlir::framework::pir::Group>& cinn_group,
+      const std::shared_ptr<hlir::framework::pir::OpLoweringGroup>& cinn_group,
       const std::vector<std::shared_ptr<IGroup>>& igroups)
       : cinn_group_(cinn_group), igroups_(igroups) {}
 
-  std::shared_ptr<hlir::framework::pir::Group> cinn_group() const {
+  std::shared_ptr<hlir::framework::pir::OpLoweringGroup> cinn_group() const {
     return CHECK_NOTNULL(cinn_group_.lock());
   }
 
@@ -58,7 +58,7 @@ class KGroup final {
       const std::shared_ptr<IGroup>& igroup) const;
 
  private:
-  std::weak_ptr<hlir::framework::pir::Group> cinn_group_;
+  std::weak_ptr<hlir::framework::pir::OpLoweringGroup> cinn_group_;
   // NOTE: Use single igroup temporarily. Actually KGroup contains
   // multiple IGroups
   std::vector<std::shared_ptr<IGroup>> igroups_;
(Diff listing truncated; the remaining files of the 442 changed are not shown.)