
Commit 8a051f8
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into concat
Patrick-Star125 committed Oct 19, 2023
2 parents fac7400 + 4dbd3f7 commit 8a051f8
Showing 888 changed files with 46,986 additions and 8,404 deletions.
8 changes: 4 additions & 4 deletions .clang-tidy
@@ -20,7 +20,7 @@ bugprone-integer-division,
bugprone-misplaced-widening-cast,
-bugprone-move-forwarding-reference,
-bugprone-multiple-statement-macro,
-bugprone-narrowing-conversions,
bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result,
-bugprone-parent-virtual-call,
-bugprone-posix-return,
@@ -155,7 +155,7 @@ cppcoreguidelines-avoid-c-arrays,
-cppcoreguidelines-avoid-goto,
cppcoreguidelines-c-copy-assignment-signature,
cppcoreguidelines-explicit-virtual-functions,
-cppcoreguidelines-init-variables,
cppcoreguidelines-init-variables,
cppcoreguidelines-narrowing-conversions,
cppcoreguidelines-no-malloc,
-cppcoreguidelines-pro-type-const-cast,
@@ -189,12 +189,12 @@ modernize-use-override,
modernize-use-transparent-functors,
-modernize-use-uncaught-exceptions,
performance-faster-string-find,
-performance-for-range-copy,
performance-for-range-copy,
-performance-implicit-conversion-in-loop,
-performance-inefficient-algorithm,
performance-inefficient-string-concatenation,
-performance-inefficient-vector-operation,
-performance-move-const-arg,
performance-move-const-arg,
-performance-move-constructor-init,
-performance-no-automatic-move,
performance-noexcept-move-constructor,
3 changes: 3 additions & 0 deletions .flake8
@@ -26,6 +26,9 @@ per-file-ignores =
# These files need tabs for testing.
test/dygraph_to_static/test_error.py:E101,W191

# Ignore comparisons with True in sot unittests
test/sot/test_dup_top.py:E712

# temp ignore base directory
python/paddle/base/*:
E712,
4 changes: 4 additions & 0 deletions .gitmodules
@@ -106,3 +106,7 @@
path = third_party/jitify
url = https://github.com/NVIDIA/jitify.git
ignore = dirty
[submodule "third_party/cccl"]
path = third_party/cccl
url = https://github.com/NVIDIA/cccl.git
ignore = dirty
31 changes: 31 additions & 0 deletions cmake/external/cccl.cmake
@@ -0,0 +1,31 @@
include(ExternalProject)

set(CCCL_PATH
"${THIRD_PARTY_PATH}/cccl"
CACHE STRING "A path setting for external_cccl path.")
set(CCCL_PREFIX_DIR ${CCCL_PATH})
set(CCCL_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/cccl)

# The latest commit has bugs on Windows, so we pin a fixed commit.
set(CCCL_TAG 1f6e4bcae0fbf1bbed87f88544d8d2161c490fc1)
execute_process(COMMAND git --git-dir=${CCCL_SOURCE_DIR}/.git
--work-tree=${CCCL_SOURCE_DIR} checkout ${CCCL_TAG})

set(CCCL_INCLUDE_DIR ${CCCL_SOURCE_DIR})
message("CCCL_INCLUDE_DIR is ${CCCL_INCLUDE_DIR}")
include_directories(${CCCL_INCLUDE_DIR})

ExternalProject_Add(
extern_cccl
${EXTERNAL_PROJECT_LOG_ARGS}
SOURCE_DIR ${CCCL_SOURCE_DIR}
PREFIX ${CCCL_PREFIX_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND "")

add_library(cccl INTERFACE)

add_dependencies(cccl extern_cccl)
14 changes: 9 additions & 5 deletions cmake/external/openblas.cmake
@@ -19,12 +19,16 @@ set(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
set(CBLAS_SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/openblas)
set(CBLAS_TAG v0.3.7)

# OpenBLAS supports Raptor Lake from v0.3.22
if(UNIX
AND NOT APPLE
AND NOT WITH_ROCM
# Why use v0.3.18? The IDG business line encountered a random openblas error,
# which was resolved by upgrading openblas.
# And why compile only when gcc > 8.2? Please refer to
# https://github.com/spack/spack/issues/19932#issuecomment-733452619
# v0.3.18 only supports gcc>=8.3 or gcc>=7.4
if((CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.2
AND NOT WITH_XPU)
set(CBLAS_TAG v0.3.23)
# We only compile with openblas 0.3.18 when gcc >= 8.3
set(CBLAS_TAG v0.3.18)
endif()

if(APPLE AND WITH_ARM)
3 changes: 3 additions & 0 deletions cmake/generic.cmake
@@ -1345,6 +1345,9 @@ function(math_library TARGET)
if(WITH_GPU)
if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
list(APPEND math_common_deps cub)
elseif(${CMAKE_CUDA_COMPILER_VERSION} EQUAL 12.0
OR ${CMAKE_CUDA_COMPILER_VERSION} GREATER 12.0)
list(APPEND math_common_deps cccl)
else()
list(APPEND math_common_deps)
endif()
12 changes: 12 additions & 0 deletions cmake/third_party.cmake
@@ -247,6 +247,14 @@ if(NOT DEFINED WITH_MKLDNN)
endif()
endif()

if(WIN32)
if(MSVC)
if(MSVC_VERSION LESS 1920)
set(WITH_MKLDNN OFF)
endif()
endif()
endif()

if(WIN32
OR APPLE
OR NOT WITH_GPU
@@ -375,6 +383,10 @@ if(WITH_GPU)
if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
include(external/cub) # download cub
list(APPEND third_party_deps extern_cub)
elseif(${CMAKE_CUDA_COMPILER_VERSION} EQUAL 12.0
OR ${CMAKE_CUDA_COMPILER_VERSION} GREATER 12.0)
include(external/cccl)
list(APPEND third_party_deps extern_cccl)
endif()
set(URL
"https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz"
2 changes: 2 additions & 0 deletions paddle/cinn/backends/compiler.cc
@@ -304,6 +304,8 @@ void Compiler::CompileCudaModule(const Module& module,
auto fn_kernel = cuda_module_->GetFunction(0, kernel_fn_name);
CHECK(fn_kernel);

fn_ptr_.push_back(reinterpret_cast<void*>(fn_kernel));

symbols.RegisterVar(kernel_fn_name + "_ptr_",
reinterpret_cast<void*>(fn_kernel));
}
3 changes: 3 additions & 0 deletions paddle/cinn/backends/compiler.h
@@ -121,6 +121,8 @@ class Compiler final {
*/
void* Lookup(absl::string_view fn_name);

std::vector<void*> GetFnPtr() const { return fn_ptr_; }

private:
void CompileCudaModule(const ir::Module& module,
const std::string& code = "");
@@ -136,6 +138,7 @@
Target target_;
std::unique_ptr<ExecutionEngine> engine_;

std::vector<void*> fn_ptr_;
#ifdef CINN_WITH_CUDA
std::unique_ptr<runtime::cuda::CUDAModule> cuda_module_;
#endif
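The two compiler.h hunks above cache each compiled kernel handle in fn_ptr_ and expose it through GetFnPtr(). A minimal consumer sketch follows; hedged: the Create and Build signatures are assumptions based on the surrounding CINN backend API, and the lowering that produces the module is elided.

#include "paddle/cinn/backends/compiler.h"

// Sketch only: retrieve the raw CUDA kernel handles that
// CompileCudaModule() now pushes into fn_ptr_ (see compiler.cc above).
std::vector<void*> CollectKernelPointers(const cinn::ir::Module& module,
                                         const cinn::common::Target& target) {
  auto compiler = cinn::backends::Compiler::Create(target);
  compiler->Build(module, /*code=*/"");
  // One entry per compiled kernel; each pointer is the same handle that
  // was registered in the symbol table under "<fn_name>_ptr_".
  return compiler->GetFnPtr();
}

Note that GetFnPtr() returns the vector by value, so callers receive a copy of the pointer list rather than a view into the compiler's internals.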
1 change: 1 addition & 0 deletions paddle/cinn/hlir/dialect/operator/CMakeLists.txt
@@ -1 +1,2 @@
add_subdirectory(ir)
add_subdirectory(transforms)
1 change: 1 addition & 0 deletions paddle/cinn/hlir/dialect/operator/ir/CMakeLists.txt
@@ -35,6 +35,7 @@ if(NOT CINN_ONLY)
COMMAND ${CMAKE_COMMAND} -E make_directory ${parsed_op_dir}
COMMAND ${PYTHON_EXECUTABLE} ${cinn_op_gen_parsed_yaml_file} --op_yaml_path
${cinn_op_yaml_file} --output_path ${cinn_op_parsed_yaml_file}
DEPENDS ${cinn_op_gen_parsed_yaml_file} ${cinn_op_yaml_file}
VERBATIM)

add_custom_command(
22 changes: 22 additions & 0 deletions paddle/cinn/hlir/dialect/operator/ir/attribute_storage.h
@@ -77,5 +77,27 @@ struct GroupInfoAttributeStorage : public pir::AttributeStorage {
ParamKey data_;
};

struct JITInfoAttributeStorage : public pir::AttributeStorage {
using ParamKey = cinn::hlir::framework::newir::CUDAJITInfo;
explicit JITInfoAttributeStorage(const ParamKey& key) : data_(key) {}

static JITInfoAttributeStorage* Construct(const ParamKey& key) {
return new JITInfoAttributeStorage(key);
}

static std::size_t HashValue(const ParamKey& key) {
return std::hash<int64_t>()(*(reinterpret_cast<int64_t*>(key.fn_ptr)));
}

bool operator==(const ParamKey& key) const {
return data_.fn_ptr == key.fn_ptr;
}

const ParamKey& GetAsKey() const { return data_; }

private:
ParamKey data_;
};

} // namespace dialect
} // namespace cinn
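The hash/equality pair in JITInfoAttributeStorage is worth noting: operator== compares the fn_ptr pointer itself, while HashValue hashes the first eight bytes pointed to by fn_ptr. A self-contained sketch of that contract; the struct below is a simplified stand-in for cinn::hlir::framework::newir::CUDAJITInfo, which carries more fields.

#include <cstdint>
#include <functional>

// Simplified stand-in for the real CUDAJITInfo.
struct CUDAJITInfo {
  void* fn_ptr;
};

// Mirrors JITInfoAttributeStorage::HashValue: the hash comes from the
// eight bytes stored *at* fn_ptr, not from the pointer value itself.
std::size_t HashValue(const CUDAJITInfo& key) {
  return std::hash<int64_t>()(*reinterpret_cast<int64_t*>(key.fn_ptr));
}

// Mirrors JITInfoAttributeStorage::operator==: equality is pointer identity.
bool Equal(const CUDAJITInfo& lhs, const CUDAJITInfo& rhs) {
  return lhs.fn_ptr == rhs.fn_ptr;
}

Two keys with distinct fn_ptr values can therefore land in the same hash bucket whenever the bytes they point at coincide; the pointer-identity operator== then tells them apart.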
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/dialect/operator/ir/manual_op.cc
@@ -44,7 +44,7 @@ std::vector<pir::Operation *> GroupOp::ops() {
inner_block->end());
}

void GroupOp::Verify() {}
void GroupOp::VerifySig() {}

void GroupOp::Print(pir::IrPrinter &printer) {
auto &os = printer.os;
2 changes: 1 addition & 1 deletion paddle/cinn/hlir/dialect/operator/ir/manual_op.h
@@ -36,7 +36,7 @@ class GroupOp : public pir::Op<GroupOp> {
pir::Block *block();
std::vector<pir::Operation *> ops();

void Verify();
void VerifySig();
void Print(pir::IrPrinter &printer); // NOLINT
};

6 changes: 6 additions & 0 deletions paddle/cinn/hlir/dialect/operator/ir/op_attribute.cc
@@ -19,7 +19,13 @@ namespace dialect {
const GroupInfo &GroupInfoAttribute::data() const {
return storage()->GetAsKey();
}

const cinn::hlir::framework::newir::CUDAJITInfo &CUDAJITInfoAttribute::data()
const {
return storage()->GetAsKey();
}
} // namespace dialect
} // namespace cinn

IR_DEFINE_EXPLICIT_TYPE_ID(cinn::dialect::GroupInfoAttribute)
IR_DEFINE_EXPLICIT_TYPE_ID(cinn::dialect::CUDAJITInfoAttribute)
15 changes: 15 additions & 0 deletions paddle/cinn/hlir/dialect/operator/ir/op_attribute.h
@@ -33,7 +33,22 @@ class GroupInfoAttribute : public pir::Attribute {
const GroupInfo& data() const;
};

class CUDAJITInfoAttribute : public pir::Attribute {
public:
using Attribute::Attribute;

DECLARE_ATTRIBUTE_UTILITY_FUNCTOR(CUDAJITInfoAttribute,
JITInfoAttributeStorage);

bool operator<(const CUDAJITInfoAttribute& right) const {
return storage() < right.storage();
}

const cinn::hlir::framework::newir::CUDAJITInfo& data() const;
};

} // namespace dialect
} // namespace cinn

IR_DECLARE_EXPLICIT_TYPE_ID(cinn::dialect::GroupInfoAttribute)
IR_DECLARE_EXPLICIT_TYPE_ID(cinn::dialect::CUDAJITInfoAttribute)
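Reading the new attribute back follows the usual pir isa/dyn_cast pattern. A hedged sketch: the attribute key "jit_info" and the helper name are hypothetical illustrations, not part of this commit.

#include "paddle/cinn/hlir/dialect/operator/ir/op_attribute.h"
#include "paddle/pir/core/operation.h"

// Sketch: fetch the cached CUDA kernel handle off an operation, mirroring
// the dispatch that OperatorDialect::PrintAttribute performs below.
void* LookupKernel(pir::Operation* op) {
  pir::Attribute attr = op->attribute("jit_info");  // hypothetical key
  if (attr && attr.isa<cinn::dialect::CUDAJITInfoAttribute>()) {
    auto jit_attr = attr.dyn_cast<cinn::dialect::CUDAJITInfoAttribute>();
    return jit_attr.data().fn_ptr;
  }
  return nullptr;
}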
25 changes: 18 additions & 7 deletions paddle/cinn/hlir/dialect/operator/ir/op_dialect.cc
@@ -39,20 +39,31 @@ void OperatorDialect::initialize() {
>();
RegisterOp<GroupOp>();
RegisterAttribute<GroupInfoAttribute>();
RegisterAttribute<CUDAJITInfoAttribute>();
}

void OperatorDialect::PrintType(pir::Type type, std::ostream &os) const {}

void OperatorDialect::PrintAttribute(pir::Attribute attr,
std::ostream &os) const {
os << "(" << attr.dialect().name();
os << '.';
if (auto group_info_attr = attr.dyn_cast<GroupInfoAttribute>()) {
const GroupInfo &data = group_info_attr.data();
os << "GroupInfo)"
<< "[" << data.fn_name << "]";
if (attr.isa<GroupInfoAttribute>()) {
os << "(" << attr.dialect().name();
os << '.';
if (auto group_info_attr = attr.dyn_cast<GroupInfoAttribute>()) {
const GroupInfo &data = group_info_attr.data();
os << "GroupInfo)"
<< "[" << data.fn_name << "]";
}
{ os << "<#AttrNotImplemented>"; }
} else if (attr.isa<CUDAJITInfoAttribute>()) {
auto cuda_jit_info = attr.dyn_cast<CUDAJITInfoAttribute>();

os << "(" << cuda_jit_info.data().fn_ptr;
os << ')';
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"cinn dialect only support GrupInfo and CUDAJITInfo"));
}
{ os << "<#AttrNotImplemented>"; }
}

void OperatorDialect::PrintOperation(pir::Operation *op,
27 changes: 22 additions & 5 deletions paddle/cinn/hlir/dialect/operator/ir/ops.yaml
@@ -1,8 +1,25 @@
- op : add
args : (Tensor x, Tensor y)
- op : broadcast
args : (Tensor x, int64_t[] broadcast_axes, int64_t[] out_shape)
output : Tensor(out)
infer_meta :
func : ElementwiseInferMeta
func : CINNBroadcastInferMeta
param : [x, broadcast_axes, out_shape]
kernel :
func : add
inplace : (x -> out)
func : expand
param : [x, broadcast_axes]

- op : reduce_max
args : (Tensor x, int64_t[] axis, bool keep_dim)
output : Tensor(out)
infer_meta :
func : ReduceInferMeta
kernel :
func : frobenius_norm

- op : reduce_sum
args : (Tensor x, int64_t[] axis, bool keep_dim)
output : Tensor(out)
infer_meta :
func : ReduceInferMeta
kernel :
func : frobenius_norm
10 changes: 10 additions & 0 deletions paddle/cinn/hlir/dialect/operator/transforms/CMakeLists.txt
@@ -0,0 +1,10 @@
if(NOT CINN_ONLY)
cinn_cc_library(
op_with_group_merge_pass
SRCS
group_with_group_merge_pass.cc
op_with_group_merge_pass.cc
tensor_node.cc
DEPS
pd_op_dialect)
endif()