
Commit b63ad04

Merge commit 'a384baf530dc711d56322e77bcb71aaea87d5666' into complex_inv_op

BeingGod committed Oct 10, 2023
2 parents 6d40646 + a384baf commit b63ad04
Showing 948 changed files with 21,202 additions and 32,282 deletions.
3 changes: 2 additions & 1 deletion cmake/cinn.cmake
@@ -269,7 +269,8 @@ if(PUBLISH_LIBS)
set(core_includes
"${core_includes};paddle/cinn/runtime/cuda/cinn_cuda_runtime_source.cuh")
set(core_includes
"${core_includes};paddle/utils/flags.h;paddle/utils/flags_native.h")
"${core_includes};paddle/utils/flags.h;paddle/utils/flags_native.h;paddle/utils/test_macros.h"
)
foreach(header ${core_includes})
get_filename_component(prefix ${header} DIRECTORY)
file(COPY ${header}
4 changes: 3 additions & 1 deletion cmake/external/brpc.cmake
@@ -13,7 +13,9 @@
# limitations under the License.

include(ExternalProject)
set(OPENSSL_USE_STATIC_LIBS ON)
if(NOT WITH_ARM)
set(OPENSSL_USE_STATIC_LIBS ON)
endif()
find_package(OpenSSL REQUIRED)

message(STATUS "ssl:" ${OPENSSL_SSL_LIBRARY})
2 changes: 1 addition & 1 deletion cmake/external/xpu.cmake
@@ -24,7 +24,7 @@ set(XPU_XFT_LIB_NAME "libxft.so")
set(XPU_XPTI_LIB_NAME "libxpti.so")

if(NOT DEFINED XPU_BASE_DATE)
set(XPU_BASE_DATE "20230907")
set(XPU_BASE_DATE "20230926")
endif()
set(XPU_XCCL_BASE_VERSION "1.0.53.6")
if(NOT DEFINED XPU_XFT_BASE_VERSION)
62 changes: 62 additions & 0 deletions cmake/generic.cmake
@@ -85,6 +85,11 @@
#
# go_library(example SHARED)
#
# To build a unit test binary, which is an executable binary with libpaddle.so
# automatically linked:
#
# paddle_test(example SRCS example_test.cc)
#

# including binary directory for generated headers.
include_directories(${CMAKE_CURRENT_BINARY_DIR})
@@ -466,6 +471,7 @@ function(cc_test_build TARGET_NAME)
list(REMOVE_ITEM cc_test_DEPS python)
target_link_libraries(${TARGET_NAME} ${PYTHON_LIBRARIES})
endif()
target_compile_definitions(${TARGET_NAME} PUBLIC STATIC_PADDLE)
endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS}
@@ -578,6 +584,62 @@ function(cc_test_old TARGET_NAME)
endif()
endfunction()

function(paddle_test_build TARGET_NAME)
if(WITH_TESTING)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(paddle_test "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${paddle_test_SRCS})
get_property(paddle_lib GLOBAL PROPERTY PADDLE_LIB_NAME)
target_link_libraries(${TARGET_NAME} $<TARGET_LINKER_FILE:${paddle_lib}>
${paddle_test_DEPS} paddle_gtest_main_new)
add_dependencies(${TARGET_NAME} ${paddle_lib} ${paddle_test_DEPS}
paddle_gtest_main_new)
if(WITH_SHARED_PHI)
target_link_libraries(${TARGET_NAME} $<TARGET_LINKER_FILE:phi>)
add_dependencies(${TARGET_NAME} phi)
endif()
if(WITH_SHARED_IR)
target_link_libraries(${TARGET_NAME} $<TARGET_LINKER_FILE:pir>)
add_dependencies(${TARGET_NAME} pir)
endif()
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
target_link_libraries(${TARGET_NAME} ${PYTHON_LIBRARIES})
endif()
if(WITH_CINN AND NOT CINN_ONLY)
target_link_libraries(${TARGET_NAME} $<TARGET_LINKER_FILE:cinnapi>)
add_dependencies(${TARGET_NAME} cinnapi)
endif()
if(WITH_XPU)
target_link_libraries(${TARGET_NAME} xpulib)
endif()
if(WITH_ROCM)
target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB})
endif()
if(APPLE)
target_link_libraries(
${TARGET_NAME}
"-Wl,-rpath,$<TARGET_FILE_DIR:${paddle_lib}> -Wl,-rpath,$<TARGET_FILE_DIR:phi> -Wl,-rpath,$<TARGET_FILE_DIR:pir>"
)
endif()
common_link(${TARGET_NAME})
check_coverage_opt(${TARGET_NAME} ${paddle_test_SRCS})
endif()
endfunction()

function(paddle_test TARGET_NAME)
if(WITH_TESTING)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(paddle_test "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
paddle_test_build(${TARGET_NAME} SRCS ${paddle_test_SRCS} DEPS
${paddle_test_DEPS})
cc_test_run(${TARGET_NAME} COMMAND ${TARGET_NAME} ARGS ${paddle_test_ARGS})
endif()
endfunction()
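A hypothetical invocation of the new helper (target, source, and dependency names are illustrative, not taken from this commit):

# Builds example_test from example_test.cc, links it against the monolithic
# paddle library (plus phi/pir/cinnapi when those are built shared), and
# registers it through the existing cc_test_run harness.
paddle_test(example_test SRCS example_test.cc DEPS glog ARGS --gtest_color=yes)

The design difference from cc_test is that paddle_test_build links $<TARGET_LINKER_FILE:${paddle_lib}> rather than an explicit list of object libraries, so unit tests exercise the same libpaddle.so that is shipped.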

function(nv_library TARGET_NAME)
if(WITH_GPU)
set(options STATIC static SHARED shared)
4 changes: 4 additions & 0 deletions cmake/inference_lib.cmake
@@ -340,6 +340,10 @@ copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flags.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/test_macros.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h
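The published test_macros.h pairs with the STATIC_PADDLE compile definition that cc_test_build now sets in cmake/generic.cmake. This diff does not show the header's contents, so the sketch below is only the usual shape of such an export-macro header; every name except STATIC_PADDLE is an assumption:

// Hypothetical sketch of an export-macro header keyed on STATIC_PADDLE;
// not the actual contents of paddle/utils/test_macros.h.
#pragma once
#if defined(STATIC_PADDLE)
#define TEST_API  // statically linked test binaries need no attribute
#elif defined(_WIN32)
#define TEST_API __declspec(dllimport)  // symbols imported from the DLL
#else
#define TEST_API __attribute__((visibility("default")))
#endif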
85 changes: 84 additions & 1 deletion paddle/cinn/ast_gen_ius/ast_gen.cc
@@ -15,10 +15,11 @@
#include "paddle/cinn/ast_gen_ius/ast_gen.h"
#include "paddle/cinn/ir/ir.h"
#include "paddle/cinn/ir/ir_base.h"
#include "paddle/cinn/ir/ir_printer.h"
#include "paddle/cinn/ir/operation.h"
#include "paddle/cinn/ir/tensor.h"
#include "paddle/cinn/ir/utils/ir_printer.h"
#include "paddle/cinn/lang/compute.h"
#include "paddle/cinn/optim/replace_var_with_expr.h"

namespace cinn {
namespace ast_gen_ius {
@@ -84,11 +85,75 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
tensor_group->MarkShareMemBuffer(tensor, init_tensor);
tensor_group->CtrlDepend(tensor, init_tensor);
Expr init_body = ir::Store::Make(init_tensor, init_value, axis_exprs);
// create schedule block itervars, i0,i1...
std::vector<ir::Var> block_vars;
std::vector<ir::Expr> iter_values;
// reduce body and reduce init schedule block should have different objects
// for same axis so we re-create objects
std::vector<Var> axis_vars = common::GenDefaultAxis(axis_len);
for (int i = 0; i < shape.size(); ++i) {
block_vars.push_back(Var(Expr(0),
shape[i],
cinn::UniqName("i" + std::to_string(i)),
/*is_reduce = */ false));
optim::ReplaceVarWithExpr(&init_body, axis[i], block_vars[i]);
axis_vars[i]->is_reduce_axis = false;
if (shape[i] == Expr(1)) {
iter_values.push_back(Expr(0));
} else {
iter_values.push_back(axis_vars[i]);
}
}
init_body = ir::ScheduleBlockRealize::Make(
iter_values,
ir::ScheduleBlock::Make(
block_vars, {}, {}, reduce_init_name, init_body));

// For the remaining reduce axis, make reduce body
const std::vector<ir::Var>& reduce_axis = tensor->reduce_axis;
ir::Expr reduce_body =
ConvertReduceBody(tensor->body(), tensor, axis_exprs);
// create schedule block itervars, i0,i1...
std::vector<ir::Var> reduce_block_vars;
std::vector<ir::Expr> reduce_iter_values;
// reduce body and reduce init schedule block should have different objects
// for same axis so we re-create objects
std::vector<Var> reduce_axis_vars = common::GenDefaultAxis(axis_len);
for (int i = 0; i < shape.size(); ++i) {
reduce_block_vars.push_back(Var(Expr(0),
shape[i],
cinn::UniqName("i" + std::to_string(i)),
/*is_reduce = */ false));
reduce_axis_vars[i]->is_reduce_axis = false;
if (shape[i] == Expr(1)) {
reduce_iter_values.push_back(Expr(0));
} else {
reduce_iter_values.push_back(axis_vars[i]);
}
}
for (int i = 0; i < reduce_axis.size(); ++i) {
int count = shape.size() + i;
reduce_block_vars.push_back(
Var(reduce_axis[i]->lower_bound,
reduce_axis[i]->upper_bound,
cinn::UniqName("i" + std::to_string(count)),
/*is_reduce = */ true));
ir::Var reduce_axis_var = reduce_axis[i];
reduce_axis_var->is_reduce_axis = true;
reduce_iter_values.push_back(reduce_axis_var);
}
for (int i = 0; i < axis.size(); ++i) {
optim::ReplaceVarWithExpr(&reduce_body, axis[i], reduce_block_vars[i]);
}
for (int i = axis.size(); i < reduce_block_vars.size(); ++i) {
optim::ReplaceVarWithExpr(
&reduce_body, reduce_axis[i - axis.size()], reduce_block_vars[i]);
}

reduce_body = ir::ScheduleBlockRealize::Make(
reduce_iter_values,
ir::ScheduleBlock::Make(
reduce_block_vars, {}, {}, tensor->name, reduce_body));
for (int i = static_cast<int>(reduce_axis.size()) - 1; i >= 0; --i) {
reduce_body = ir::For::Make(reduce_axis[i],
reduce_axis[i]->lower_bound,
@@ -114,6 +179,24 @@ ir::Expr AstGen::Build(const ir::Tensor& tensor, TensorGroup* tensor_group) {
return body;
} else {
ir::Expr body = ir::Store::Make(tensor, tensor->body(), axis_exprs);
// create schedule block itervars, i0,i1...
std::vector<ir::Var> block_vars;
std::vector<ir::Expr> iter_values;
std::vector<Var> axis_vars = common::GenDefaultAxis(axis_len);
for (int i = 0; i < shape.size(); ++i) {
block_vars.push_back(Var(
Expr(0), shape[i], cinn::UniqName("i" + std::to_string(i)), false));
optim::ReplaceVarWithExpr(&body, axis[i], block_vars[i]);
axis_vars[i]->is_reduce_axis = false;
if (shape[i] == Expr(1)) {
iter_values.push_back(Expr(0));
} else {
iter_values.push_back(axis_vars[i]);
}
}
body = ir::ScheduleBlockRealize::Make(
iter_values,
ir::ScheduleBlock::Make(block_vars, {}, {}, tensor->name, body));
for (int i = static_cast<int>(axis_len) - 1; i >= 0; --i) {
ir::Var loop_var = axis[i];
ir::Expr loop_extent = shape[i];
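For intuition about the AST these changes produce: each store is now wrapped in a ScheduleBlockRealize whose iteration values bind every block var to its loop axis, or to the constant 0 when the axis extent is 1. For a hypothetical elementwise tensor B with shape [4, 1], the result would pretty-print roughly like this (an illustrative sketch in CINN's printed-IR style, not output captured from this commit):

for (i, 0, 4) {
  for (j, 0, 1) {
    ScheduleBlock(B) {
      i0, i1 = axis.bind(i, 0)  // extent-1 axis binds the constant 0
      B[i0, i1] = <tensor body>
    }
  }
}

The reduce path builds this structure twice: one block (reduce_init_name in the code) storing the initial value over the data axes, and one block over the data plus reduce axes that accumulates into the output.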
94 changes: 81 additions & 13 deletions paddle/cinn/ast_gen_ius/tensor_group.cc
@@ -21,26 +21,37 @@
#include "paddle/cinn/ir/ir_base.h"
#include "paddle/cinn/ir/tensor.h"
#include "paddle/cinn/ir/utils/ir_nodes_collector.h"
#include "paddle/cinn/poly/stage.h"

namespace cinn {
namespace ast_gen_ius {

TensorGroup::TensorGroup(const std::vector<ir::Tensor>& tensors) {
std::set<ir::Tensor> all_tensors(tensors.begin(), tensors.end());

for (auto& tensor : tensors) {
for (const ir::Tensor& tensor : tensors) {
output_tensor_names_.insert(tensor->name);
std::set<ir::Expr> used_tensors = ir::ir_utils::CollectIRNodes(
tensor->body(), [](const Expr* x) { return x->as_tensor(); });
for (const Expr& x : used_tensors) {
const ir::Tensor to_dep = x.as_tensor_ref();
all_tensors.insert(to_dep);
this->CtrlDepend(tensor, to_dep);
this->Insert(tensor);
}
}

void TensorGroup::ShowLog() const {
VLOG(6) << "Showing log for TensorGroup";
for (auto& p : name_to_tensor_) {
VLOG(6) << "Tensor name = " << p.first << " depends on {";
if (ctrl_dep_.count(p.first)) {
for (auto& dep_name : ctrl_dep_.at(p.first)) {
VLOG(6) << dep_name;
}
}
VLOG(6) << "}";
}
}

for (const ir::Tensor& t : all_tensors) {
name_to_tensor_.insert({t->name, t});
TensorGroup::TensorGroup(
const std::unordered_map<std::string, ir::Tensor>& tensor_map) {
for (const auto& map_pair : tensor_map) {
const ir::Tensor& tensor = map_pair.second;
output_tensor_names_.insert(tensor->name);
this->Insert(tensor);
}
}

@@ -51,7 +62,23 @@ bool TensorGroup::Contain(const std::string& name) const {
}

void TensorGroup::Insert(const ir::Tensor& tensor) {
name_to_tensor_.insert({tensor->name, tensor});
if (!name_to_tensor_.count(tensor->name)) {
name_to_tensor_.insert({tensor->name, tensor});
}

// Using set to de-duplicate
std::set<ir::Tensor> dep_tensors;
std::set<ir::Expr> used_tensors = ir::ir_utils::CollectIRNodes(
tensor->body(), [](const Expr* x) { return x->as_tensor(); });
for (const Expr& x : used_tensors) {
const ir::Tensor to_dep = x.as_tensor_ref();
dep_tensors.insert(to_dep);
this->CtrlDepend(tensor, to_dep);
}

for (const ir::Tensor& t : dep_tensors) {
this->Insert(t);
}
}

ir::Tensor TensorGroup::Get(const std::string& name) {
@@ -72,6 +99,8 @@ std::vector<ir::Tensor> TensorGroup::GetGenFuncTopoOrder(
for (const auto& dep_pair : ctrl_dep_) {
const std::unordered_set<std::string>& dep_tensor_names = dep_pair.second;
in_degree[dep_pair.first] = dep_tensor_names.size();
VLOG(6) << "indegree[" << dep_pair.first
<< "] = " << dep_tensor_names.size();
}

std::vector<ir::Tensor> ret;
Expand All @@ -95,7 +124,6 @@ std::vector<ir::Tensor> TensorGroup::GetGenFuncTopoOrder(
while (!node_set.empty()) {
const std::string cur = *(node_set.begin());
node_set.erase(node_set.begin());

if (!input_arg_names.count(cur)) {
ret.push_back(name_to_tensor_[cur]);
}
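These fragments are Kahn's algorithm: in-degrees are seeded from ctrl_dep_, tensors with no unmet dependencies sit in node_set, and draining the set yields a dependency-first order. A self-contained sketch of the same technique with the CINN types stripped out (all names illustrative):

#include <map>
#include <queue>
#include <set>
#include <string>
#include <vector>

// Kahn's algorithm sketch: deps[t] holds the names t depends on.
// Returns names ordered so every dependency precedes its dependents.
std::vector<std::string> TopoOrder(
    const std::map<std::string, std::set<std::string>>& deps) {
  std::map<std::string, int> in_degree;
  std::map<std::string, std::vector<std::string>> dependents;  // reverse edges
  for (const auto& [node, node_deps] : deps) {
    in_degree[node] += static_cast<int>(node_deps.size());
    for (const std::string& d : node_deps) {
      dependents[d].push_back(node);
      in_degree[d];  // ensure leaf dependencies get an entry too
    }
  }
  std::queue<std::string> ready;
  for (const auto& [node, degree] : in_degree) {
    if (degree == 0) ready.push(node);
  }
  std::vector<std::string> order;
  while (!ready.empty()) {
    const std::string cur = ready.front();
    ready.pop();
    order.push_back(cur);
    for (const std::string& dep : dependents[cur]) {
      if (--in_degree[dep] == 0) ready.push(dep);
    }
  }
  return order;  // shorter than in_degree.size() iff the graph had a cycle
}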
@@ -187,5 +215,45 @@ absl::flat_hash_map<std::string, ir::Tensor> TensorGroup::AllocateBuffers() {
return name_to_tensor_;
}

void StageMapShareMemory(const poly::StageMap& stages) {
absl::flat_hash_map<std::string, ir::_Tensor_*> tensor_map;
for (auto& stage : stages) {
tensor_map[stage.second->tensor()->name] = stage.second->tensor();
}
for (auto& stage : stages) {
if (!stage.second->tensor()->buffer.defined() &&
!stage.second->meta.tensors_to_share_buffer_with.empty()) {
for (auto& str : stage.second->meta.tensors_to_share_buffer_with) {
if (tensor_map[str]->buffer.defined()) {
auto edited_shape = tensor_map[str]->buffer->shape;
stage.second->tensor()->Bind(tensor_map[str]->buffer);
tensor_map[str]->buffer->shape = edited_shape;
VLOG(3) << "Stage Tensor " << stage.second->tensor()->name
<< " bind buffer to " << tensor_map[str]->name << " , "
<< tensor_map[str]->buffer->name;
}
}
}
}
}

TensorGroup ConvertStageMapToTensorGroup(const poly::StageMap& stage_map) {
std::vector<ir::Tensor> stage_tensors;
std::set<ir::Tensor> reshape_tensors;
for (auto iter = stage_map.begin(); iter != stage_map.end(); ++iter) {
if (iter->second->has_expression()) {
const std::string& tensor_name = iter->first;
stage_tensors.push_back(ir::Tensor(iter->second->tensor()));
if (utils::Endswith(tensor_name, "_reshape")) {
reshape_tensors.insert(ir::Tensor(iter->second->tensor()));
}
}
}

ast_gen_ius::TensorGroup tensor_group(stage_tensors);
StageMapShareMemory(stage_map);
return tensor_group;
}

} // namespace ast_gen_ius
} // namespace cinn