Add Intermediate Kernel API for refactor Tensor Lib #36914
Changes from all commits
@@ -14,5 +14,26 @@

#pragma once

#include "paddle/pten/api/include/infershape.h"
#include "paddle/pten/hapi/lib/utils/allocator.h"
#include "paddle/pten/kernels/cpu/creation.h"
#include "paddle/pten/kernels/cuda/creation.h"

namespace pten {

// TODO(YuanRisheng) This function name should be same as User API name.
// TODO(zyfncg) Automatic code generation
template <typename T, typename ContextT>
DenseTensor FillAnyLike(const ContextT& dev_ctx,
                        const DenseTensor& x,
                        const Scalar& val) {
  auto out_meta = UnchangedInferShape(x.meta());
  const auto allocator =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          dev_ctx.GetPlace());
  pten::DenseTensor dense_out(allocator, out_meta);
  FillAnyLike<T>(dev_ctx, x, val, &dense_out);
  return dense_out;
}

}  // namespace pten

Review comment (on the allocator line): [TODO] I will change this to a global singleton later to reduce the overhead.
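For orientation, a hypothetical caller-side sketch of the intermediate FillAnyLike API follows; it is not part of the diff. It assumes a CPU-only setup, that Scalar is implicitly constructible from a plain float, and that the tensor and device context are prepared the same way as in the DEV_API test further down in this PR.

// Hypothetical usage sketch (assumptions noted above); not part of the PR.
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
    paddle::platform::CPUPlace());
pten::DenseTensor dense_x(alloc,
                          pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                paddle::framework::make_ddim({3, 10}),
                                                pten::DataLayout::NCHW));
dense_x.mutable_data<float>();  // fill with real data in practice

auto* dev_ctx = paddle::platform::DeviceContextPool::Instance().Get(
    paddle::platform::CPUPlace());

// The intermediate API allocates its own output DenseTensor and returns it by value.
auto ones = pten::FillAnyLike<float>(
    *static_cast<paddle::platform::CPUDeviceContext*>(dev_ctx),
    dense_x,
    /*val=*/1.0f);  // assumes Scalar accepts a float here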
@@ -15,5 +15,62 @@ limitations under the License. */

#pragma once

// See Note: [ How do we organize the kernel directory ]
#include "paddle/pten/api/include/infershape.h"
#include "paddle/pten/hapi/lib/utils/allocator.h"
#include "paddle/pten/kernels/cpu/math.h"
#include "paddle/pten/kernels/cuda/math.h"

namespace pten {

template <typename T, typename ContextT>
DenseTensor Sign(const ContextT& dev_ctx, const DenseTensor& x) {
  auto out_meta = UnchangedInferShape(x.meta());
  const auto allocator =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          dev_ctx.GetPlace());
  pten::DenseTensor dense_out(allocator, out_meta);
  Sign<T>(dev_ctx, x, &dense_out);
  return dense_out;
}

template <typename T, typename ContextT>
DenseTensor Mean(const ContextT& dev_ctx, const DenseTensor& x) {
  auto out_meta = ReductionInferShape(x.meta());
  const auto allocator =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          dev_ctx.GetPlace());
  pten::DenseTensor dense_out(allocator, out_meta);
  Mean<T>(dev_ctx, x, &dense_out);
  return dense_out;
}

template <typename T, typename ContextT>
DenseTensor Scale(const ContextT& dev_ctx,
                  const DenseTensor& x,
                  float scale,
                  float bias,
                  bool bias_after_scale) {
  auto out_meta = UnchangedInferShape(x.meta());
  const auto allocator =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          dev_ctx.GetPlace());
  pten::DenseTensor dense_out(allocator, out_meta);
  Scale<T>(dev_ctx, x, scale, bias, bias_after_scale, &dense_out);
  return dense_out;
}

template <typename T, typename ContextT>
DenseTensor Scale(const ContextT& dev_ctx,
                  const DenseTensor& x,
                  const DenseTensor& scale,
                  float bias,
                  bool bias_after_scale) {
  auto out_meta = UnchangedInferShape(x.meta());
  const auto allocator =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          dev_ctx.GetPlace());
  pten::DenseTensor dense_out(allocator, out_meta);
  ScaleHost<T>(dev_ctx, x, scale, bias, bias_after_scale, &dense_out);
  return dense_out;
}

}  // namespace pten

Review comment (on the first template declaration): [TODO] The code here should also be auto-generated later; this needs to be considered holistically. @zyfncg
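As a hypothetical illustration (not part of the diff), the intermediate math APIs above can be chained on CPU as sketched next; dense_x is assumed to be prepared exactly as in the DEV_API test below.

// Hypothetical usage sketch; assumes dense_x already exists (see the DEV_API test below).
auto& cpu_ctx = *static_cast<paddle::platform::CPUDeviceContext*>(
    paddle::platform::DeviceContextPool::Instance().Get(
        paddle::platform::CPUPlace()));

// scaled = dense_x * 2.0 + 1.0 (bias applied after scaling), then reduce to the mean.
auto scaled = pten::Scale<float>(
    cpu_ctx, dense_x, /*scale=*/2.0f, /*bias=*/1.0f, /*bias_after_scale=*/true);
auto mean = pten::Mean<float>(cpu_ctx, scaled);

Each call allocates its own output through a freshly created DefaultAllocator, which is the overhead the reviewer's global-singleton TODO above refers to.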
@@ -21,6 +21,8 @@ limitations under the License. */

#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/hapi/lib/utils/allocator.h"

#include "paddle/pten/api/include/linalg.h"

PT_DECLARE_MODULE(LinalgCPU);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

@@ -82,3 +84,55 @@ TEST(API, dot) {

  ASSERT_NEAR(expect_result[1], actual_result1, 1e-6f);
  ASSERT_NEAR(expect_result[2], actual_result2, 1e-6f);
}

// TODO(YuanRisheng) This unitest should be created in other file.
// It is convenient to make compilation decoupling.
TEST(DEV_API, dot) {
  // 1. create tensor
  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
  pten::DenseTensor dense_x(alloc,
                            pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                  framework::make_ddim({3, 10}),
                                                  pten::DataLayout::NCHW));
  auto* dense_x_data = dense_x.mutable_data<float>();

  pten::DenseTensor dense_y(alloc,
                            pten::DenseTensorMeta(pten::DataType::FLOAT32,
                                                  framework::make_ddim({3, 10}),
                                                  pten::DataLayout::NCHW));
  auto* dense_y_data = dense_y.mutable_data<float>();

  float sum[3] = {0.0, 0.0, 0.0};
  for (size_t i = 0; i < 3; ++i) {
    for (size_t j = 0; j < 10; ++j) {
      dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
      dense_y_data[i * 10 + j] = (i * 10 + j) * 1.0;
      sum[i] += (i * 10 + j) * (i * 10 + j) * 1.0;
    }
  }

  paddle::platform::DeviceContextPool& pool =
      paddle::platform::DeviceContextPool::Instance();
  auto* dev_ctx = pool.Get(paddle::platform::CPUPlace());

  // 2. test API
  auto out = pten::Dot<float>(
      *(static_cast<paddle::platform::CPUDeviceContext*>(dev_ctx)),
      dense_x,
      dense_y);

  // 3. check result
  ASSERT_EQ(out.dims().size(), 2);
  ASSERT_EQ(out.dims()[0], 3);
  ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32);
  ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW);

  auto expect_result = sum;
  auto actual_result0 = out.data<float>()[0];
  auto actual_result1 = out.data<float>()[1];
  auto actual_result2 = out.data<float>()[2];
  ASSERT_NEAR(expect_result[0], actual_result0, 1e-6f);
  ASSERT_NEAR(expect_result[1], actual_result1, 1e-6f);
  ASSERT_NEAR(expect_result[2], actual_result2, 1e-6f);
}

Review comment (on TEST(DEV_API, dot)): [TODO] Unit tests for the external API and the internal API need to be managed separately, to make it easier to decouple compilation.
Review comment: [TODO] We need to resolve this later: API and kernel function names should be kept as consistent as possible, rather than following the original op names.