diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml
index 3a87826337465b..9f021c45e9b54c 100644
--- a/paddle/phi/api/yaml/backward.yaml
+++ b/paddle/phi/api/yaml/backward.yaml
@@ -1482,8 +1482,8 @@
     func : matrix_power_grad

 - backward_op : max_pool2d_with_index_grad
-  forward : max_pool2d_with_index(Tensor x, int[] kernel_size, int[] strides = {1, 1}, int[] paddings = {0, 0}, bool global_pooling = false, bool adaptive = false) -> Tensor(out), Tensor(mask)
-  args : (Tensor x, Tensor mask, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool global_pooling, bool adaptive)
+  forward : max_pool2d_with_index(Tensor x, int[] kernel_size, int[] strides = {1, 1}, int[] paddings = {0, 0}, bool global_pooling = false, bool adaptive = false, bool fractional = false, float random_u = 0.0) -> Tensor(out), Tensor(mask)
+  args : (Tensor x, Tensor mask, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool global_pooling, bool adaptive, bool fractional, float random_u)
   output : Tensor(x_grad)
   infer_meta :
     func : MaxPoolWithIndexGradInferMeta
@@ -1491,8 +1491,8 @@
     func : max_pool2d_with_index_grad

 - backward_op : max_pool3d_with_index_grad
-  forward : max_pool3d_with_index(Tensor x, int[] kernel_size, int[] strides = {1, 1, 1}, int[] paddings = {0, 0, 0}, bool global_pooling = false, bool adaptive = false) -> Tensor(out), Tensor(mask)
-  args : (Tensor x, Tensor mask, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool global_pooling, bool adaptive)
+  forward : max_pool3d_with_index(Tensor x, int[] kernel_size, int[] strides = {1, 1, 1}, int[] paddings = {0, 0, 0}, bool global_pooling = false, bool adaptive = false, bool fractional = false, float random_u = 0.0) -> Tensor(out), Tensor(mask)
+  args : (Tensor x, Tensor mask, Tensor out_grad, int[] kernel_size, int[] strides, int[] paddings, bool global_pooling, bool adaptive, bool fractional, float random_u)
   output : Tensor(x_grad)
   infer_meta :
     func : MaxPoolWithIndexGradInferMeta
diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml
index 3c57adc52e1a1b..d3b483764dddd5 100644
--- a/paddle/phi/api/yaml/ops.yaml
+++ b/paddle/phi/api/yaml/ops.yaml
@@ -1682,7 +1682,7 @@
   backward : matrix_power_grad

 - op : max_pool2d_with_index
-  args : (Tensor x, int[] kernel_size, int[] strides= {1, 1}, int[] paddings = {0, 0}, bool global_pooling = false, bool adaptive = false)
+  args : (Tensor x, int[] kernel_size, int[] strides= {1, 1}, int[] paddings = {0, 0}, bool global_pooling = false, bool adaptive = false, bool fractional = false, float random_u = 0.0)
   output : Tensor(out), Tensor(mask)
   infer_meta :
     func : MaxPoolWithIndexInferMeta
@@ -1691,7 +1691,7 @@
   backward : max_pool2d_with_index_grad

 - op : max_pool3d_with_index
-  args : (Tensor x, int[] kernel_size, int[] strides = {1, 1, 1}, int[] paddings = {0, 0, 0}, bool global_pooling = false, bool adaptive = false)
+  args : (Tensor x, int[] kernel_size, int[] strides = {1, 1, 1}, int[] paddings = {0, 0, 0}, bool global_pooling = false, bool adaptive = false, bool fractional = false, float random_u = 0.0)
   output : Tensor(out), Tensor(mask)
   infer_meta :
     func : MaxPoolWithIndexInferMeta
diff --git a/paddle/phi/infermeta/backward.cc b/paddle/phi/infermeta/backward.cc
index 606ca86fad99ed..e48b3401d5f865 100644
--- a/paddle/phi/infermeta/backward.cc
+++ b/paddle/phi/infermeta/backward.cc
@@ -658,6 +658,8 @@ void MaxPoolWithIndexGradInferMeta(const MetaTensor& x,
                                    const std::vector<int>& paddings,
                                    bool global_pooling,
                                   bool adaptive,
+                                   bool fractional,
+                                   float random_u,
                                    MetaTensor* dx) {
   dx->share_meta(x);
 }
diff --git a/paddle/phi/infermeta/backward.h b/paddle/phi/infermeta/backward.h
index c1d79f2378926e..3085f56e5145a4 100644
--- a/paddle/phi/infermeta/backward.h
+++ b/paddle/phi/infermeta/backward.h
@@ -310,6 +310,8 @@ void MaxPoolWithIndexGradInferMeta(const MetaTensor& x,
                                    const std::vector<int>& paddings,
                                    bool global_pooling,
                                    bool adaptive,
+                                   bool fractional,
+                                   float random_u,
                                    MetaTensor* dx);

 void MeshgridGradInferMeta(const std::vector<const MetaTensor*>& inputs,
diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc
index 93a70d918ff775..92fd86bae9310e 100644
--- a/paddle/phi/infermeta/unary.cc
+++ b/paddle/phi/infermeta/unary.cc
@@ -2263,6 +2263,8 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x,
                                const std::vector<int>& paddings,
                                bool global_pooling,
                                bool adaptive,
+                               bool fractional,
+                               float random_u,
                                MetaTensor* out,
                                MetaTensor* mask,
                                MetaConfig config) {
@@ -2309,7 +2311,7 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x,
                         kernel_size_.size()));

   std::vector<int64_t> output_shape({x_dims[0], x_dims[1]});
-  if (adaptive) {
+  if (adaptive || fractional) {
     output_shape.insert(
         output_shape.end(), kernel_size_.begin(), kernel_size_.end());
   } else {
diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h
index 82398a08c8b9bc..638a53247d431b 100644
--- a/paddle/phi/infermeta/unary.h
+++ b/paddle/phi/infermeta/unary.h
@@ -344,6 +344,8 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x,
                                const std::vector<int>& paddings,
                                bool global_pooling,
                                bool adaptive,
+                               bool fractional,
+                               float random_u,
                                MetaTensor* out,
                                MetaTensor* mask,
                                MetaConfig config = MetaConfig());
diff --git a/paddle/phi/kernels/funcs/pooling.cc b/paddle/phi/kernels/funcs/pooling.cc
index 0573430c2010c5..891d2c8eea3e0c 100644
--- a/paddle/phi/kernels/funcs/pooling.cc
+++ b/paddle/phi/kernels/funcs/pooling.cc
@@ -1571,6 +1571,8 @@ class MaxPool2dWithIndexFunctor<CPUContext, T1, T2> {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   bool adaptive,
+                  bool fractional,
+                  float random_u,
                   DenseTensor* output,
                   DenseTensor* mask) {
     const int batch_size = static_cast<int>(input.dims()[0]);
@@ -1592,6 +1594,26 @@ class MaxPool2dWithIndexFunctor<CPUContext, T1, T2> {
     T1* output_data = context.template Alloc<T1>(output);
     T2* mask_data = context.template Alloc<T2>(mask);

+    float alpha_height = 0, alpha_width = 0;
+    float u_height = 0, u_width = 0;
+    if (fractional) {
+      float u = 0;
+      if (random_u == 0) {
+        std::uniform_real_distribution<float> dist(0, 1);
+        auto engine = phi::GetCPURandomEngine(0);
+        u = dist(*engine);
+      } else {
+        u = random_u;
+      }
+
+      alpha_height = static_cast<float>(input_height) / output_height;
+      alpha_width = static_cast<float>(input_width) / output_width;
+
+      u_height =
+          FractionalRationalU(u, alpha_height, input_height, output_height);
+      u_width = FractionalRationalU(u, alpha_width, input_width, output_width);
+    }
+
     int hstart = 0, hend = 0;
     int wstart = 0, wend = 0;
     for (int i = 0; i < batch_size; i++) {
@@ -1600,6 +1622,11 @@
         if (adaptive) {
           hstart = AdaptStartIndex(ph, input_height, output_height);
           hend = AdaptEndIndex(ph, input_height, output_height);
+        } else if (fractional) {
+          hstart = FractionalStartIndex(ph, alpha_height, u_height);
+          hend = FractionalEndIndex(ph, alpha_height, u_height);
+          hstart = std::max(hstart, 0);
+          hend = std::min(hend, input_height);
         } else {
           hstart = ph * stride_height - padding_height;
           hend = std::min(hstart + ksize_height, input_height);
@@ -1609,6 +1636,11 @@
           if (adaptive) {
            wstart = AdaptStartIndex(pw, input_width, output_width);
            wend = AdaptEndIndex(pw, input_width, output_width);
+          } else if (fractional) {
+            wstart = FractionalStartIndex(pw, alpha_width, u_width);
+            wend = FractionalEndIndex(pw, alpha_width, u_width);
+            wstart = std::max(wstart, 0);
+            wend = std::min(wend, input_width);
           } else {
             wstart = pw * stride_width - padding_width;
             wend = std::min(wstart + ksize_width, input_width);
@@ -1653,6 +1685,8 @@ class MaxPool2dWithIndexGradFunctor<CPUContext, T1, T2> {
                   const std::vector<int>& strides UNUSED,
                   const std::vector<int>& paddings UNUSED,
                   bool adaptive UNUSED,
+                  bool fractional UNUSED,
+                  float random_u UNUSED,
                   DenseTensor* input_grad) {
     const int batch_size = static_cast<int>(input_grad->dims()[0]);
     const int input_height = static_cast<int>(input_grad->dims()[2]);
@@ -1704,6 +1738,8 @@ class MaxPool3dWithIndexFunctor<CPUContext, T1, T2> {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   bool adaptive,
+                  bool fractional,
+                  float random_u,
                   DenseTensor* output,
                   DenseTensor* mask) {
     const int batch_size = static_cast<int>(input.dims()[0]);
@@ -1730,6 +1766,28 @@ class MaxPool3dWithIndexFunctor<CPUContext, T1, T2> {
     T1* output_data = context.template Alloc<T1>(output);
     T2* mask_data = context.template Alloc<T2>(mask);

+    float alpha_height = 0, alpha_width = 0, alpha_depth = 0;
+    float u_height = 0, u_width = 0, u_depth = 0;
+    if (fractional) {
+      float u = 0;
+      if (random_u == 0) {
+        std::uniform_real_distribution<float> dist(0, 1);
+        auto engine = phi::GetCPURandomEngine(0);
+        u = dist(*engine);
+      } else {
+        u = random_u;
+      }
+
+      alpha_depth = static_cast<float>(input_depth) / output_depth;
+      alpha_height = static_cast<float>(input_height) / output_height;
+      alpha_width = static_cast<float>(input_width) / output_width;
+
+      u_depth = FractionalRationalU(u, alpha_depth, input_depth, output_depth);
+      u_height =
+          FractionalRationalU(u, alpha_height, input_height, output_height);
+      u_width = FractionalRationalU(u, alpha_width, input_width, output_width);
+    }
+
     int dstart = 0, dend = 0;
     int hstart = 0, hend = 0;
     int wstart = 0, wend = 0;
@@ -1739,6 +1797,11 @@
         if (adaptive) {
           dstart = AdaptStartIndex(pd, input_depth, output_depth);
           dend = AdaptEndIndex(pd, input_depth, output_depth);
+        } else if (fractional) {
+          dstart = FractionalStartIndex(pd, alpha_depth, u_depth);
+          dend = FractionalEndIndex(pd, alpha_depth, u_depth);
+          dstart = std::max(dstart, 0);
+          dend = std::min(dend, input_depth);
         } else {
           dstart = pd * stride_depth - padding_depth;
           dend = std::min(dstart + ksize_depth, input_depth);
@@ -1748,6 +1811,11 @@
           if (adaptive) {
             hstart = AdaptStartIndex(ph, input_height, output_height);
             hend = AdaptEndIndex(ph, input_height, output_height);
+          } else if (fractional) {
+            hstart = FractionalStartIndex(ph, alpha_height, u_height);
+            hend = FractionalEndIndex(ph, alpha_height, u_height);
+            hstart = std::max(hstart, 0);
+            hend = std::min(hend, input_height);
           } else {
             hstart = ph * stride_height - padding_height;
             hend = std::min(hstart + ksize_height, input_height);
@@ -1757,6 +1825,11 @@
             if (adaptive) {
               wstart = AdaptStartIndex(pw, input_width, output_width);
               wend = AdaptEndIndex(pw, input_width, output_width);
+            } else if (fractional) {
+              wstart = FractionalStartIndex(pw, alpha_width, u_width);
+              wend = FractionalEndIndex(pw, alpha_width, u_width);
+              wstart = std::max(wstart, 0);
+              wend = std::min(wend, input_width);
             } else {
               wstart = pw * stride_width - padding_width;
               wend = std::min(wstart + ksize_width, input_width);
@@ -1806,6 +1879,8 @@ class MaxPool3dWithIndexGradFunctor<CPUContext, T1, T2> {
                  const std::vector<int>& strides UNUSED,
                   const std::vector<int>& paddings UNUSED,
                   bool adaptive UNUSED,
+                  bool fractional UNUSED,
+                  float random_u UNUSED,
                   DenseTensor* input_grad) {
     const int batch_size = static_cast<int>(input_grad->dims()[0]);
     const int input_depth = static_cast<int>(input_grad->dims()[2]);
diff --git a/paddle/phi/kernels/funcs/pooling.cu b/paddle/phi/kernels/funcs/pooling.cu
index 2f89b51815e64f..b50de21893c2f7 100644
--- a/paddle/phi/kernels/funcs/pooling.cu
+++ b/paddle/phi/kernels/funcs/pooling.cu
@@ -14,6 +14,12 @@ limitations under the License. */
 #include <algorithm>
 #include <vector>
+#ifdef __NVCC__
+#include <curand_kernel.h>
+#endif
+#ifdef __HIPCC__
+#include <hiprand_kernel.h>
+#endif

 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
@@ -21,6 +27,9 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/reduce_function.h"
 #include "paddle/phi/kernels/primitive/datamover_primitives.h"

+#include "paddle/phi/kernels/funcs/distribution_helper.h"
+#include "paddle/phi/kernels/funcs/random.cuh"
+
 namespace phi {
 namespace funcs {

@@ -1927,9 +1936,42 @@ __global__ void KernelMaxPool2dWithIdx(const int nthreads,
                                        const int padding_height,
                                        const int padding_width,
                                        bool adaptive,
+                                       bool fractional,
+                                       float random_u,
+                                       uint64_t seed,
+                                       uint64_t offset,
                                        T1* output_data,
                                        T2* mask_data,
                                        FastDivModForPooling divmods) {
+  float alpha_height = 0, alpha_width = 0, alpha_depth = 0;
+  float u_height = 0, u_width = 0, u_depth = 0;
+  if (fractional) {
+    float u = 0;
+    if (random_u == 0) {
+      size_t thread_idx =
+          static_cast<size_t>(blockIdx.x * blockDim.x + threadIdx.x);
+#if defined(__NVCC__)
+      curandStatePhilox4_32_10_t state;
+      curand_init(seed, thread_idx, offset, &state);
+#else
+      hiprandStatePhilox4_32_10_t state;
+      hiprand_init(seed, thread_idx, offset, &state);
+#endif
+      phi::funcs::uniform_distribution<float> dist;
+      float4 rand = dist(&state);
+      u = (&rand.x)[0];
+    } else {
+      u = random_u;
+    }
+
+    alpha_height = static_cast<float>(input_height) / output_height;
+    alpha_width = static_cast<float>(input_width) / output_width;
+
+    u_height =
+        FractionalRationalU(u, alpha_height, input_height, output_height);
+    u_width = FractionalRationalU(u, alpha_width, input_width, output_width);
+  }
+
   for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
        index += blockDim.x * gridDim.x) {
     int hstart, hend, wstart, wend;
@@ -1953,6 +1995,16 @@ __global__ void KernelMaxPool2dWithIdx(const int nthreads,
       wstart = AdaptStartIndex(w_offset, input_width, output_width);
       wend = AdaptEndIndex(w_offset, input_width, output_width);
+    } else if (fractional) {
+      hstart = FractionalStartIndex(h_offset, alpha_height, u_height);
+      hend = FractionalEndIndex(h_offset, alpha_height, u_height);
+      hstart = std::max(hstart, 0);
+      hend = std::min(hend, input_height);
+
+      wstart = FractionalStartIndex(w_offset, alpha_width, u_width);
+      wend = FractionalEndIndex(w_offset, alpha_width, u_width);
+      wstart = std::max(wstart, 0);
+      wend = std::min(wend, input_width);
     } else {
       hstart = h_offset * stride_height - padding_height;
       hend = min(hstart + ksize_height, input_height);
@@ -2048,8 +2100,41 @@ __global__ void KernelMaxPool2DWithIdxGrad(const int nthreads,
                                            const int padding_height,
                                            const int padding_width,
                                            bool adaptive,
+                                           bool fractional,
+                                           float random_u,
+                                           uint64_t seed,
+                                           uint64_t offset,
                                            T1* input_grad,
                                            FastDivModForPooling divmods) {
+  float alpha_height = 0, alpha_width = 0, alpha_depth = 0;
+  float u_height = 0, u_width = 0, u_depth = 0;
+  if (fractional) {
+    float u = 0;
+    if (random_u == 0) {
+      size_t thread_idx =
+          static_cast<size_t>(blockIdx.x * blockDim.x + threadIdx.x);
+#if defined(__NVCC__)
+      curandStatePhilox4_32_10_t state;
+      curand_init(seed, thread_idx, offset, &state);
+#else
+      hiprandStatePhilox4_32_10_t state;
+      hiprand_init(seed, thread_idx, offset, &state);
+#endif
+      phi::funcs::uniform_distribution<float> dist;
+      float4 rand = dist(&state);
+      u = (&rand.x)[0];
+    } else {
+      u = random_u;
+    }
+
+    alpha_height = static_cast<float>(input_height) / output_height;
+    alpha_width = static_cast<float>(input_width) / output_width;
+
+    u_height =
+        FractionalRationalU(u, alpha_height, input_height, output_height);
+    u_width = FractionalRationalU(u, alpha_width, input_width, output_width);
+  }
+
   for (int index = blockIdx.x * blockDim.x + threadIdx.x; index < nthreads;
        index += blockDim.x * gridDim.x) {
     int phstart, phend, pwstart, pwend;
@@ -2075,6 +2160,16 @@ __global__ void KernelMaxPool2DWithIdxGrad(const int nthreads,
       pwstart = w_offset * output_width / input_width;
       pwend =
           min((w_offset + 1) * output_width / input_width + 1, output_width);
+    } else if (fractional) {
+      phstart = FractionalStartIndex(h_offset, alpha_height, u_height);
+      phend = FractionalEndIndex(h_offset, alpha_height, u_height);
+      phstart = std::max(phstart, 0);
+      phend = std::min(phend, input_height);
+
+      pwstart = FractionalStartIndex(w_offset, alpha_width, u_width);
+      pwend = FractionalEndIndex(w_offset, alpha_width, u_width);
+      pwstart = std::max(pwstart, 0);
+      pwend = std::min(pwend, input_width);
     } else {
       phstart =
           (h_offset + padding_height < ksize_height)
@@ -2115,6 +2210,8 @@ class MaxPool2dWithIndexFunctor<phi::GPUContext, T1, T2> {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   bool adaptive,
+                  bool fractional,
+                  float random_u,
                   DenseTensor* output,
                   DenseTensor* mask) {
     const int batch_size = input.dims()[0];
@@ -2172,6 +2269,18 @@ class MaxPool2dWithIndexFunctor<phi::GPUContext, T1, T2> {
     int blocks = (nthreads + thread_num - 1) / thread_num;
     dim3 threads(thread_num, 1);
     dim3 grid(blocks, 1);
+
+    uint64_t seed = 0;
+    uint64_t offset = 0;
+    if (fractional) {
+      // generate seed for fractional pool
+      auto gen_cuda = context.GetGenerator();
+      constexpr int increment_offset = 1 * 4;  // one seed with multiple of 4
+      auto seed_offset = gen_cuda->IncrementOffset(increment_offset);
+      seed = seed_offset.first;
+      offset = seed_offset.second;
+    }
+
     KernelMaxPool2dWithIdx<T1, T2>
         <<<grid, threads, 0, context.stream()>>>(nthreads,
                                                  input_data,
@@ -2187,6 +2296,10 @@
                                                  padding_height,
                                                  padding_width,
                                                  adaptive,
+                                                 fractional,
+                                                 random_u,
+                                                 seed,
+                                                 offset,
                                                  output_data,
                                                  mask_data,
                                                  pool_divmods);
@@ -2209,6 +2322,8 @@ class MaxPool2dWithIndexGradFunctor<phi::GPUContext, T1, T2> {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   bool adaptive,
+                  bool fractional,
+                  float random_u,
                   DenseTensor* input_grad) {
     const int batch_size = input_grad->dims()[0];
     const int input_channels = input_grad->dims()[1];
@@ -2234,6 +2349,18 @@ class MaxPool2dWithIndexGradFunctor<phi::GPUContext, T1, T2> {
     auto pool_divmods =
         FastDivModForPooling(input_channels, input_width, input_height);
+
+    uint64_t seed = 0;
+    uint64_t offset = 0;
+    if (fractional) {
+      // generate seed for fractional pool
+      auto gen_cuda = context.GetGenerator();
+      constexpr int increment_offset = 1 * 4;  // one seed with multiple of 4
+      auto seed_offset = gen_cuda->IncrementOffset(increment_offset);
+      seed = seed_offset.first;
+      offset = seed_offset.second;
+    }
+
     KernelMaxPool2DWithIdxGrad<T1, T2>
         <<<grid, threads, 0, context.stream()>>>(nthreads,
                                                  output_grad_data,
@@ -2250,6 +2377,10 @@
                                                  padding_height,
                                                  padding_width,
                                                  adaptive,
+                                                 fractional,
+                                                 random_u,
+                                                 seed,
+                                                 offset,
                                                  input_grad_data,
                                                  pool_divmods);
   }
@@ -2288,9 +2419,44 @@ __global__ void KernelMaxPool3DWithIdx(const int ncd,
                                        const int padding_height,
                                        const int padding_width,
                                        bool adaptive,
+                                       bool fractional,
+                                       float random_u,
+                                       uint64_t seed,
+                                       uint64_t offset,
                                        T1* output_data,
                                        T2* mask_data,
                                        FastDivModForPooling3D divmods_output) {
+  float alpha_height = 0, alpha_width = 0, alpha_depth = 0;
+  float u_height = 0, u_width = 0, u_depth = 0;
+  if (fractional) {
+    float u = 0;
+    if (random_u == 0) {
+      size_t thread_idx =
+          static_cast<size_t>(blockIdx.x * blockDim.x + threadIdx.x);
+#if defined(__NVCC__)
+      curandStatePhilox4_32_10_t state;
+      curand_init(seed, thread_idx, offset, &state);
+#else
+      hiprandStatePhilox4_32_10_t state;
+      hiprand_init(seed, thread_idx, offset, &state);
+#endif
+      phi::funcs::uniform_distribution<float> dist;
+      float4 rand = dist(&state);
+      u = (&rand.x)[0];
+    } else {
+      u = random_u;
+    }
+
+    alpha_depth = static_cast<float>(input_depth) / output_depth;
+    alpha_height = static_cast<float>(input_height) / output_height;
+    alpha_width = static_cast<float>(input_width) / output_width;
+
+    u_depth = FractionalRationalU(u, alpha_depth, input_depth, output_depth);
+    u_height =
+        FractionalRationalU(u, alpha_height, input_height, output_height);
+    u_width = FractionalRationalU(u, alpha_width, input_width, output_width);
+  }
+
   int w_offset, h_offset, d_offset, nc_offset;
   int dstart, dend, hstart, hend, wstart, wend;
   const T1* input_data_cur;
@@ -2320,6 +2486,21 @@ __global__ void KernelMaxPool3DWithIdx(const int ncd,
       wstart = AdaptStartIndex(w_offset, input_width, output_width);
       wend = AdaptEndIndex(w_offset, input_width, output_width);
+    } else if (fractional) {
+      dstart = FractionalStartIndex(d_offset, alpha_depth, u_depth);
+      dend = FractionalEndIndex(d_offset, alpha_depth, u_depth);
+      dstart = std::max(dstart, 0);
+      dend = std::min(dend, input_depth);
+
+      hstart = FractionalStartIndex(h_offset, alpha_height, u_height);
+      hend = FractionalEndIndex(h_offset, alpha_height, u_height);
+      hstart = std::max(hstart, 0);
+      hend = std::min(hend, input_height);
+
+      wstart = FractionalStartIndex(w_offset, alpha_width, u_width);
+      wend = FractionalEndIndex(w_offset, alpha_width, u_width);
+      wstart = std::max(wstart, 0);
+      wend = std::min(wend, input_width);
     } else {
       dstart = d_offset * stride_depth - padding_depth;
       hstart = h_offset * stride_height - padding_height;
@@ -2373,6 +2554,8 @@ __global__ void KernelMaxPool3DWithIdxGrad(
     const int padding_height,
     const int padding_width,
     bool adaptive,
+    bool fractional,
+    float random_u,
     T1* input_grad,
     FastDivModForPooling3D divmods_output) {
   int w_offset, h_offset, d_offset, nc_offset;
@@ -2415,6 +2598,8 @@ class MaxPool3dWithIndexFunctor<phi::GPUContext, T1, T2> {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   bool adaptive,
+                  bool fractional,
+                  float random_u,
                   DenseTensor* output,
                   DenseTensor* mask) {
     const int batch_size = input.dims()[0];
@@ -2457,6 +2642,17 @@ class MaxPool3dWithIndexFunctor<phi::GPUContext, T1, T2> {
     auto pool_divmods_output = FastDivModForPooling3D(
         input_channels, output_width, output_height, output_depth);

+    uint64_t seed = 0;
+    uint64_t offset = 0;
+    if (fractional) {
+      // generate seed for fractional pool
+      auto gen_cuda = context.GetGenerator();
+      constexpr int increment_offset = 1 * 4;  // one seed with multiple of 4
+      auto seed_offset = gen_cuda->IncrementOffset(increment_offset);
+      seed = seed_offset.first;
+      offset = seed_offset.second;
+    }
+
     KernelMaxPool3DWithIdx<T1, T2>
         <<<grid, threads, 0, context.stream()>>>(ncd,
                                                  input_data,
@@ -2477,6 +2673,10 @@
                                                  padding_height,
                                                  padding_width,
                                                  adaptive,
+                                                 fractional,
+                                                 random_u,
+                                                 seed,
+                                                 offset,
                                                 output_data,
                                                 mask_data,
                                                 pool_divmods_output);
@@ -2498,6 +2698,8 @@ class MaxPool3dWithIndexGradFunctor<phi::GPUContext, T1, T2> {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   bool adaptive,
+                  bool fractional,
+                  float random_u,
                   DenseTensor* input_grad) {
     const int batch_size = input_grad->dims()[0];
     const int input_channels = input_grad->dims()[1];
@@ -2559,6 +2761,8 @@
                                                  padding_height,
                                                  padding_width,
                                                  adaptive,
+                                                 fractional,
+                                                 random_u,
                                                  input_grad_data,
                                                  pool_divmods_output);
   }
diff --git a/paddle/phi/kernels/funcs/pooling.h b/paddle/phi/kernels/funcs/pooling.h
index 1ffd747735543c..420ae9b4c862fc 100644
--- a/paddle/phi/kernels/funcs/pooling.h
+++ b/paddle/phi/kernels/funcs/pooling.h
@@ -100,6 +100,31 @@ HOSTDEVICE inline int AdaptEndIndex(int ph, int input_size, int output_size) {
       ceil(static_cast<double>((ph + 1) * input_size) / output_size));
 }

+/* used for fractional pool to calculate start and end index of each divided
+ * grid
+ */
+HOSTDEVICE inline float FractionalRationalU(float u,
+                                            float alpha,
+                                            int input,
+                                            int output) {
+  int base = input / output;
+
+  float u_max1 = static_cast<float>(base + 2) / alpha - 1;
+  float u_max2 = static_cast<float>(input + 1 - base) / alpha -
+                 static_cast<float>(output - 1);
+  float max_u = std::min(u_max1, u_max2);
+
+  return u * max_u;
+}
+
+HOSTDEVICE inline int FractionalStartIndex(int idx, float alpha, float u) {
+  return static_cast<int>(ceil(alpha * (idx + u) - 1));
+}
+
+HOSTDEVICE inline int FractionalEndIndex(int idx, float alpha, float u) {
+  return static_cast<int>(ceil(alpha * (idx + 1 + u) - 1));
+}
+
 /*
  * \brief Getting pooling results, and calculating gradient.
  *
@@ -322,6 +347,8 @@ class MaxPool2dWithIndexFunctor {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   bool adaptive,
+                  bool fractional,
+                  float random_u,
                   DenseTensor* output,
                   DenseTensor* mask);
 };
@@ -336,6 +363,8 @@ class MaxPool2dWithIndexGradFunctor {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   bool adaptive,
+                  bool fractional,
+                  float random_u,
                   DenseTensor* input_grad);
 };
@@ -348,6 +377,8 @@ class MaxPool3dWithIndexFunctor {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   bool adaptive,
+                  bool fractional,
+                  float random_u,
                   DenseTensor* output,
                   DenseTensor* mask);
 };
@@ -362,6 +393,8 @@ class MaxPool3dWithIndexGradFunctor {
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings,
                   bool adaptive,
+                  bool fractional,
+                  float random_u,
                   DenseTensor* input_grad);
 };
diff --git a/paddle/phi/kernels/impl/pool_grad_kernel_impl.h b/paddle/phi/kernels/impl/pool_grad_kernel_impl.h
index cf00a9b82b8dd8..cc156351743cf1 100644
--- a/paddle/phi/kernels/impl/pool_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/pool_grad_kernel_impl.h
@@ -150,6 +150,8 @@ void MaxPoolWithIndexGradRawKernel(const Context& ctx,
                                    const std::vector<int>& paddings,
                                    bool global_pooling,
                                    bool adaptive,
+                                   bool fractional,
+                                   float random_u,
                                    DenseTensor* dx) {
   std::vector<int> paddings_ = paddings;
   std::vector<int> kernel_size_ = kernel_size;
@@ -168,13 +170,29 @@ void MaxPoolWithIndexGradRawKernel(const Context& ctx,
   switch (kernel_size_.size()) {
     case 2: {
       funcs::MaxPool2dWithIndexGradFunctor<Context, T1, T2> pool2d_backward;
-      pool2d_backward(
-          ctx, dout, mask, kernel_size_, strides, paddings_, adaptive, dx);
+      pool2d_backward(ctx,
+                      dout,
+                      mask,
+                      kernel_size_,
+                      strides,
+                      paddings_,
+                      adaptive,
+                      fractional,
+                      random_u,
+                      dx);
     } break;
     case 3: {
       funcs::MaxPool3dWithIndexGradFunctor<Context, T1, T2> pool3d_backward;
-      pool3d_backward(
-          ctx, dout, mask, kernel_size_, strides, paddings_, adaptive, dx);
+      pool3d_backward(ctx,
+                      dout,
+                      mask,
+                      kernel_size_,
+                      strides,
+                      paddings_,
+                      adaptive,
+                      fractional,
+                      random_u,
+                      dx);
     } break;
     default: {
       PADDLE_THROW(
@@ -262,6 +280,8 @@ void MaxPool2dWithIndexGradKernel(const Context& ctx,
                                   const std::vector<int>& paddings,
                                   bool global_pooling,
                                   bool adaptive,
+                                  bool fractional,
+                                  float random_u,
                                   DenseTensor* dx) {
   MaxPoolWithIndexGradRawKernel(ctx,
                                 x,
@@ -272,6 +292,8 @@ void MaxPool2dWithIndexGradKernel(const Context& ctx,
                                 paddings,
                                 global_pooling,
                                 adaptive,
+                                fractional,
+                                random_u,
                                 dx);
 }

@@ -317,6 +339,8 @@ void MaxPool3dWithIndexGradKernel(const Context& ctx,
                                   const std::vector<int>& paddings,
                                   bool global_pooling,
                                   bool adaptive,
+                                  bool fractional,
+                                  float random_u,
                                   DenseTensor* dx) {
   MaxPoolWithIndexGradRawKernel(ctx,
                                 x,
@@ -327,6 +351,8 @@ void MaxPool3dWithIndexGradKernel(const Context& ctx,
                                 paddings,
                                 global_pooling,
                                 adaptive,
+                                fractional,
+                                random_u,
                                 dx);
 }
diff --git a/paddle/phi/kernels/impl/pool_kernel_impl.h b/paddle/phi/kernels/impl/pool_kernel_impl.h
index dc0b7ad2108ac5..d7f998b3420d16 100644
--- a/paddle/phi/kernels/impl/pool_kernel_impl.h
+++ b/paddle/phi/kernels/impl/pool_kernel_impl.h
@@ -192,6 +192,8 @@ void MaxPoolWithIndexRawKernel(const Context& ctx,
                                const std::vector<int>& paddings,
                                bool global_pooling,
                                bool adaptive,
+                               bool fractional,
+                               float random_u,
                                DenseTensor* out,
                                DenseTensor* mask) {
   std::vector<int> paddings_ = paddings;
@@ -207,13 +209,29 @@ void MaxPoolWithIndexRawKernel(const Context& ctx,
   switch (kernel_size_.size()) {
     case 2: {
       funcs::MaxPool2dWithIndexFunctor<Context, T1, T2> pool2d_forward;
-      pool2d_forward(
-          ctx, x, kernel_size_, strides, paddings_, adaptive, out, mask);
+      pool2d_forward(ctx,
+                     x,
+                     kernel_size_,
+                     strides,
+                     paddings_,
+                     adaptive,
+                     fractional,
+                     random_u,
+                     out,
+                     mask);
     } break;
     case 3: {
       funcs::MaxPool3dWithIndexFunctor<Context, T1, T2> pool3d_forward;
-      pool3d_forward(
-          ctx, x, kernel_size_, strides, paddings_, adaptive, out, mask);
+      pool3d_forward(ctx,
+                     x,
+                     kernel_size_,
+                     strides,
+                     paddings_,
+                     adaptive,
+                     fractional,
+                     random_u,
+                     out,
+                     mask);
     } break;
     default: {
       PADDLE_THROW(
@@ -260,6 +278,8 @@ void MaxPool2dWithIndexKernel(const Context& ctx,
                               const std::vector<int>& paddings,
                               bool global_pooling,
                               bool adaptive,
+                              bool fractional,
+                              float random_u,
                               DenseTensor* out,
                               DenseTensor* mask) {
   MaxPoolWithIndexRawKernel(ctx,
@@ -269,6 +289,8 @@ void MaxPool2dWithIndexKernel(const Context& ctx,
                             paddings,
                             global_pooling,
                             adaptive,
+                            fractional,
+                            random_u,
                             out,
                             mask);
 }
@@ -309,6 +331,8 @@ void MaxPool3dWithIndexKernel(const Context& ctx,
                               const std::vector<int>& paddings,
                               bool global_pooling,
                               bool adaptive,
+                              bool fractional,
+                              float random_u,
                               DenseTensor* out,
                               DenseTensor* mask) {
   MaxPoolWithIndexRawKernel(ctx,
@@ -318,6 +342,8 @@ void MaxPool3dWithIndexKernel(const Context& ctx,
                             paddings,
                             global_pooling,
                             adaptive,
+                            fractional,
+                            random_u,
                             out,
                             mask);
 }
diff --git a/paddle/phi/kernels/pool_grad_kernel.h b/paddle/phi/kernels/pool_grad_kernel.h
index 64ad99a6d3eae1..96349d663c0c62 100644
--- a/paddle/phi/kernels/pool_grad_kernel.h
+++ b/paddle/phi/kernels/pool_grad_kernel.h
@@ -96,6 +96,8 @@ void MaxPool2dWithIndexGradKernel(const Context& ctx,
                                   const std::vector<int>& paddings,
                                   bool global_pooling,
                                   bool adaptive,
+                                  bool fractional,
+                                  float random_u,
                                   DenseTensor* dx);

 template <typename T, typename Context>
@@ -142,6 +144,8 @@ void MaxPool3dWithIndexGradKernel(const Context& ctx,
                                   const std::vector<int>& paddings,
                                   bool global_pooling,
                                   bool adaptive,
+                                  bool fractional,
+                                  float random_u,
                                   DenseTensor* dx);

 }  // namespace phi
diff --git a/paddle/phi/kernels/pool_kernel.h b/paddle/phi/kernels/pool_kernel.h
index c1a7dd471a02f7..8363890eaeff20 100644
--- a/paddle/phi/kernels/pool_kernel.h
+++ b/paddle/phi/kernels/pool_kernel.h
@@ -60,6 +60,8 @@ void MaxPool2dWithIndexKernel(const Context& ctx,
                               const std::vector<int>& paddings,
                               bool global_pooling,
                               bool adaptive,
+                              bool fractional,
+                              float random_u,
                               DenseTensor* out,
                               DenseTensor* mask);

@@ -101,6 +103,8 @@ void MaxPool3dWithIndexKernel(const Context& ctx,
                               const std::vector<int>& paddings,
                               bool global_pooling,
                               bool adaptive,
+                              bool fractional,
+                              float random_u,
                               DenseTensor* out,
                               DenseTensor* mask);

diff --git a/paddle/phi/kernels/xpu/pool_grad_kernel.cc b/paddle/phi/kernels/xpu/pool_grad_kernel.cc
index b03be5dd9449cb..531079ff1d12d2 100644
--- a/paddle/phi/kernels/xpu/pool_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/pool_grad_kernel.cc
@@ -386,6 +386,8 @@ void MaxPool2dWithIndexGradKernel(const Context& ctx,
                                   const std::vector<int>& paddings_t,
                                   bool global_pooling,
                                   bool adaptive,
+                                  bool fractional,
+                                  float random_u,
                                   DenseTensor* dx) {
   using XPUType = typename XPUTypeTrait<T>::Type;

diff --git a/paddle/phi/kernels/xpu/pool_kernel.cc b/paddle/phi/kernels/xpu/pool_kernel.cc
index 466adade072c7a..c6076e20b35daf 100644
--- a/paddle/phi/kernels/xpu/pool_kernel.cc
+++ b/paddle/phi/kernels/xpu/pool_kernel.cc
@@ -308,6 +308,8 @@ void MaxPool2dWithIndexKernel(const Context& ctx,
                               const std::vector<int>& paddings_t,
                               bool global_pooling,
                               bool adaptive,
+                              bool fractional,
+                              float random_u,
                               DenseTensor* out,
                               DenseTensor* mask) {
   using XPUType = typename XPUTypeTrait<T>::Type;

diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index f96b5e9b8c42bb..1956190161a18e 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -129,6 +129,8 @@
     AdaptiveMaxPool1D,
     AdaptiveMaxPool2D,
     AdaptiveMaxPool3D,
+    FractionalMaxPool2D,
+    FractionalMaxPool3D,
     AvgPool1D,
     AvgPool2D,
     AvgPool3D,
@@ -215,6 +217,8 @@
     'SmoothL1Loss',
     'MaxPool3D',
     'AdaptiveMaxPool2D',
+    'FractionalMaxPool2D',
+    'FractionalMaxPool3D',
     'Hardshrink',
     'Softplus',
     'KLDivLoss',
diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py
index d9b9e56210842a..9ad36dbe3ad560 100644
--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -134,6 +134,8 @@
     adaptive_max_pool1d,
     adaptive_max_pool2d,
     adaptive_max_pool3d,
+    fractional_max_pool2d,
+    fractional_max_pool3d,
     avg_pool1d,
     avg_pool2d,
     avg_pool3d,
@@ -225,6 +227,8 @@
     'adaptive_max_pool1d',
     'adaptive_max_pool2d',
     'adaptive_max_pool3d',
+    'fractional_max_pool2d',
+    'fractional_max_pool3d',
     'binary_cross_entropy',
     'binary_cross_entropy_with_logits',
     'cross_entropy',
diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py
index e43daa332aea3a..caa29048040b1d 100755
--- a/python/paddle/nn/functional/pooling.py
+++ b/python/paddle/nn/functional/pooling.py
@@ -632,7 +632,7 @@ def max_pool1d(
     if in_dynamic_or_pir_mode():
         if return_mask:
             pool_out = _C_ops.max_pool2d_with_index(
-                x, kernel_size, stride, padding, False, False
+                x, kernel_size, stride, padding, False, False, False, 0.0
             )
             return (
                 (squeeze(pool_out[0], [2]), squeeze(pool_out[1], [2]))
@@ -1259,7 +1259,7 @@ def max_pool2d(
     if in_dynamic_or_pir_mode():
         if return_mask:
             output = _C_ops.max_pool2d_with_index(
-                x, kernel_size, stride, padding, False, False
+                x, kernel_size, stride, padding, False, False, False, 0.0
             )
             return output if return_mask else output[0]
         else:
@@ -1426,7 +1426,7 @@ def max_pool3d(
     if in_dynamic_or_pir_mode():
         if return_mask:
             output = _C_ops.max_pool3d_with_index(
-                x, kernel_size, stride, padding, False, False
+                x, kernel_size, stride, padding, False, False, False, 0.0
             )
             return output if return_mask else output[0]
         else:
@@ -1877,7 +1877,7 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
     x = unsqueeze(x, [2])
     if in_dygraph_mode():
         pool_out = _C_ops.max_pool2d_with_index(
-            x, pool_size, [1, 1], [0, 0], False, True
+            x, pool_size, [1, 1], [0, 0], False, True, False, 0.0
         )
         return (
             (squeeze(pool_out[0], [2]), squeeze(pool_out[1], [2]))
@@ -1971,7 +1971,7 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
             output_size[1] = in_w
     if in_dygraph_mode():
         pool_out = _C_ops.max_pool2d_with_index(
-            x, output_size, [1, 1], [0, 0], False, True
+            x, output_size, [1, 1], [0, 0], False, True, False, 0.0
         )
         return pool_out if return_mask else pool_out[0]
     else:
@@ -2063,7 +2063,7 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
     if in_dygraph_mode():
         # By default, strides is [1,1,1] and paddings is [0, 0, 0]
         pool_out = _C_ops.max_pool3d_with_index(
-            x, output_size, [1, 1, 1], [0, 0, 0], False, True
+            x, output_size, [1, 1, 1], [0, 0, 0], False, True, False, 0.0
         )
         return pool_out if return_mask else pool_out[0]
     else:
@@ -2094,3 +2094,275 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
     )

     return (pool_out, mask) if return_mask else pool_out
+
+
+def fractional_max_pool2d(
+    x, output_size, random_u=None, return_mask=False, name=None
+):
+    r"""
+    This operation applies 2D fractional max pooling on the input tensor, which is described in the paper:
+
+    [1] Ben Graham, Fractional Max-Pooling. 2015. http://arxiv.org/abs/1412.6071
+
+    The h and w dimensions of the output tensor are determined by the parameter output_size.
+
+    For each dimension, the fractional max pooling:
+
+    ..  math::
+
+        \alpha &= size_{input} / size_{output}
+
+        index_{start} &= ceil( \alpha * (i + u) - 1)
+
+        index_{end} &= ceil( \alpha * (i + 1 + u) - 1)
+
+        Output &= max(Input[index_{start}:index_{end}])
+
+    where ``u`` is in range ``(0, 1)`` and ``i = 0, 1, 2, ..., size_{output} - 1``.
+
+    The ``u`` in the formula is the parameter ``random_u``; ``1`` is subtracted because indices
+    start from ``0`` instead of ``1`` where ``ceil`` takes effect.
+
+    For instance, given a sequence of length ``7``, ``[2, 4, 3, 1, 5, 2, 3]``, with ``output_size`` of ``5`` and
+    ``random_u`` of ``0.3``: ``alpha = 7/5 = 1.4``, the start indices are ``[0, 1, 3, 4, 6]``, the end indices are
+    ``[1, 3, 4, 6, 7]``, which makes the random sequence from the paper ``index_end - index_start = [1, 2, 1, 2, 1]``.
+    The strides and kernel sizes are both equal to this random sequence, so the final pooling output is ``[2, 4, 1, 5, 3]``.
+
+    Parameters:
+        x (Tensor): The input tensor of fractional max pool2d operator, which is a 4-D tensor. The data type can be float32, float64.
+        output_size(int|list|tuple): The output size. If output size is a tuple or list, it must contain
+            two elements, (H, W). H and W can be either an int, or None which means the size will be the same as that of
+            the input.
+        random_u(float): A random float number in range (0, 1) for the fractional pooling.
+            Default None, meaning it is generated randomly by the framework, which can be fixed by ``paddle.seed``.
+        return_mask(bool, optional): If true, the index of the max pooling point will be returned along with the output. Default False.
+        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
+            Usually name does not need to be set and is None by default.
+
+    Returns:
+        Tensor: The output tensor of fractional max pool2d, which is a 4-D tensor. The data type is the same as the input tensor.
+
+    Examples:
+        .. code-block:: python
+
+            >>> # fractional max pool2d
+            >>> # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
+            >>> # output shape is [N, C, m, n], fractional pool divides H and W dimensions
+            >>> # of input data into m * n grids and performs pooling in each
+            >>> # grid to get output.
+
+            >>> import paddle
+
+            >>> x = paddle.rand([2, 3, 32, 32])
+
+            >>> pool_out = paddle.nn.functional.fractional_max_pool2d(x, output_size=3)
+            >>> print(pool_out.shape)
+            [2, 3, 3, 3]
+
+            >>> pool_out, indices = paddle.nn.functional.fractional_max_pool2d(x, output_size=[2, 3], return_mask=True)
+            >>> print(pool_out.shape)
+            [2, 3, 2, 3]
+            >>> print(indices.shape)
+            [2, 3, 2, 3]
+    """
+    _check_input(x, 4)
+
+    if random_u is None:
+        random_u = 0.0
+    else:
+        if random_u <= 0 or random_u >= 1:
+            raise ValueError(
+                "The param `random_u` should be a `float` in (0, 1)."
+            )
+
+    in_h, in_w = x.shape[2:4]
+    if isinstance(output_size, int):
+        output_size = convert_to_list(output_size, 2, 'output_size')
+    else:
+        output_size = list(output_size)
+        if output_size[0] is None:
+            output_size[0] = in_h
+        if output_size[1] is None:
+            output_size[1] = in_w
+
+    if in_dygraph_mode():
+        pool_out = _C_ops.max_pool2d_with_index(
+            x, output_size, [1, 1], [0, 0], False, False, True, float(random_u)
+        )
+        return pool_out if return_mask else pool_out[0]
+    else:
+        l_type = 'max_pool2d_with_index'
+
+        check_variable_and_dtype(
+            x, 'x', ['float32', 'float64'], 'fractional_max_pool2d'
+        )
+        check_type(return_mask, 'return_mask', bool, 'fractional_max_pool2d')
+
+        check_type(
+            random_u,
+            'random_u',
+            float,
+            'fractional_max_pool2d',
+        )
+
+        helper = LayerHelper(l_type, **locals())
+        dtype = helper.input_dtype(input_param_name='x')
+        pool_out = helper.create_variable_for_type_inference(dtype)
+
+        mask = helper.create_variable_for_type_inference('int32')
+        outputs = {"Out": pool_out, "Mask": mask}
+
+        helper.append_op(
+            type=l_type,
+            inputs={"X": x},
+            outputs=outputs,
+            attrs={
+                "pooling_type": 'max',
+                "ksize": output_size,
+                "fractional": True,
+                "random_u": float(random_u),
+            },
+        )
+
+        return (pool_out, mask) if return_mask else pool_out
+
+
+def fractional_max_pool3d(
+    x, output_size, random_u=None, return_mask=False, name=None
+):
+    r"""
+    This operation applies 3D fractional max pooling on the input tensor, which is described in the paper:
+
+    [1] Ben Graham, Fractional Max-Pooling. 2015. http://arxiv.org/abs/1412.6071
+
+    The d, h and w dimensions of the output tensor are determined by the parameter output_size.
+
+    For each dimension, the fractional max pooling:
+
+    ..  math::
+
+        \alpha &= size_{input} / size_{output}
+
+        index_{start} &= ceil( \alpha * (i + u) - 1)
+
+        index_{end} &= ceil( \alpha * (i + 1 + u) - 1)
+
+        Output &= max(Input[index_{start}:index_{end}])
+
+    where ``u`` is in range ``(0, 1)`` and ``i = 0, 1, 2, ..., size_{output} - 1``.
+
+    The ``u`` in the formula is the parameter ``random_u``; ``1`` is subtracted because indices
+    start from ``0`` instead of ``1`` where ``ceil`` takes effect.
+
+    For instance, given a sequence of length ``7``, ``[2, 4, 3, 1, 5, 2, 3]``, with ``output_size`` of ``5`` and
+    ``random_u`` of ``0.3``: ``alpha = 7/5 = 1.4``, the start indices are ``[0, 1, 3, 4, 6]``, the end indices are
+    ``[1, 3, 4, 6, 7]``, which makes the random sequence from the paper ``index_end - index_start = [1, 2, 1, 2, 1]``.
+    The strides and kernel sizes are both equal to this random sequence, so the final pooling output is ``[2, 4, 1, 5, 3]``.
+
+    Parameters:
+        x (Tensor): The input tensor of fractional max pool3d operator, which is a 5-D tensor. The data type can be float32, float64.
+        output_size(int|list|tuple): The output size. If output size is a tuple or list, it must contain
+            three elements, (D, H, W). D, H and W can be either an int, or None which means the size will be the same as that of
+            the input.
+        random_u(float): A random float number in range (0, 1) for the fractional pooling.
+            Default None, meaning it is generated randomly by the framework, which can be fixed by ``paddle.seed``.
+        return_mask(bool, optional): If true, the index of the max pooling point will be returned along with the output. Default False.
+        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
+            Usually name does not need to be set and is None by default.
+
+    Returns:
+        Tensor: The output tensor of fractional max pool3d, which is a 5-D tensor. The data type is the same as the input tensor.
+
+    Examples:
+        .. code-block:: python
+
+            >>> # fractional max pool3d
+            >>> # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
+            >>> # output shape is [N, C, l, m, n], fractional pool divides D, H and W dimensions
+            >>> # of input data into l * m * n grids and performs pooling in each
+            >>> # grid to get output.
+
+            >>> import paddle
+
+            >>> x = paddle.rand([2, 3, 8, 32, 32])
+
+            >>> pool_out = paddle.nn.functional.fractional_max_pool3d(x, output_size=3)
+            >>> print(pool_out.shape)
+            [2, 3, 3, 3, 3]
+
+            >>> pool_out, indices = paddle.nn.functional.fractional_max_pool3d(x, output_size=[2, 3, 3], return_mask=True)
+            >>> print(pool_out.shape)
+            [2, 3, 2, 3, 3]
+            >>> print(indices.shape)
+            [2, 3, 2, 3, 3]
+    """
+    _check_input(x, 5)
+
+    if random_u is None:
+        random_u = 0.0
+    else:
+        if random_u <= 0 or random_u >= 1:
+            raise ValueError(
+                "The param `random_u` should be a `float` in (0, 1)."
+            )
+
+    in_l, in_h, in_w = x.shape[2:5]
+    if isinstance(output_size, int):
+        output_size = convert_to_list(output_size, 3, 'output_size')
+    else:
+        output_size = list(output_size)
+        if output_size[0] is None:
+            output_size[0] = in_l
+        if output_size[1] is None:
+            output_size[1] = in_h
+        if output_size[2] is None:
+            output_size[2] = in_w
+
+    if in_dygraph_mode():
+        # By default, strides is [1,1,1] and paddings is [0, 0, 0]
+        pool_out = _C_ops.max_pool3d_with_index(
+            x,
+            output_size,
+            [1, 1, 1],
+            [0, 0, 0],
+            False,
+            False,
+            True,
+            float(random_u),
+        )
+        return pool_out if return_mask else pool_out[0]
+    else:
+        l_type = 'max_pool3d_with_index'
+
+        check_variable_and_dtype(
+            x, 'x', ['float32', 'float64'], 'fractional_max_pool3d'
+        )
+        check_type(return_mask, 'return_mask', bool, 'fractional_max_pool3d')
+
+        check_type(
+            random_u,
+            'random_u',
+            float,
+            'fractional_max_pool3d',
+        )
+
+        helper = LayerHelper(l_type, **locals())
+        dtype = helper.input_dtype(input_param_name='x')
+        pool_out = helper.create_variable_for_type_inference(dtype)
+
+        mask = helper.create_variable_for_type_inference('int32')
+        outputs = {"Out": pool_out, "Mask": mask}
+
+        helper.append_op(
+            type=l_type,
+            inputs={"X": x},
+            outputs=outputs,
+            attrs={
+                "pooling_type": 'max',
+                "ksize": output_size,
+                "fractional": True,
+                "random_u": float(random_u),
+            },
+        )
+
+        return (pool_out, mask) if return_mask else pool_out
diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py
index 9271c5ecc10e11..6eda37c48ba5b0 100644
--- a/python/paddle/nn/layer/__init__.py
+++ b/python/paddle/nn/layer/__init__.py
@@ -97,6 +97,8 @@
     AdaptiveMaxPool1D,
     AdaptiveMaxPool2D,
     AdaptiveMaxPool3D,
+    FractionalMaxPool2D,
+    FractionalMaxPool3D,
     AvgPool1D,
     AvgPool2D,
     AvgPool3D,
diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py
index 3108aeebeded4d..ebb98c567a461b 100755
--- a/python/paddle/nn/layer/pooling.py
+++ b/python/paddle/nn/layer/pooling.py
@@ -1141,6 +1141,204 @@ def extra_repr(self):
         )


+class FractionalMaxPool2D(Layer):
+    r"""
+    This operation applies 2D fractional max pooling on the input tensor, which is described in the paper:
+
+    [1] Ben Graham, Fractional Max-Pooling. 2015. http://arxiv.org/abs/1412.6071
+
+    The h and w dimensions of the output tensor are determined by the parameter output_size.
+
+    For each dimension, the fractional max pooling:
+
+    ..  math::
+
+        \alpha &= size_{input} / size_{output}
+
+        index_{start} &= ceil( \alpha * (i + u) - 1)
+
+        index_{end} &= ceil( \alpha * (i + 1 + u) - 1)
+
+        Output &= max(Input[index_{start}:index_{end}])
+
+    where ``u`` is in range ``(0, 1)`` and ``i = 0, 1, 2, ..., size_{output} - 1``.
+
+    The ``u`` in the formula is the parameter ``random_u``; ``1`` is subtracted because indices
+    start from ``0`` instead of ``1`` where ``ceil`` takes effect.
+
+    For instance, given a sequence of length ``7``, ``[2, 4, 3, 1, 5, 2, 3]``, with ``output_size`` of ``5`` and
+    ``random_u`` of ``0.3``: ``alpha = 7/5 = 1.4``, the start indices are ``[0, 1, 3, 4, 6]``, the end indices are
+    ``[1, 3, 4, 6, 7]``, which makes the random sequence from the paper ``index_end - index_start = [1, 2, 1, 2, 1]``.
+    The strides and kernel sizes are both equal to this random sequence, so the final pooling output is ``[2, 4, 1, 5, 3]``.
+
+    Parameters:
+        output_size(int|list|tuple): The output size. If output size is a tuple or list, it must contain
+            two elements, (H, W). H and W can be either an int, or None which means the size will be the same as that of
+            the input.
+        random_u(float): A random float number in range (0, 1) for the fractional pooling.
+            Default None, meaning it is generated randomly by the framework, which can be fixed by ``paddle.seed``.
+        return_mask(bool, optional): If true, the index of the max pooling point will be returned along with the output. Default False.
+        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
+            Usually name does not need to be set and is None by default.
+
+    Shape:
+        - x(Tensor): The input tensor of fractional max pool2d operator, which is a 4-D tensor.
+          The data type can be float32, float64.
+        - output(Tensor): The output tensor of fractional max pool2d operator, which is a 4-D tensor.
+          The data type is the same as the input x.
+
+    Returns:
+        A callable object of FractionalMaxPool2D.
+
+    Examples:
+        .. code-block:: python
+
+            >>> # fractional max pool2d
+            >>> # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
+            >>> # output shape is [N, C, m, n], fractional pool divides H and W dimensions
+            >>> # of input data into m * n grids and performs pooling in each
+            >>> # grid to get output.
+
+            >>> import paddle
+
+            >>> x = paddle.rand([2, 3, 32, 32])
+
+            >>> fractional_max_pool = paddle.nn.FractionalMaxPool2D(output_size=3)
+            >>> pool_out = fractional_max_pool(x=x)
+            >>> print(pool_out.shape)
+            [2, 3, 3, 3]
+
+            >>> fractional_max_pool = paddle.nn.FractionalMaxPool2D(output_size=[2, 3], return_mask=True)
+            >>> pool_out, indices = fractional_max_pool(x=x)
+            >>> print(pool_out.shape)
+            [2, 3, 2, 3]
+            >>> print(indices.shape)
+            [2, 3, 2, 3]
+    """
+
+    def __init__(
+        self, output_size, random_u=None, return_mask=False, name=None
+    ):
+        super().__init__()
+        self._output_size = output_size
+        self._random_u = random_u
+        self._return_mask = return_mask
+        self._name = name
+
+    def forward(self, x):
+        return F.fractional_max_pool2d(
+            x,
+            output_size=self._output_size,
+            random_u=self._random_u,
+            return_mask=self._return_mask,
+            name=self._name,
+        )
+
+    def extra_repr(self):
+        return (
+            f'output_size={self._output_size}, return_mask={self._return_mask}'
+        )
+
+
+class FractionalMaxPool3D(Layer):
+    r"""
+    This operation applies 3D fractional max pooling on the input tensor, which is described in the paper:
+
+    [1] Ben Graham, Fractional Max-Pooling. 2015. http://arxiv.org/abs/1412.6071
+
+    The d, h and w dimensions of the output tensor are determined by the parameter output_size.
+
+    For each dimension, the fractional max pooling:
+
+    ..  math::
+
+        \alpha &= size_{input} / size_{output}
+
+        index_{start} &= ceil( \alpha * (i + u) - 1)
+
+        index_{end} &= ceil( \alpha * (i + 1 + u) - 1)
+
+        Output &= max(Input[index_{start}:index_{end}])
+
+    where ``u`` is in range ``(0, 1)`` and ``i = 0, 1, 2, ..., size_{output} - 1``.
+
+    The ``u`` in the formula is the parameter ``random_u``; ``1`` is subtracted because indices
+    start from ``0`` instead of ``1`` where ``ceil`` takes effect.
+
+    For instance, given a sequence of length ``7``, ``[2, 4, 3, 1, 5, 2, 3]``, with ``output_size`` of ``5`` and
+    ``random_u`` of ``0.3``: ``alpha = 7/5 = 1.4``, the start indices are ``[0, 1, 3, 4, 6]``, the end indices are
+    ``[1, 3, 4, 6, 7]``, which makes the random sequence from the paper ``index_end - index_start = [1, 2, 1, 2, 1]``.
+    The strides and kernel sizes are both equal to this random sequence, so the final pooling output is ``[2, 4, 1, 5, 3]``.
+
+    Parameters:
+        output_size(int|list|tuple): The output size. If output size is a tuple or list, it must contain
+            three elements, (D, H, W).
+            D, H and W can be either an int, or None which means the size will be the same as that of
+            the input.
+        random_u(float): A random float number in range (0, 1) for the fractional pooling.
+            Default None, meaning it is generated randomly by the framework, which can be fixed by ``paddle.seed``.
+        return_mask(bool, optional): If true, the index of the max pooling point will be returned along with the output. Default False.
+        name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`.
+            Usually name does not need to be set and is None by default.
+
+    Shape:
+        - x(Tensor): The input tensor of fractional max pool3d operator, which is a 5-D tensor.
+          The data type can be float32, float64.
+        - output(Tensor): The output tensor of fractional max pool3d operator, which is a 5-D tensor.
+          The data type is the same as the input x.
+
+    Returns:
+        A callable object of FractionalMaxPool3D.
+
+    Examples:
+        .. code-block:: python
+
+            >>> # fractional max pool3d
+            >>> # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
+            >>> # output shape is [N, C, l, m, n], fractional pool divides D, H and W dimensions
+            >>> # of input data into l * m * n grids and performs pooling in each
+            >>> # grid to get output.
+
+            >>> import paddle
+
+            >>> x = paddle.rand([2, 3, 8, 32, 32])
+
+            >>> fractional_max_pool = paddle.nn.FractionalMaxPool3D(output_size=3)
+            >>> pool_out = fractional_max_pool(x=x)
+            >>> print(pool_out.shape)
+            [2, 3, 3, 3, 3]
+
+            >>> fractional_max_pool = paddle.nn.FractionalMaxPool3D(output_size=[2, 3, 3], return_mask=True)
+            >>> pool_out, indices = fractional_max_pool(x=x)
+            >>> print(pool_out.shape)
+            [2, 3, 2, 3, 3]
+            >>> print(indices.shape)
+            [2, 3, 2, 3, 3]
+    """
+
+    def __init__(
+        self, output_size, random_u=None, return_mask=False, name=None
+    ):
+        super().__init__()
+        self._output_size = output_size
+        self._random_u = random_u
+        self._return_mask = return_mask
+        self._name = name
+
+    def forward(self, x):
+        return F.fractional_max_pool3d(
+            x,
+            output_size=self._output_size,
+            random_u=self._random_u,
+            return_mask=self._return_mask,
+            name=self._name,
+        )
+
+    def extra_repr(self):
+        return (
+            f'output_size={self._output_size}, return_mask={self._return_mask}'
+        )
+
+
 class MaxUnPool1D(Layer):
     r"""
     This API implements max unpooling 1d opereation.
diff --git a/test/legacy_test/test_fractional_max_pool2d.py b/test/legacy_test/test_fractional_max_pool2d.py
new file mode 100644
index 00000000000000..fc79d78ad88d59
--- /dev/null
+++ b/test/legacy_test/test_fractional_max_pool2d.py
@@ -0,0 +1,413 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from op_test import check_out_dtype
+
+import paddle
+import paddle.nn.functional as F
+from paddle import base
+from paddle.base import core
+
+
+def fractional_rational_u(u, alpha, input, output):
+    base = input // output
+
+    u_max1 = (base + 2) / alpha - 1
+    u_max2 = (input + 1 - base) / alpha - (output - 1)
+    max_u = min(u_max1, u_max2)
+
+    return u * max_u
+
+
+def fractional_start_index(idx, alpha, u):
+    return int(np.ceil(alpha * (idx + u) - 1))
+
+
+def fractional_end_index(idx, alpha, u):
+    return int(np.ceil(alpha * (idx + 1 + u) - 1))
+
+
+def fractional_pool2d_forward(
+    x,
+    output_size,
+    random_u=None,
+    data_format='NCHW',
+    pool_type="max",
+):
+    N = x.shape[0]
+    C, H, W = (
+        [x.shape[1], x.shape[2], x.shape[3]]
+        if data_format == 'NCHW'
+        else [x.shape[3], x.shape[1], x.shape[2]]
+    )
+
+    if isinstance(output_size, int) or output_size is None:
+        H_out = output_size
+        W_out = output_size
+        output_size = [H_out, W_out]
+    else:
+        H_out, W_out = output_size
+
+    if output_size[0] is None:
+        output_size[0] = H
+        H_out = H
+    if output_size[1] is None:
+        output_size[1] = W
+        W_out = W
+
+    out = (
+        np.zeros((N, C, H_out, W_out))
+        if data_format == 'NCHW'
+        else np.zeros((N, H_out, W_out, C))
+    )
+
+    u = random_u
+
+    alpha_height = H / H_out
+    alpha_width = W / W_out
+
+    u_height = fractional_rational_u(u, alpha_height, H, H_out)
+    u_width = fractional_rational_u(u, alpha_width, W, W_out)
+
+    for i in range(H_out):
+        h_start = fractional_start_index(i, alpha_height, u_height)
+        h_end = fractional_end_index(i, alpha_height, u_height)
+        h_start = max(h_start, 0)
+        h_end = min(h_end, H)
+
+        for j in range(W_out):
+            w_start = fractional_start_index(j, alpha_width, u_width)
+            w_end = fractional_end_index(j, alpha_width, u_width)
+            w_start = max(w_start, 0)
+            w_end = min(w_end, W)
+
+            if data_format == 'NCHW':
+                x_masked = x[:, :, h_start:h_end, w_start:w_end]
+                if pool_type == 'avg':
+                    field_size = (h_end - h_start) * (w_end - w_start)
+                    out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size
+                elif pool_type == 'max':
+                    out[:, :, i, j] = np.max(x_masked, axis=(2, 3))
+            elif data_format == 'NHWC':
+                x_masked = x[:, h_start:h_end, w_start:w_end, :]
+                if pool_type == 'avg':
+                    field_size = (h_end - h_start) * (w_end - w_start)
+                    out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size
+                elif pool_type == 'max':
+                    out[:, i, j, :] = np.max(x_masked, axis=(1, 2))
+    return out
+
+
+class TestFractionalMaxPool2DAPI(unittest.TestCase):
+    def setUp(self):
+        self.x_np = np.random.random([2, 3, 7, 7]).astype("float32")
+        self.res_1_np = fractional_pool2d_forward(
+            x=self.x_np, output_size=[3, 3], random_u=0.3
+        )
+
+        self.res_2_np = fractional_pool2d_forward(
+            x=self.x_np, output_size=5, random_u=0.5
+        )
+
+        self.res_3_np = fractional_pool2d_forward(
+            x=self.x_np, output_size=[2, 5], random_u=0.7
+        )
+
+        # self.res_4_np = fractional_pool2d_forward(
+        #     x=self.x_np,
+        #     output_size=[3, 3],
+        #     pool_type="max",
+        #     data_format="NHWC",
+        #     random_u=0.1)
+
+        self.res_5_np = fractional_pool2d_forward(
+            x=self.x_np, output_size=[None, 3], random_u=0.6
+        )
+
+    def test_static_graph(self):
+        for use_cuda in (
+            [False, True] if core.is_compiled_with_cuda() else [False]
+        ):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+            paddle.enable_static()
+            x = paddle.static.data(
+                name="x", shape=[2, 3, 7, 7], dtype="float32"
+            )
+
+            out_1 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=[3, 3], random_u=0.3
+            )
+
+            out_2 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=5, random_u=0.5
+            )
+
+            out_3 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=[2, 5], random_u=0.7
+            )
+
+            # out_4 = paddle.nn.functional.fractional_max_pool2d(
+            #     x=x, output_size=[3, 3], data_format="NHWC", random_u=0.1)
+
+            out_5 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=[None, 3], random_u=0.6
+            )
+
+            exe = paddle.static.Executor(place=place)
+            [res_1, res_2, res_3, res_5] = exe.run(
+                base.default_main_program(),
+                feed={"x": self.x_np},
+                fetch_list=[out_1, out_2, out_3, out_5],
+            )
+
+            np.testing.assert_allclose(res_1, self.res_1_np)
+
+            np.testing.assert_allclose(res_2, self.res_2_np)
+
+            np.testing.assert_allclose(res_3, self.res_3_np)
+
+            # np.testing.assert_allclose(res_4, self.res_4_np)
+
+            np.testing.assert_allclose(res_5, self.res_5_np)
+
+    def test_static_graph_return_mask(self):
+        for use_cuda in (
+            [False, True] if core.is_compiled_with_cuda() else [False]
+        ):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+            paddle.enable_static()
+            x = paddle.static.data(
+                name="x", shape=[2, 3, 7, 7], dtype="float32"
+            )
+
+            out_1 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=[3, 3], return_mask=True, random_u=0.3
+            )
+
+            out_2 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=5, return_mask=True, random_u=0.5
+            )
+
+            out_3 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=[2, 5], return_mask=True, random_u=0.7
+            )
+
+            # out_4 = paddle.nn.functional.fractional_max_pool2d(
+            #     x=x, output_size=[3, 3], data_format="NHWC", return_mask=True, random_u=0.1)
+
+            out_5 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=[None, 3], return_mask=True, random_u=0.6
+            )
+
+            exe = paddle.static.Executor(place=place)
+            [
+                res_1,
+                mask_1,
+                res_2,
+                mask_2,
+                res_3,
+                mask_3,
+                res_5,
+                mask_5,
+            ] = exe.run(
+                base.default_main_program(),
+                feed={"x": self.x_np},
+                fetch_list=[out_1, out_2, out_3, out_5],
+            )
+
+            self.assertEqual(res_1.shape, mask_1.shape)
+
+            self.assertEqual(res_2.shape, mask_2.shape)
+
+            self.assertEqual(res_3.shape, mask_3.shape)
+
+            # self.assertEqual(res_4.shape, mask_4.shape)
+
+            self.assertEqual(res_5.shape, mask_5.shape)
+
+    def test_dynamic_graph(self):
+        for use_cuda in (
+            [False, True] if core.is_compiled_with_cuda() else [False]
+        ):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+            paddle.disable_static(place=place)
+            x = paddle.to_tensor(self.x_np)
+
+            out_1 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, return_mask=False, output_size=[3, 3], random_u=0.3
+            )
+
+            out_2 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=5, random_u=0.5
+            )
+
+            out_3 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=[2, 5], random_u=0.7
+            )
+
+            # out_4 = paddle.nn.functional.fractional_max_pool2d(
+            #     x=x, output_size=[3, 3], data_format="NHWC", random_u=0.1)
+
+            out_5 = paddle.nn.functional.fractional_max_pool2d(
+                x=x, output_size=[None, 3], random_u=0.6
+            )
+
+            np.testing.assert_allclose(out_1.numpy(), self.res_1_np)
+
+            np.testing.assert_allclose(out_2.numpy(), self.res_2_np)
+
+            np.testing.assert_allclose(out_3.numpy(), self.res_3_np)
+
+            # np.testing.assert_allclose(out_4.numpy(), self.res_4_np)
+
+            np.testing.assert_allclose(out_5.numpy(), self.res_5_np)
+
+
+class TestFractionalMaxPool2DClassAPI(unittest.TestCase):
+    def setUp(self):
+        self.x_np = np.random.random([2, 3, 7, 7]).astype("float32")
+        self.res_1_np = fractional_pool2d_forward(
x=self.x_np, output_size=[3, 3], random_u=0.3 + ) + + self.res_2_np = fractional_pool2d_forward( + x=self.x_np, output_size=5, random_u=0.5 + ) + + self.res_3_np = fractional_pool2d_forward( + x=self.x_np, output_size=[2, 5], random_u=0.7 + ) + + # self.res_4_np = fractional_pool2d_forward( + # x=self.x_np, + # output_size=[3, 3], + # pool_type="max", + # data_format="NHWC", + # random_u=0.1) + + self.res_5_np = fractional_pool2d_forward( + x=self.x_np, output_size=[None, 3], random_u=0.6 + ) + + def test_static_graph(self): + for use_cuda in ( + [False, True] if core.is_compiled_with_cuda() else [False] + ): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.enable_static() + x = paddle.static.data( + name="x", shape=[2, 3, 7, 7], dtype="float32" + ) + + fractional_max_pool = paddle.nn.FractionalMaxPool2D( + output_size=[3, 3], random_u=0.3 + ) + out_1 = fractional_max_pool(x=x) + + fractional_max_pool = paddle.nn.FractionalMaxPool2D( + output_size=5, random_u=0.5 + ) + out_2 = fractional_max_pool(x=x) + + fractional_max_pool = paddle.nn.FractionalMaxPool2D( + output_size=[2, 5], random_u=0.7 + ) + out_3 = fractional_max_pool(x=x) + + # fractional_max_pool = paddle.nn.FractionalMaxPool2D( + # output_size=[3, 3], data_format="NHWC", random_u=0.1) + # out_4 = fractional_max_pool(x=x) + + fractional_max_pool = paddle.nn.FractionalMaxPool2D( + output_size=[None, 3], random_u=0.6 + ) + out_5 = fractional_max_pool(x=x) + + exe = paddle.static.Executor(place=place) + [res_1, res_2, res_3, res_5] = exe.run( + base.default_main_program(), + feed={"x": self.x_np}, + fetch_list=[out_1, out_2, out_3, out_5], + ) + + np.testing.assert_allclose(res_1, self.res_1_np) + + np.testing.assert_allclose(res_2, self.res_2_np) + + np.testing.assert_allclose(res_3, self.res_3_np) + + # np.testing.assert_allclose(res_4, self.res_4_np) + + np.testing.assert_allclose(res_5, self.res_5_np) + + def test_dynamic_graph(self): + for use_cuda in ( + [False, True] if core.is_compiled_with_cuda() else [False] + ): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + paddle.disable_static(place=place) + x = paddle.to_tensor(self.x_np) + + fractional_max_pool = paddle.nn.FractionalMaxPool2D( + output_size=[3, 3], random_u=0.3 + ) + out_1 = fractional_max_pool(x=x) + + fractional_max_pool = paddle.nn.FractionalMaxPool2D( + output_size=5, random_u=0.5 + ) + out_2 = fractional_max_pool(x=x) + + fractional_max_pool = paddle.nn.FractionalMaxPool2D( + output_size=[2, 5], random_u=0.7 + ) + out_3 = fractional_max_pool(x=x) + + # fractional_max_pool = paddle.nn.FractionalMaxPool2D( + # output_size=[3, 3], data_format="NHWC", random_u=0.1) + # out_4 = fractional_max_pool(x=x) + + fractional_max_pool = paddle.nn.FractionalMaxPool2D( + output_size=[None, 3], random_u=0.6 + ) + out_5 = fractional_max_pool(x=x) + + np.testing.assert_allclose(out_1.numpy(), self.res_1_np) + + np.testing.assert_allclose(out_2.numpy(), self.res_2_np) + + np.testing.assert_allclose(out_3.numpy(), self.res_3_np) + + # np.testing.assert_allclose(out_4.numpy(), self.res_4_np) + + np.testing.assert_allclose(out_5.numpy(), self.res_5_np) + + +class TestOutDtype(unittest.TestCase): + def test_max_pool(self): + api_fn = F.fractional_max_pool2d + shape = [1, 3, 32, 32] + check_out_dtype( + api_fn, + in_specs=[(shape,)], + expect_dtypes=['float32', 'float64'], + output_size=16, + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/legacy_test/test_fractional_max_pool3d.py 
b/test/legacy_test/test_fractional_max_pool3d.py new file mode 100644 index 00000000000000..8c454282f9c32d --- /dev/null +++ b/test/legacy_test/test_fractional_max_pool3d.py @@ -0,0 +1,440 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +from op_test import check_out_dtype + +import paddle +import paddle.nn.functional as F +from paddle import base +from paddle.base import core + + +def fractional_rational_u(u, alpha, input, output): + base = input // output + + u_max1 = (base + 2) / alpha - 1 + u_max2 = (input + 1 - base) / alpha - (output - 1) + max_u = min(u_max1, u_max2) + + return u * max_u + + +def fractional_start_index(idx, alpha, u): + return int(np.ceil(alpha * (idx + u) - 1)) + + +def fractional_end_index(idx, alpha, u): + return int(np.ceil(alpha * (idx + 1 + u) - 1)) + + +def fractional_pool3d_forward( + x, output_size, random_u=None, data_format='NCDHW', pool_type='max' +): + N = x.shape[0] + C, D, H, W = ( + [x.shape[1], x.shape[2], x.shape[3], x.shape[4]] + if data_format == 'NCDHW' + else [x.shape[4], x.shape[1], x.shape[2], x.shape[3]] + ) + + if isinstance(output_size, int) or output_size is None: + H_out = output_size + W_out = output_size + D_out = output_size + output_size = [D_out, H_out, W_out] + else: + D_out, H_out, W_out = output_size + + if output_size[0] is None: + output_size[0] = D + D_out = D + if output_size[1] is None: + output_size[1] = H + H_out = H + if output_size[2] is None: + output_size[2] = W + W_out = W + + out = ( + np.zeros((N, C, D_out, H_out, W_out)) + if data_format == 'NCDHW' + else np.zeros((N, D_out, H_out, W_out, C)) + ) + + u = random_u + + alpha_depth = D / D_out + alpha_height = H / H_out + alpha_width = W / W_out + + u_depth = fractional_rational_u(u, alpha_depth, D, D_out) + u_height = fractional_rational_u(u, alpha_height, H, H_out) + u_width = fractional_rational_u(u, alpha_width, W, W_out) + + for k in range(D_out): + d_start = fractional_start_index(k, alpha_depth, u_depth) + d_end = fractional_end_index(k, alpha_depth, u_depth) + d_start = max(d_start, 0) + d_end = min(d_end, D) + + for i in range(H_out): + h_start = fractional_start_index(i, alpha_height, u_height) + h_end = fractional_end_index(i, alpha_height, u_height) + h_start = max(h_start, 0) + h_end = min(h_end, H) + + for j in range(W_out): + w_start = fractional_start_index(j, alpha_width, u_width) + w_end = fractional_end_index(j, alpha_width, u_width) + w_start = max(w_start, 0) + w_end = min(w_end, W) + + if data_format == 'NCDHW': + x_masked = x[ + :, :, d_start:d_end, h_start:h_end, w_start:w_end + ] + if pool_type == 'avg': + field_size = ( + (d_end - d_start) + * (h_end - h_start) + * (w_end - w_start) + ) + out[:, :, k, i, j] = ( + np.sum(x_masked, axis=(2, 3, 4)) / field_size + ) + elif pool_type == 'max': + out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4)) + + elif data_format == 'NDHWC': + x_masked = x[ + :, d_start:d_end, h_start:h_end, 
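+
+
+# Same index helpers as in test_fractional_max_pool2d.py; the 3-D reference
+# below just applies the same pseudo-random region construction independently
+# to the depth, height and width axes.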
+
+
+def fractional_pool3d_forward(
+    x, output_size, random_u=None, data_format='NCDHW', pool_type='max'
+):
+    N = x.shape[0]
+    C, D, H, W = (
+        [x.shape[1], x.shape[2], x.shape[3], x.shape[4]]
+        if data_format == 'NCDHW'
+        else [x.shape[4], x.shape[1], x.shape[2], x.shape[3]]
+    )
+
+    if isinstance(output_size, int) or output_size is None:
+        H_out = output_size
+        W_out = output_size
+        D_out = output_size
+        output_size = [D_out, H_out, W_out]
+    else:
+        D_out, H_out, W_out = output_size
+
+    if output_size[0] is None:
+        output_size[0] = D
+        D_out = D
+    if output_size[1] is None:
+        output_size[1] = H
+        H_out = H
+    if output_size[2] is None:
+        output_size[2] = W
+        W_out = W
+
+    out = (
+        np.zeros((N, C, D_out, H_out, W_out))
+        if data_format == 'NCDHW'
+        else np.zeros((N, D_out, H_out, W_out, C))
+    )
+
+    u = random_u
+
+    alpha_depth = D / D_out
+    alpha_height = H / H_out
+    alpha_width = W / W_out
+
+    u_depth = fractional_rational_u(u, alpha_depth, D, D_out)
+    u_height = fractional_rational_u(u, alpha_height, H, H_out)
+    u_width = fractional_rational_u(u, alpha_width, W, W_out)
+
+    for k in range(D_out):
+        d_start = fractional_start_index(k, alpha_depth, u_depth)
+        d_end = fractional_end_index(k, alpha_depth, u_depth)
+        d_start = max(d_start, 0)
+        d_end = min(d_end, D)
+
+        for i in range(H_out):
+            h_start = fractional_start_index(i, alpha_height, u_height)
+            h_end = fractional_end_index(i, alpha_height, u_height)
+            h_start = max(h_start, 0)
+            h_end = min(h_end, H)
+
+            for j in range(W_out):
+                w_start = fractional_start_index(j, alpha_width, u_width)
+                w_end = fractional_end_index(j, alpha_width, u_width)
+                w_start = max(w_start, 0)
+                w_end = min(w_end, W)
+
+                if data_format == 'NCDHW':
+                    x_masked = x[
+                        :, :, d_start:d_end, h_start:h_end, w_start:w_end
+                    ]
+                    if pool_type == 'avg':
+                        field_size = (
+                            (d_end - d_start)
+                            * (h_end - h_start)
+                            * (w_end - w_start)
+                        )
+                        out[:, :, k, i, j] = (
+                            np.sum(x_masked, axis=(2, 3, 4)) / field_size
+                        )
+                    elif pool_type == 'max':
+                        out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4))
+
+                elif data_format == 'NDHWC':
+                    x_masked = x[
+                        :, d_start:d_end, h_start:h_end, w_start:w_end, :
+                    ]
+                    if pool_type == 'avg':
+                        field_size = (
+                            (d_end - d_start)
+                            * (h_end - h_start)
+                            * (w_end - w_start)
+                        )
+                        out[:, k, i, j, :] = (
+                            np.sum(x_masked, axis=(1, 2, 3)) / field_size
+                        )
+                    elif pool_type == 'max':
+                        out[:, k, i, j, :] = np.max(x_masked, axis=(1, 2, 3))
+    return out
+
+
+class TestFractionalMaxPool3DAPI(unittest.TestCase):
+    def setUp(self):
+        self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32")
+        self.res_1_np = fractional_pool3d_forward(
+            x=self.x_np, output_size=[3, 3, 3], random_u=0.3
+        )
+
+        self.res_2_np = fractional_pool3d_forward(
+            x=self.x_np, output_size=5, random_u=0.5
+        )
+
+        self.res_3_np = fractional_pool3d_forward(
+            x=self.x_np, output_size=[2, 3, 5], random_u=0.7
+        )
+
+        self.res_4_np = fractional_pool3d_forward(
+            x=self.x_np,
+            output_size=[3, 3, 3],
+            pool_type="max",
+            data_format="NDHWC",
+            random_u=0.1,
+        )
+
+        self.res_5_np = fractional_pool3d_forward(
+            x=self.x_np, output_size=[None, 3, None], random_u=0.6
+        )
+
+    def test_static_graph(self):
+        for use_cuda in (
+            [False, True] if core.is_compiled_with_cuda() else [False]
+        ):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+            paddle.enable_static()
+            x = paddle.static.data(
+                name="x", shape=[2, 3, 5, 7, 7], dtype="float32"
+            )
+
+            out_1 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=[3, 3, 3], random_u=0.3
+            )
+
+            out_2 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=5, random_u=0.5
+            )
+
+            out_3 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=[2, 3, 5], random_u=0.7
+            )
+
+            # out_4 = paddle.nn.functional.fractional_max_pool3d(
+            #     x=x, output_size=[3, 3, 3], data_format="NDHWC", random_u=0.1)
+
+            out_5 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=[None, 3, None], random_u=0.6
+            )
+
+            exe = paddle.static.Executor(place=place)
+            [res_1, res_2, res_3, res_5] = exe.run(
+                base.default_main_program(),
+                feed={"x": self.x_np},
+                fetch_list=[out_1, out_2, out_3, out_5],
+            )
+
+            np.testing.assert_allclose(res_1, self.res_1_np)
+
+            np.testing.assert_allclose(res_2, self.res_2_np)
+
+            np.testing.assert_allclose(res_3, self.res_3_np)
+
+            # np.testing.assert_allclose(res_4, self.res_4_np)
+
+            np.testing.assert_allclose(res_5, self.res_5_np)
+
+    def test_static_graph_return_mask(self):
+        for use_cuda in (
+            [False, True] if core.is_compiled_with_cuda() else [False]
+        ):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+            paddle.enable_static()
+            x = paddle.static.data(
+                name="x", shape=[2, 3, 5, 7, 7], dtype="float32"
+            )
+
+            out_1 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=[3, 3, 3], return_mask=True, random_u=0.3
+            )
+
+            out_2 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=5, return_mask=True, random_u=0.5
+            )
+
+            out_3 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=[2, 3, 5], return_mask=True, random_u=0.7
+            )
+
+            # out_4 = paddle.nn.functional.fractional_max_pool3d(
+            #     x=x, output_size=[3, 3, 3], data_format="NDHWC", return_mask=True, random_u=0.1)
+
+            out_5 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=[None, 3, None], return_mask=True, random_u=0.6
+            )
+
+            exe = paddle.static.Executor(place=place)
+            [
+                res_1,
+                mask_1,
+                res_2,
+                mask_2,
+                res_3,
+                mask_3,
+                res_5,
+                mask_5,
+            ] = exe.run(
+                base.default_main_program(),
+                feed={"x": self.x_np},
+                fetch_list=[out_1, out_2, out_3, out_5],
+            )
+
+            self.assertEqual(res_1.shape, mask_1.shape)
+
+            self.assertEqual(res_2.shape, mask_2.shape)
+
+            self.assertEqual(res_3.shape, mask_3.shape)
+
+            # self.assertEqual(res_4.shape, mask_4.shape)
+
+            self.assertEqual(res_5.shape, mask_5.shape)
+
+    def test_dynamic_graph(self):
+        for use_cuda in (
+            [False, True] if core.is_compiled_with_cuda() else [False]
+        ):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+            paddle.disable_static(place=place)
+            x = paddle.to_tensor(self.x_np)
+
+            out_1 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=[3, 3, 3], random_u=0.3
+            )
+
+            out_2 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=5, random_u=0.5
+            )
+
+            out_3 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=[2, 3, 5], random_u=0.7
+            )
+
+            # out_4 = paddle.nn.functional.fractional_max_pool3d(
+            #     x=x, output_size=[3, 3, 3], data_format="NDHWC", random_u=0.1)
+
+            out_5 = paddle.nn.functional.fractional_max_pool3d(
+                x=x, output_size=[None, 3, None], random_u=0.6
+            )
+
+            np.testing.assert_allclose(out_1.numpy(), self.res_1_np)
+
+            np.testing.assert_allclose(out_2.numpy(), self.res_2_np)
+
+            np.testing.assert_allclose(out_3.numpy(), self.res_3_np)
+
+            # np.testing.assert_allclose(out_4.numpy(), self.res_4_np)
+
+            np.testing.assert_allclose(out_5.numpy(), self.res_5_np)
+
+
+class TestFractionalMaxPool3DClassAPI(unittest.TestCase):
+    def setUp(self):
+        self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32")
+        self.res_1_np = fractional_pool3d_forward(
+            x=self.x_np, output_size=[3, 3, 3], random_u=0.3
+        )
+
+        self.res_2_np = fractional_pool3d_forward(
+            x=self.x_np, output_size=5, random_u=0.5
+        )
+
+        self.res_3_np = fractional_pool3d_forward(
+            x=self.x_np, output_size=[2, 3, 5], random_u=0.7
+        )
+
+        # self.res_4_np = fractional_pool3d_forward(
+        #     x=self.x_np,
+        #     output_size=[3, 3, 3],
+        #     pool_type="max",
+        #     data_format="NDHWC",
+        #     random_u=0.1
+        # )
+
+        self.res_5_np = fractional_pool3d_forward(
+            x=self.x_np, output_size=[None, 3, None], random_u=0.6
+        )
+
+    def test_static_graph(self):
+        for use_cuda in (
+            [False, True] if core.is_compiled_with_cuda() else [False]
+        ):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+            paddle.enable_static()
+            x = paddle.static.data(
+                name="x", shape=[2, 3, 5, 7, 7], dtype="float32"
+            )
+
+            fractional_max_pool = paddle.nn.FractionalMaxPool3D(
+                output_size=[3, 3, 3], random_u=0.3
+            )
+            out_1 = fractional_max_pool(x=x)
+
+            fractional_max_pool = paddle.nn.FractionalMaxPool3D(
+                output_size=5, random_u=0.5
+            )
+            out_2 = fractional_max_pool(x=x)
+
+            fractional_max_pool = paddle.nn.FractionalMaxPool3D(
+                output_size=[2, 3, 5], random_u=0.7
+            )
+            out_3 = fractional_max_pool(x=x)
+
+            # fractional_max_pool = paddle.nn.FractionalMaxPool3D(
+            #     output_size=[3, 3, 3], data_format="NDHWC", random_u=0.1)
+            # out_4 = fractional_max_pool(x=x)
+
+            fractional_max_pool = paddle.nn.FractionalMaxPool3D(
+                output_size=[None, 3, None], random_u=0.6
+            )
+            out_5 = fractional_max_pool(x=x)
+
+            exe = paddle.static.Executor(place=place)
+            [res_1, res_2, res_3, res_5] = exe.run(
+                base.default_main_program(),
+                feed={"x": self.x_np},
+                fetch_list=[out_1, out_2, out_3, out_5],
+            )
+
+            np.testing.assert_allclose(res_1, self.res_1_np)
+
+            np.testing.assert_allclose(res_2, self.res_2_np)
+
+            np.testing.assert_allclose(res_3, self.res_3_np)
+
+            # assert np.allclose(res_4, self.res_4_np)
+
+            np.testing.assert_allclose(res_5, self.res_5_np)
+
+    def test_dynamic_graph(self):
+        for use_cuda in (
+            [False, True] if core.is_compiled_with_cuda() else [False]
+        ):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+            paddle.disable_static(place=place)
+            x = paddle.to_tensor(self.x_np)
+
+            fractional_max_pool = paddle.nn.FractionalMaxPool3D(
+                output_size=[3, 3, 3], random_u=0.3
+            )
+            out_1 = fractional_max_pool(x=x)
+
+            fractional_max_pool = paddle.nn.FractionalMaxPool3D(
+                output_size=5, random_u=0.5
+            )
+            out_2 = fractional_max_pool(x=x)
+
+            fractional_max_pool = paddle.nn.FractionalMaxPool3D(
+                output_size=[2, 3, 5], random_u=0.7
+            )
+            out_3 = fractional_max_pool(x=x)
+
+            # fractional_max_pool = paddle.nn.FractionalMaxPool3D(
+            #     output_size=[3, 3, 3], data_format="NDHWC", random_u=0.1)
+            # out_4 = fractional_max_pool(x=x)
+
+            fractional_max_pool = paddle.nn.FractionalMaxPool3D(
+                output_size=[None, 3, None], random_u=0.6
+            )
+            out_5 = fractional_max_pool(x=x)
+
+            np.testing.assert_allclose(out_1.numpy(), self.res_1_np)
+
+            np.testing.assert_allclose(out_2.numpy(), self.res_2_np)
+
+            np.testing.assert_allclose(out_3.numpy(), self.res_3_np)
+
+            # assert np.allclose(out_4.numpy(), self.res_4_np)
+
+            np.testing.assert_allclose(out_5.numpy(), self.res_5_np)
+
+
+class TestOutDtype(unittest.TestCase):
+    def test_max_pool(self):
+        api_fn = F.fractional_max_pool3d
+        shape = [1, 3, 32, 32, 32]
+        check_out_dtype(
+            api_fn,
+            in_specs=[(shape,)],
+            expect_dtypes=['float32', 'float64'],
+            output_size=16,
+        )
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/legacy_test/test_pool_max_op.py b/test/legacy_test/test_pool_max_op.py
index 23740d39b8ef31..be12c58ec34872 100644
--- a/test/legacy_test/test_pool_max_op.py
+++ b/test/legacy_test/test_pool_max_op.py
@@ -35,26 +35,68 @@ def adaptive_end_index(index, input_size, output_size):
     return int(np.ceil((index + 1) * input_size / output_size))
 
 
+def fractional_rational_u(u, alpha, input, output):
+    base = input // output
+
+    u_max1 = (base + 2) / alpha - 1
+    u_max2 = (input + 1 - base) / alpha - (output - 1)
+    max_u = min(u_max1, u_max2)
+
+    return u * max_u
+
+
+def fractional_start_index(idx, alpha, u):
+    return int(np.ceil(alpha * (idx + u) - 1))
+
+
+def fractional_end_index(idx, alpha, u):
+    return int(np.ceil(alpha * (idx + 1 + u) - 1))
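+
+
+# Duplicated fractional pooling index helpers (see
+# test/legacy_test/test_fractional_max_pool2d.py); here they extend the naive
+# max-pool references so the compiled max_pool{2,3}d_with_index kernels can be
+# checked with fractional=True and a fixed random_u.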
+
+
 def max_pool3D_forward_naive(
-    x, ksize, strides, paddings, global_pool=False, adaptive=False
+    x,
+    ksize,
+    strides,
+    paddings,
+    global_pool=False,
+    adaptive=False,
+    fractional=False,
+    random_u=None,
 ):
     N, C, D, H, W = x.shape
     if global_pool:
         ksize = [D, H, W]
         paddings = [0, 0, 0]
-    if adaptive:
+    if adaptive or fractional:
         D_out, H_out, W_out = ksize
     else:
         D_out = (D - ksize[0] + 2 * paddings[0]) // strides[0] + 1
         H_out = (H - ksize[1] + 2 * paddings[1]) // strides[1] + 1
         W_out = (W - ksize[2] + 2 * paddings[2]) // strides[2] + 1
+
+    if fractional:
+        u = random_u
+
+        alpha_depth = D / D_out
+        alpha_height = H / H_out
+        alpha_width = W / W_out
+
+        u_depth = fractional_rational_u(u, alpha_depth, D, D_out)
+        u_height = fractional_rational_u(u, alpha_height, H, H_out)
+        u_width = fractional_rational_u(u, alpha_width, W, W_out)
+
     out = np.zeros((N, C, D_out, H_out, W_out))
     mask = np.zeros((N, C, D_out, H_out, W_out))
     for k in range(D_out):
         if adaptive:
             d_start = adaptive_start_index(k, D, ksize[0])
             d_end = adaptive_end_index(k, D, ksize[0])
+        elif fractional:
+            d_start = fractional_start_index(k, alpha_depth, u_depth)
+            d_end = fractional_end_index(k, alpha_depth, u_depth)
+            d_start = max(d_start, 0)
+            d_end = min(d_end, D)
         else:
             d_start = np.max((k * strides[0] - paddings[0], 0))
             d_end = np.min((k * strides[0] + ksize[0] - paddings[0], D))
@@ -62,6 +104,11 @@ def max_pool3D_forward_naive(
             if adaptive:
                 h_start = adaptive_start_index(i, H, ksize[1])
                 h_end = adaptive_end_index(i, H, ksize[1])
+            elif fractional:
+                h_start = fractional_start_index(i, alpha_height, u_height)
+                h_end = fractional_end_index(i, alpha_height, u_height)
+                h_start = max(h_start, 0)
+                h_end = min(h_end, H)
             else:
                 h_start = np.max((i * strides[1] - paddings[1], 0))
                 h_end = np.min((i * strides[1] + ksize[1] - paddings[1], H))
@@ -69,6 +116,11 @@ def max_pool3D_forward_naive(
                 if adaptive:
                     w_start = adaptive_start_index(j, W, ksize[2])
                     w_end = adaptive_end_index(j, W, ksize[2])
+                elif fractional:
+                    w_start = fractional_start_index(j, alpha_width, u_width)
+                    w_end = fractional_end_index(j, alpha_width, u_width)
+                    w_start = max(w_start, 0)
+                    w_end = min(w_end, W)
                 else:
                     w_start = np.max((j * strides[2] - paddings[2], 0))
                     w_end = np.min((j * strides[2] + ksize[2] - paddings[2], W))
@@ -94,18 +146,35 @@ def max_pool3D_forward_naive(
 
 
 def max_pool2D_forward_naive(
-    x, ksize, strides, paddings, global_pool=False, adaptive=False
+    x,
+    ksize,
+    strides,
+    paddings,
+    global_pool=False,
+    adaptive=False,
+    fractional=False,
+    random_u=None,
 ):
     N, C, H, W = x.shape
     if global_pool:
         ksize = [H, W]
         paddings = [0, 0]
-    if adaptive:
+    if adaptive or fractional:
         H_out, W_out = ksize
     else:
         H_out = (H - ksize[0] + 2 * paddings[0]) // strides[0] + 1
         W_out = (W - ksize[1] + 2 * paddings[1]) // strides[1] + 1
+
+    if fractional:
+        u = random_u
+
+        alpha_height = H / H_out
+        alpha_width = W / W_out
+
+        u_height = fractional_rational_u(u, alpha_height, H, H_out)
+        u_width = fractional_rational_u(u, alpha_width, W, W_out)
+
     out = np.zeros((N, C, H_out, W_out))
     mask = np.zeros((N, C, H_out, W_out))
     for i in range(H_out):
@@ -115,6 +184,16 @@ def max_pool2D_forward_naive(
                 r_end = adaptive_end_index(i, H, ksize[0])
                 c_start = adaptive_start_index(j, W, ksize[1])
                 c_end = adaptive_end_index(j, W, ksize[1])
+            elif fractional:
+                r_start = fractional_start_index(i, alpha_height, u_height)
+                r_end = fractional_end_index(i, alpha_height, u_height)
+                r_start = max(r_start, 0)
+                r_end = min(r_end, H)
+
+                c_start = fractional_start_index(j, alpha_width, u_width)
+                c_end = fractional_end_index(j, alpha_width, u_width)
+                c_start = max(c_start, 0)
+                c_end = min(c_end, W)
             else:
                 r_start = np.max((i * strides[0] - paddings[0], 0))
                 r_end = np.min((i * strides[0] + ksize[0] - paddings[0], H))
@@ -143,9 +222,18 @@ def max_pool3d_with_index_wapper(
     x,
     kernel_size,
     strides=[],
     paddings=[],
     global_pooling=False,
     adaptive=False,
+    fractional=False,
+    random_u=None,
 ):
     return paddle._C_ops.max_pool3d_with_index(
-        x, kernel_size, strides, paddings, global_pooling, adaptive
+        x,
+        kernel_size,
+        strides,
+        paddings,
+        global_pooling,
+        adaptive,
+        fractional,
+        random_u,
     )
@@ -154,6 +242,7 @@ def setUp(self):
         self.init_test_case()
         self.init_global()
         self.init_adaptive()
+        self.init_fractional()
         self.init_dtype()
 
         if self.is_bfloat16_op():
@@ -173,6 +262,8 @@ def setUp(self):
             self.paddings,
             self.global_pool,
             self.adaptive,
+            self.fractional,
+            self.random_u,
         )
         mask = mask.astype("int32")
         if self.is_bfloat16_op():
@@ -186,6 +277,8 @@ def setUp(self):
             'ksize': self.ksize,
             'global_pooling': self.global_pool,
             'adaptive': self.adaptive,
+            'fractional': self.fractional,
+            'random_u': self.random_u,
         }
 
         if self.is_bfloat16_op():
@@ -224,6 +317,10 @@ def init_global(self):
     def init_adaptive(self):
         self.adaptive = False
 
+    def init_fractional(self):
+        self.fractional = False
+        self.random_u = 0.3
+
 
 class TestCase1(TestMaxPoolWithIndex_Op):
     def init_global(self):
@@ -254,6 +351,12 @@ def init_adaptive(self):
         self.adaptive = True
 
 
+class TestCastFractional3d(TestMaxPoolWithIndex_Op):
+    def init_fractional(self):
+        self.fractional = True
+        self.random_u = 0.3
+
+
 # ----------------max_pool3d_with_index_fp16----------------
 def create_test_fp16_class(parent):
     @unittest.skipIf(
@@ -284,6 +387,7 @@ def test_check_grad(self):
 create_test_fp16_class(TestCase2)
 create_test_fp16_class(TestCase3)
 create_test_fp16_class(TestCastAdaptive3d)
+create_test_fp16_class(TestCastFractional3d)
 
 
 # ----------------max_pool3d_with_index_bf16----------------
@@ -332,6 +436,7 @@ def test_check_grad(self):
 create_test_bf16_class(TestCase2)
 create_test_bf16_class(TestCase3)
 create_test_bf16_class(TestCastAdaptive3d)
+create_test_bf16_class(TestCastFractional3d)
 
 
 # ----------------max_pool2d_with_index----------------
@@ -342,9 +447,18 @@ def max_pool2d_with_index_wapper(
     x,
     kernel_size,
     strides=[],
     paddings=[],
     global_pooling=False,
     adaptive=False,
+    fractional=False,
+    random_u=None,
 ):
     return paddle._C_ops.max_pool2d_with_index(
-        x, kernel_size, strides, paddings, global_pooling, adaptive
+        x,
+        kernel_size,
+        strides,
+        paddings,
+        global_pooling,
+        adaptive,
+        fractional,
+        random_u,
     )
@@ -391,6 +505,12 @@ def init_adaptive(self):
         self.adaptive = True
 
 
+class TestCastFractional2d(TestCase6):
+    def init_fractional(self):
+        self.fractional = True
+        self.random_u = 0.3
+
+
 # ----------------max_pool2d_with_index_fp16----------------
 def create_test_fp16_class(parent):
     @unittest.skipIf(
@@ -421,6 +541,7 @@ def test_check_grad(self):
 create_test_fp16_class(TestCase6)
 create_test_fp16_class(TestCase7)
 create_test_fp16_class(TestCastAdaptive2d)
+create_test_fp16_class(TestCastFractional2d)
 
 
 # ----------------max_pool2d_with_index_bf16----------------
@@ -467,6 +588,7 @@ def test_check_grad(self):
 create_test_bf16_class(TestCase6)
 create_test_bf16_class(TestCase7)
 create_test_bf16_class(TestCastAdaptive2d)
+create_test_bf16_class(TestCastFractional2d)
 
 
 if __name__ == '__main__':