PaddlePaddle · hedaoyuan · Jun 19, 2017 · May 26, 2017 · May 26, 2017 · May 26, 2017
diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt
@@ -14,8 +14,8 @@ add_library(paddle_function STATIC ${cpp_files} ${cu_objs})
 add_dependencies(paddle_function ${external_project_dependencies})
 add_dependencies(paddle_function gen_proto_cpp)
 
-if(WITH_GPU)
 if(WITH_TESTING)
+if(WITH_GPU)
     # TODO:
     # file(GLOB test_files . *OpTest.cpp)
     # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
@@ -29,6 +29,8 @@ if(WITH_TESTING)
     add_simple_unittest(MulOpTest)
     add_simple_unittest(CosSimOpTest)
 endif()
+
+add_simple_unittest(ConvOpTest)
 endif()
 
 add_style_check_target(paddle_function ${h_files})

diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/function/ContextProjectionOpTest.cpp
@@ -28,7 +28,7 @@ void testMatrixProjectionForward(int context_start,
                std::max(0, (int)(context_start + context_length - 1));
   if (pad == 0) is_padding = false;
 
-  FunctionCompare test(
+  CpuGpuFuncCompare test(
       "ContextProjectionForward",
       FuncConfig()
           .set("context_length", context_length)
@@ -60,7 +60,7 @@ void testMatrixProjectionBackward(int context_start,
                std::max(0, (int)(context_start + context_length - 1));
   if (pad == 0) is_padding = false;
 
-  FunctionCompare test(
+  CpuGpuFuncCompare test(
       "ContextProjectionBackward",
       FuncConfig()
           .set("context_length", context_length)

diff --git a/paddle/function/ConvOp.h b/paddle/function/ConvOp.h
@@ -0,0 +1,157 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "Function.h"
+
+namespace paddle {
+
+/**
+ * \brief Common base class for convolution functions.
+ *
+ * Derived classes implement the forward calculation, the backward input
+ * calculation and the backward filter calculation of the convolution
+ * operation on top of the configuration parsing and shape checking
+ * provided here.
+ *
+ * Arguments of forward and backward calculation:
+ *   1. Forward calculation of convolution.
+ *      inputs = {INPUT, FILTER}, outputs = {OUTPUT}
+ *      The first and second input arguments are input image and filter data.
+ *      The output argument is output image.
+ *
+ *   2. Backward input calculation of convolution.
+ *      inputs = {OUTPUT_GRAD, FILTER}, outputs = {INPUT_GRAD}
+ *      The first and second input arguments are output grad image
+ *      and filter data.
+ *      The output argument is input grad image.
+ *
+ *   3. Backward filter calculation of convolution.
+ *      inputs = {OUTPUT_GRAD, INPUT}, outputs = {FILTER_GRAD}
+ *      The first and second input arguments are output grad image
+ *      and input image.
+ *      The output argument is filter grad.
+ *
+ * Arguments format of input, filter and output:
+ *   1. Input image, output image, input image gradient, output image gradient
+ *      are all NCHW format. Where N is batch size, C is the number of channels,
+ *      H and W are the height and width of image or image gradient.
+ *
+ *   2. The format of the filter data is MCHW, where M is the number of
+ *      output image channels, C is the number of input image channels
+ *      per group, H and W are the height and width of the filter.
+ */
+class ConvFunctionBase : public FunctionBase {
+public:
+  /**
+   * \brief Read "strides", "paddings" and "groups" from the configuration.
+   *
+   * "strides" and "paddings" must each hold two values, ordered
+   * {height, width}; "groups" must be positive.
+   */
+  void init(const FuncConfig& config) override {
+    // function arguments
+    strides_ = config.get<std::vector<size_t>>("strides");
+    paddings_ = config.get<std::vector<size_t>>("paddings");
+    groups_ = config.get<size_t>("groups");
+
+    // strideH()/strideW()/paddingH()/paddingW() index elements 0 and 1
+    // unconditionally, so reject malformed configurations up front.
+    CHECK_EQ(strides_.size(), static_cast<size_t>(2));
+    CHECK_EQ(paddings_.size(), static_cast<size_t>(2));
+    // A group count of zero can never describe a valid convolution.
+    CHECK_GT(groups_, static_cast<size_t>(0));
+
+    // number of inputs and outputs
+    numInputs_ = 2;
+    numOutputs_ = 1;
+  }
+
+  /// No-op here; concrete convolution functions provide the calculation.
+  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {}
+
+  /**
+   * \brief Validate the shapes of the input, filter and output arguments.
+   *
+   * input can be INPUT and INPUT_GRAD
+   * filter can be FILTER and FILTER_GRAD
+   * output can be OUTPUT and OUTPUT_GRAD
+   */
+  void check(const TensorShape& input,
+             const TensorShape& filter,
+             const TensorShape& output) {
+    // inputs and outputs arguments should be 4-dimensional.
+    CHECK_EQ(input.ndims(), static_cast<size_t>(4));
+    CHECK_EQ(filter.ndims(), static_cast<size_t>(4));
+    CHECK_EQ(output.ndims(), static_cast<size_t>(4));
+
+    // The batchSize of the input needs to be equal to
+    // the batchSize of the output.
+    CHECK_EQ(input[0], output[0]);
+
+    // The input and output channel dimensions are the second and first
+    // dimensions of the filter shape. Comparing against the product
+    // (instead of input[1] / groups_) also rejects an input channel count
+    // that is not divisible by the number of groups.
+    CHECK_EQ(input[1], filter[1] * groups_);
+    CHECK_EQ(output[1], filter[0]);
+  }
+
+protected:
+  /// Strides, ordered {height, width}.
+  std::vector<size_t> strides_;
+
+  /// Paddings, ordered {height, width}.
+  std::vector<size_t> paddings_;
+
+  /// Group size, refer to grouped convolution in
+  /// Alex Krizhevsky's paper: when group=2, the first half of the
+  /// filters are only connected to the first half of the input channels,
+  /// and the second half only connected to the second half.
+  /// Starts at 1 so that it holds a defined value before init() runs.
+  size_t groups_{1};
+
+  int strideH() const { return static_cast<int>(strides_[0]); }
+
+  int strideW() const { return static_cast<int>(strides_[1]); }
+
+  int paddingH() const { return static_cast<int>(paddings_[0]); }
+
+  int paddingW() const { return static_cast<int>(paddings_[1]); }
+
+  /// A temporary memory in convolution calculation.
+  MemoryHandlePtr memory_;
+
+  /**
+   * \brief Make sure memory_ can hold at least newSize elements of type real.
+   *
+   * A new handle is allocated only when there is none yet or the current
+   * one is too small; the previous contents are not copied over.
+   */
+  template <DeviceType Device>
+  void resizeBuffer(size_t newSize) {
+    const size_t bytes = newSize * sizeof(real);
+    if (memory_ && bytes <= memory_->getAllocSize()) {
+      return;  // the existing buffer is already large enough
+    }
+    if (Device == DEVICE_TYPE_CPU) {
+      memory_ = std::make_shared<CpuMemoryHandle>(bytes);
+    } else {
+      memory_ = std::make_shared<GpuMemoryHandle>(bytes);
+    }
+  }
+};
+
+}  // namespace paddle
diff --git a/paddle/function/ConvOpTest.cpp b/paddle/function/ConvOpTest.cpp
@@ -0,0 +1,158 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <memory>
+#include "Function.h"
+#include "FunctionTest.h"
+
+namespace paddle {
+
+/// Selects which arguments ConvolutionTest registers as inputs and outputs.
+enum TestType {
+  FORWARD_TEST = 0,
+  BACKWARD_INPUT_TEST = 1,
+  BACKWARD_FILTER_TEST = 2,
+};
+
+/**
+ * \brief Drives two functions, named conv1 and conv2, through
+ *        Compare2Function<DType1, DType2> over a grid of convolution shapes.
+ *
+ * All the work happens in the constructor. Images and filters are square,
+ * groups is fixed to 1, and the same stride and padding are used for both
+ * the height and the width.
+ */
+template <DeviceType DType1, DeviceType DType2>
+class ConvolutionTest {
+public:
+  ConvolutionTest(const std::string& conv1,
+                  const std::string& conv2,
+                  TestType type,
+                  std::string algo = "auto") {
+    for (size_t batch : {1, 32}) {
+      for (size_t imageSize : {7, 14, 54}) {
+        for (size_t kernelSize : {1, 3, 5}) {
+          for (size_t inChannels : {3, 64}) {
+            for (size_t outChannels : {3, 64, 128}) {
+              // Stop once the output channels outnumber the input channels.
+              if (outChannels > inChannels) break;
+              for (size_t step : {1, 2}) {
+                for (size_t pad : {0, 1}) {
+                  // Stop once the padding reaches the filter size.
+                  if (kernelSize <= pad) break;
+                  const size_t resultSize =
+                      outputLength(imageSize, kernelSize, pad, step);
+                  LOG(INFO) << " batchSize=" << batch
+                            << " inputChannels=" << inChannels
+                            << " inputHeight=" << imageSize
+                            << " inputWidth=" << imageSize
+                            << " outputChannels=" << outChannels
+                            << " filterHeight=" << kernelSize
+                            << " filterWidth=" << kernelSize
+                            << " outputHeight=" << resultSize
+                            << " outputWidth=" << resultSize
+                            << " stride=" << step << " padding=" << pad;
+
+                  // The same value is used for both entries of each vector.
+                  std::vector<size_t> paddings(2, pad);
+                  std::vector<size_t> strides(2, step);
+                  Compare2Function<DType1, DType2> test(
+                      conv1,
+                      conv2,
+                      FuncConfig()
+                          .set("paddings", paddings)
+                          .set("strides", strides)
+                          .set("groups", static_cast<size_t>(1))
+                          .set("algo", algo));
+
+                  TensorShape inShape{
+                      batch, inChannels, imageSize, imageSize};
+                  TensorShape kernelShape{
+                      outChannels, inChannels, kernelSize, kernelSize};
+                  TensorShape outShape{
+                      batch, outChannels, resultSize, resultSize};
+
+                  switch (type) {
+                    case FORWARD_TEST:
+                      // {INPUT, FILTER} -> OUTPUT
+                      test.addInputs(BufferArg(VALUE_TYPE_FLOAT, inShape));
+                      test.addInputs(BufferArg(VALUE_TYPE_FLOAT, kernelShape));
+                      test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, outShape));
+                      test.run();
+                      break;
+                    case BACKWARD_INPUT_TEST:
+                      // {OUTPUT_GRAD, FILTER} -> INPUT_GRAD
+                      test.addInputs(BufferArg(VALUE_TYPE_FLOAT, outShape));
+                      test.addInputs(BufferArg(VALUE_TYPE_FLOAT, kernelShape));
+                      test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, inShape));
+                      test.run();
+                      break;
+                    case BACKWARD_FILTER_TEST:
+                      // {OUTPUT_GRAD, INPUT} -> FILTER_GRAD
+                      test.addInputs(BufferArg(VALUE_TYPE_FLOAT, outShape));
+                      test.addInputs(BufferArg(VALUE_TYPE_FLOAT, inShape));
+                      test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, kernelShape));
+                      test.run();
+                      break;
+                    default:
+                      // Unknown test type: nothing is registered or run.
+                      break;
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+private:
+  /// Size of one spatial output dimension for the given input size, filter
+  /// size, padding and stride.
+  static size_t outputLength(size_t imageSize,
+                             size_t kernelSize,
+                             size_t pad,
+                             size_t step) {
+    return (imageSize - kernelSize + 2 * pad + step) / step;
+  }
+};
+
+// "NaiveConv-CPU" against "GemmConv-CPU", both on the CPU.
+TEST(Forward, GEMM) {
+  ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
+      "NaiveConv-CPU", "GemmConv-CPU", FORWARD_TEST);
+}
+
+// The remaining cases pair a CPU function with a GPU function, so they are
+// compiled out when PADDLE_ONLY_CPU is defined.
+#ifndef PADDLE_ONLY_CPU
+TEST(Forward, GEMM2) {
+  ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
+      "GemmConv-CPU", "GemmConv-GPU", FORWARD_TEST);
+}
+
+TEST(BackwardInput, GEMM) {
+  ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
+      "GemmConvGradInput-CPU", "GemmConvGradInput-GPU", BACKWARD_INPUT_TEST);
+}
+
+TEST(BackwardFilter, GEMM) {
+  ConvolutionTest<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> test(
+      "GemmConvGradFilter-CPU", "GemmConvGradFilter-GPU", BACKWARD_FILTER_TEST);
+}
+#endif
+
+}  // namespace paddle
diff --git a/paddle/function/CosSimOpTest.cpp b/paddle/function/CosSimOpTest.cpp
@@ -22,7 +22,7 @@ void testCosSimForward(size_t height_x,
                        size_t height_y,
                        size_t width,
                        real scale) {
-  FunctionCompare test("CosSimForward", FuncConfig().set("scale", scale));
+  CpuGpuFuncCompare test("CosSimForward", FuncConfig().set("scale", scale));
   // prepare input arguments
   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width}));
   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width}));
@@ -36,7 +36,7 @@ void testCosSimBackward(size_t height_x,
                         size_t height_y,
                         size_t width,
                         real scale) {
-  FunctionCompare test("CosSimBackward", FuncConfig().set("scale", scale));
+  CpuGpuFuncCompare test("CosSimBackward", FuncConfig().set("scale", scale));
   // prepare input arguments
   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}));
   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}));

diff --git a/paddle/function/CrossMapNormalOpTest.cpp b/paddle/function/CrossMapNormalOpTest.cpp
@@ -28,11 +28,11 @@ TEST(CrossMapNormal, real) {
                     << " size=" << size;
 
             // init Test object
-            FunctionCompare test("CrossMapNormal",
-                                 FuncConfig()
-                                     .set("size", size)
-                                     .set("scale", (real)1.5)
-                                     .set("pow", (real)0.5));
+            CpuGpuFuncCompare test("CrossMapNormal",
+                                   FuncConfig()
+                                       .set("size", size)
+                                       .set("scale", (real)1.5)
+                                       .set("pow", (real)0.5));
             // prepare input arguments
             TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
             test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
@@ -57,11 +57,11 @@ TEST(CrossMapNormalGrad, real) {
                     << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
                     << " size=" << size;
 
-            FunctionCompare test("CrossMapNormalGrad",
-                                 FuncConfig()
-                                     .set("size", size)
-                                     .set("scale", (real)1.5)
-                                     .set("pow", (real)0.5));
+            CpuGpuFuncCompare test("CrossMapNormalGrad",
+                                   FuncConfig()
+                                       .set("size", size)
+                                       .set("scale", (real)1.5)
+                                       .set("pow", (real)0.5));
             TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
             test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
             test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));