Improved 2D convolution and correlation #624

Open · wants to merge 15 commits into base: develop
102 changes: 102 additions & 0 deletions doc/image_processing/2D_Convolution_and_Correlation.rst
@@ -0,0 +1,102 @@
Two-dimensional convolution and correlation
-------------------------------------------


What is the role of convolution in image processing?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Convolution is a cornerstone of a large number of image processing applications. It is a
mathematical operation which is applied in the same manner over the entire considered region of an
image. Since every pixel is accessed in a uniform way, the algorithm leaves room for optimization.
We have therefore targeted our efforts towards improving the spatial access pattern of pixel
elements during 2D convolution and correlation.


---------------------------------


What are we doing differently?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


There are many ways of performing 2D convolution; the naive ones involve irregular access of source
image pixels. This causes a large number of cache misses, which in turn degrade performance. We
have tried to improve this access pattern and hence reduce the overall number of cache misses.


Our strategy involves storing the relevant 2D image data in a 1D buffer container. This container
is then used for accessing the required patches of pixels during convolution. The patches are
visited by a sliding window algorithm so that their dot product with the 2D kernel can be stored in
the destination image.
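
The sketch below illustrates this buffering strategy on a plain ``float`` image. It is a minimal
illustration, not the actual library code; the name ``convolve_buffered`` is made up here, and
boundary handling is omitted for brevity.

.. code-block:: cpp

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Correlate a square kernel with the interior of a float image. The rows
    // covered by one vertical kernel position are copied into a contiguous 1D
    // buffer; a window then slides horizontally over that buffer, computing
    // dot products with the kernel. (True convolution would flip the kernel.)
    std::vector<float> convolve_buffered(
        std::vector<float> const& img, std::size_t img_width, std::size_t img_height,
        std::vector<float> const& kernel, std::size_t kernel_size)
    {
        std::vector<float> dst(img.size(), 0.0f);
        std::size_t const half = kernel_size / 2;
        std::vector<float> buffer(kernel_size * img_width);

        for (std::size_t y = half; y + half < img_height; ++y)
        {
            // Fill the buffer with the kernel_size rows centred on row y.
            for (std::size_t r = 0; r < kernel_size; ++r)
                std::copy(img.begin() + (y - half + r) * img_width,
                          img.begin() + (y - half + r + 1) * img_width,
                          buffer.begin() + r * img_width);

            // Slide a kernel_size x kernel_size window over the buffer.
            for (std::size_t x = half; x + half < img_width; ++x)
            {
                float acc = 0.0f;
                for (std::size_t ky = 0; ky < kernel_size; ++ky)
                    for (std::size_t kx = 0; kx < kernel_size; ++kx)
                        acc += buffer[ky * img_width + (x - half + kx)]
                             * kernel[ky * kernel_size + kx];
                dst[y * img_width + x] = acc;
            }
        }
        return dst;
    }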


---------------------------------


How much data should be stored inside the buffer?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


We store only as much data as the kernel requires during a single horizontal slide. In other
words, the size of the buffer container will be

``buffer size = kernel_size * img_width + boundary_offset``

where ``boundary_offset = (extrapolation_required == 1) ? ((kernel_size - 1) * kernel_size) : 0``

Here, ``boundary_offset`` is the extra buffer capacity required for boundary extrapolation of
corner pixels.
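
As a concrete example, a 5x5 kernel sliding over an image 100 pixels wide, with extrapolation
enabled, needs ``5 * 100 + 4 * 5 = 520`` buffer elements. A small helper expressing the same
formula (hypothetical, for illustration only):

.. code-block:: cpp

    #include <cstddef>

    // Buffer size for one horizontal kernel slide, mirroring the formula above.
    std::size_t buffer_size(std::size_t kernel_size, std::size_t img_width,
                            bool extrapolation_required)
    {
        std::size_t const boundary_offset =
            extrapolation_required ? (kernel_size - 1) * kernel_size : 0;
        return kernel_size * img_width + boundary_offset;
    }
    // buffer_size(5, 100, true) == 520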


----------------------------------


What to do after forming the buffer container?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Once our data is present inside the buffer container, a sliding window algorithm is applied to
calculate the dot product between the kernel and the image patch contained inside the sliding
window. Boundary extrapolation is taken care of by suitably modifying the elements of the buffer
container.
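
As one example of such a modification, the sketch below shows replicate-border extrapolation on a
single buffered row. This is only one of several possible boundary options, and the helper is made
up here for illustration.

.. code-block:: cpp

    #include <cstddef>
    #include <vector>

    // Pad one buffered row by repeating its edge pixels 'border' times on
    // each side, so the sliding window never reads outside the image data.
    void extrapolate_replicate(std::vector<float>& row, std::size_t border)
    {
        float const first = row.front();
        float const last = row.back();
        row.insert(row.begin(), border, first);
        row.insert(row.end(), border, last);
    }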


----------------------------------


How do we take care of variable anchor point positions?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


While filling the buffer container, we take care of this by starting/ending the filling process at
suitable points in the source image. For example, with a 5x5 kernel whose anchor lies at its
top-left corner rather than its centre, filling for output row ``y`` begins at source row ``y``
instead of ``y - 2``. The pattern of pixel assignment then automatically achieves the desired
effect.


-------------------------------------------------------


What to do with spatial separable kernels?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


We have written a custom algorithm which detects whether a two-dimensional kernel can be spatially
separated. It performs the separation and the separability check simultaneously: if at any step of
the check the kernel is deemed non-separable, the separation algorithm stops; otherwise the
separated horizontal and vertical kernels are returned. The algorithm uses the fact that a
separable 2D matrix is essentially a rank-1 matrix, and applies basic linear algebra to separate
the kernel.
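
The sketch below conveys the idea on a plain matrix of ``double``. It is an illustration under
simplifying assumptions (dense ``std::vector`` storage, a made-up ``separate`` helper, an arbitrary
tolerance), not the algorithm shipped in this pull request.

.. code-block:: cpp

    #include <cmath>
    #include <cstddef>
    #include <optional>
    #include <utility>
    #include <vector>

    // (vertical kernel v, horizontal kernel h) such that K[r][c] == v[r] * h[c]
    using kernel_pair = std::pair<std::vector<double>, std::vector<double>>;

    // A kernel is separable exactly when it has rank 1, i.e. every row is a
    // scalar multiple of one fixed row. Check and separate in a single pass.
    std::optional<kernel_pair> separate(std::vector<std::vector<double>> const& k)
    {
        std::size_t const rows = k.size(), cols = k[0].size();
        std::vector<double> h = k[0];  // candidate horizontal kernel: first row
        std::vector<double> v(rows);

        // Pivot: a nonzero entry of the first row, used to compute row scales.
        std::size_t pivot = 0;
        while (pivot < cols && h[pivot] == 0.0) ++pivot;
        if (pivot == cols)
            return std::nullopt;  // all-zero first row: treated as non-separable

        for (std::size_t r = 0; r < rows; ++r)
        {
            v[r] = k[r][pivot] / h[pivot];  // scale of row r relative to row 0
            for (std::size_t c = 0; c < cols; ++c)
                if (std::abs(k[r][c] - v[r] * h[c]) > 1e-9)
                    return std::nullopt;  // rank > 1: stop as soon as detected
        }
        return kernel_pair{v, h};
    }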


-------------------------------------------------------


A note regarding derivatives
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Calculating derivatives with different boundary options may produce artifacts in some cases. It is
therefore advisable to compare the outputs obtained with each method of boundary extrapolation and
select the one that seems most reasonable for the application.
2 changes: 1 addition & 1 deletion example/convolve2d.cpp
@@ -37,7 +37,7 @@ int main()

std::vector<float> v(9, 1.0f / 9.0f);
detail::kernel_2d<float> kernel(v.begin(), v.size(), 1, 1);
detail::convolve_2d(view(img), kernel, view(img_out1));
convolve_2d<gray8_pixel_t>(view(img), kernel, view(img_out1));

write_view("out-convolve2d.png", view(img_out1), png_tag{});

4 changes: 2 additions & 2 deletions example/harris.cpp
@@ -175,9 +175,9 @@ int main(int argc, char* argv[])
auto x_gradient = gil::view(x_gradient_image);
auto y_gradient = gil::view(y_gradient_image);
auto scharr_x = gil::generate_dx_scharr();
gil::detail::convolve_2d(smoothed, scharr_x, x_gradient);
gil::convolve_2d<gil::gray8_pixel_t>(smoothed, scharr_x, x_gradient);
auto scharr_y = gil::generate_dy_scharr();
gil::detail::convolve_2d(smoothed, scharr_y, y_gradient);
gil::convolve_2d<gil::gray8_pixel_t>(smoothed, scharr_y, y_gradient);

gil::gray32f_image_t m11(x_gradient.dimensions());
gil::gray32f_image_t m12_21(x_gradient.dimensions());
183 changes: 59 additions & 124 deletions example/hessian.cpp
@@ -8,6 +8,8 @@

#include <boost/gil/image.hpp>
#include <boost/gil/image_view.hpp>
#include <boost/gil/extension/numeric/matrix.hpp>
#include <boost/gil/extension/numeric/arithmetics.hpp>
#include <boost/gil/image_processing/numeric.hpp>
#include <boost/gil/image_processing/hessian.hpp>
#include <boost/gil/extension/io/png.hpp>
@@ -29,126 +29,38 @@ namespace gil = boost::gil;
// the algorithm here follows sRGB gamma definition
// taken from here (luminance calculation):
// https://en.wikipedia.org/wiki/Grayscale
gil::gray8_image_t to_grayscale(gil::rgb8_view_t original)
{
gil::gray8_image_t output_image(original.dimensions());
auto output = gil::view(output_image);
constexpr double max_channel_intensity = (std::numeric_limits<std::uint8_t>::max)();
for (long int y = 0; y < original.height(); ++y)
{
for (long int x = 0; x < original.width(); ++x)
{
// scale the values into range [0, 1] and calculate linear intensity
auto& p = original(x, y);
double red_intensity = p.at(std::integral_constant<int, 0>{})
/ max_channel_intensity;
double green_intensity = p.at(std::integral_constant<int, 1>{})
/ max_channel_intensity;
double blue_intensity = p.at(std::integral_constant<int, 2>{})
/ max_channel_intensity;
auto linear_luminosity = 0.2126 * red_intensity
+ 0.7152 * green_intensity
+ 0.0722 * blue_intensity;

// perform gamma adjustment
double gamma_compressed_luminosity = 0;
if (linear_luminosity < 0.0031308)
{
gamma_compressed_luminosity = linear_luminosity * 12.92;
} else
{
gamma_compressed_luminosity = 1.055 * std::pow(linear_luminosity, 1 / 2.4) - 0.055;
}

// since now it is scaled, descale it back
output(x, y) = gamma_compressed_luminosity * max_channel_intensity;
}
}

return output_image;
}

void apply_gaussian_blur(gil::gray8_view_t input_view, gil::gray8_view_t output_view)
{
constexpr static std::ptrdiff_t filter_height = 5ull;
constexpr static std::ptrdiff_t filter_width = 5ull;
constexpr static double filter[filter_height][filter_width] =
{
{ 2, 4, 6, 4, 2 },
{ 4, 9, 12, 9, 4 },
{ 5, 12, 15, 12, 5 },
{ 4, 9, 12, 9, 4 },
{ 2, 4, 5, 4, 2 }
};
constexpr double factor = 1.0 / 159;
constexpr double bias = 0.0;

const auto height = input_view.height();
const auto width = input_view.width();
for (std::ptrdiff_t x = 0; x < width; ++x)
{
for (std::ptrdiff_t y = 0; y < height; ++y)
{
double intensity = 0.0;
for (std::ptrdiff_t filter_y = 0; filter_y < filter_height; ++filter_y)
{
for (std::ptrdiff_t filter_x = 0; filter_x < filter_width; ++filter_x)
{
int image_x = x - filter_width / 2 + filter_x;
int image_y = y - filter_height / 2 + filter_y;
if (image_x >= input_view.width() || image_x < 0 ||
image_y >= input_view.height() || image_y < 0)
{
continue;
}
const auto& pixel = input_view(image_x, image_y);
intensity += pixel.at(std::integral_constant<int, 0>{})
* filter[filter_y][filter_x];
}
}
auto& pixel = output_view(gil::point_t(x, y));
pixel = (std::min)((std::max)(int(factor * intensity + bias), 0), 255);
}

}
}

std::vector<gil::point_t> suppress(
gil::gray32f_view_t harris_response,
gil::matrix harris_response,
double harris_response_threshold)
{
std::vector<gil::point_t> corner_points;
for (gil::gray32f_view_t::coord_t y = 1; y < harris_response.height() - 1; ++y)
{
for (gil::gray32f_view_t::coord_t x = 1; x < harris_response.width() - 1; ++x)
{
auto value = [](gil::gray32f_pixel_t pixel) {
return pixel.at(std::integral_constant<int, 0>{});
};
double values[9] = {
value(harris_response(x - 1, y - 1)),
value(harris_response(x, y - 1)),
value(harris_response(x + 1, y - 1)),
value(harris_response(x - 1, y)),
value(harris_response(x, y)),
value(harris_response(x + 1, y)),
value(harris_response(x - 1, y + 1)),
value(harris_response(x, y + 1)),
value(harris_response(x + 1, y + 1))
gil::elem_t values[9] = {
harris_response(x - 1, y - 1),
harris_response(x, y - 1),
harris_response(x + 1, y - 1),
harris_response(x - 1, y),
harris_response(x, y),
harris_response(x + 1, y),
harris_response(x - 1, y + 1),
harris_response(x, y + 1),
harris_response(x + 1, y + 1),
};

auto maxima = *std::max_element(
values,
values + 9,
[](double lhs, double rhs)
[](gil::elemc_ref_t lhs, gil::elemc_ref_t rhs)
{
return lhs < rhs;
return std::abs(lhs[0]) < std::abs(rhs[0]);
}
);

if (maxima == value(harris_response(x, y))
&& std::count(values, values + 9, maxima) == 1
&& maxima >= harris_response_threshold)
if (maxima[0] == harris_response(x, y)[0]
&& std::abs(maxima[0]) >= harris_response_threshold)
{
corner_points.emplace_back(x, y);
}
@@ -158,6 +72,13 @@ std::vector<gil::point_t> suppress(
return corner_points;
}

std::pair<float, float> get_min_max_elements(const gil::matrix& m) {
auto min_element = *std::min_element(m.begin(), m.end());
auto max_element = *std::max_element(m.begin(), m.end());

return {min_element, max_element};
}

int main(int argc, char* argv[]) {
if (argc != 5)
{
@@ -167,39 +88,48 @@ int main(int argc, char* argv[]) {
}

std::size_t window_size = std::stoul(argv[2]);
long hessian_determinant_threshold = std::stol(argv[3]);
double hessian_determinant_threshold = std::stod(argv[3]);

gil::rgb8_image_t input_image;

gil::read_image(argv[1], input_image, gil::png_tag{});

auto input_view = gil::view(input_image);
auto grayscaled = to_grayscale(input_view);
gil::gray8_image_t smoothed_image(grayscaled.dimensions());
auto smoothed = gil::view(smoothed_image);
apply_gaussian_blur(gil::view(grayscaled), smoothed);
gil::gray16s_image_t x_gradient_image(grayscaled.dimensions());
gil::gray16s_image_t y_gradient_image(grayscaled.dimensions());

auto x_gradient = gil::view(x_gradient_image);
auto y_gradient = gil::view(y_gradient_image);
auto scharr_x = gil::generate_dx_scharr();
gil::detail::convolve_2d(smoothed, scharr_x, x_gradient);
auto scharr_y = gil::generate_dy_scharr();
gil::detail::convolve_2d(smoothed, scharr_y, y_gradient);

gil::gray32f_image_t m11(x_gradient.dimensions());
gil::gray32f_image_t m12_21(x_gradient.dimensions());
gil::gray32f_image_t m22(x_gradient.dimensions());
gil::gray8_image_t grayscaled_image(input_image.dimensions());
auto grayscaled = gil::view(grayscaled_image);
gil::copy_and_convert_pixels(input_view, grayscaled);
gil::write_view("grayscale.png", grayscaled, gil::png_tag{});

gil::matrix_storage input_matrix_storage(grayscaled_image.dimensions());
auto input_matrix = gil::view(input_matrix_storage);
gil::value_cast(grayscaled, input_matrix);

gil::matrix_storage grayscaled_matrix_storage(grayscaled.dimensions());
auto grayscaled_matrix = gil::view(grayscaled_matrix_storage);
gil::value_cast(grayscaled, grayscaled_matrix);

gil::matrix_storage smoothed_matrix(grayscaled.dimensions());
auto smoothed = gil::view(smoothed_matrix);
gil::convolve_2d<gil::elem_t>(grayscaled_matrix, gil::generate_gaussian_kernel(3, 1.0), smoothed);
gil::matrix_storage dx_matrix(grayscaled.dimensions());
auto dx = gil::view(dx_matrix);
gil::matrix_storage dy_matrix(grayscaled.dimensions());
auto dy = gil::view(dy_matrix);
gil::convolve_2d<gil::elem_t>(smoothed, gil::generate_dx_scharr(), dx);
gil::convolve_2d<gil::elem_t>(smoothed, gil::generate_dy_sobel(), dy);

gil::matrix_storage m11(dx.dimensions());
gil::matrix_storage m12_21(dx.dimensions());
gil::matrix_storage m22(dy.dimensions());
gil::compute_hessian_entries(
x_gradient,
y_gradient,
dx,
dy,
gil::view(m11),
gil::view(m12_21),
gil::view(m22)
);

gil::gray32f_image_t hessian_response(x_gradient.dimensions());
gil::matrix_storage hessian_response(dx.dimensions());
auto gaussian_kernel = gil::generate_gaussian_kernel(window_size, 0.84089642);
gil::compute_hessian_responses(
gil::view(m11),
Expand All @@ -209,7 +139,12 @@ int main(int argc, char* argv[]) {
gil::view(hessian_response)
);

auto minmax_elements = get_min_max_elements(gil::view(hessian_response));
std::cout << "min Hessian response: " << minmax_elements.first
<< " max Hessian response: " << minmax_elements.second << '\n';

auto corner_points = suppress(gil::view(hessian_response), hessian_determinant_threshold);
// std::vector<gil::point_t> corner_points = {};
for (auto point: corner_points) {
input_view(point) = gil::rgb8_pixel_t(0, 0, 0);
input_view(point).at(std::integral_constant<int, 1>{}) = 255;