kornia · edgarriba · Sep 11, 2024 · Sep 1, 2024 · Sep 1, 2024 · Sep 1, 2024
diff --git a/crates/kornia-imgproc/Cargo.toml b/crates/kornia-imgproc/Cargo.toml
@@ -46,3 +46,7 @@ harness = false
 [[bench]]
 name = "bench_warp"
 harness = false
+
+[[bench]]
+name = "bench_flip"
+harness = false
diff --git a/crates/kornia-imgproc/benches/bench_flip.rs b/crates/kornia-imgproc/benches/bench_flip.rs
@@ -0,0 +1,134 @@
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+
+use kornia::image::Image;
+use kornia::imgproc::flip;
+
+use rayon::{
+    iter::{IndexedParallelIterator, ParallelIterator},
+    slice::{ParallelSlice, ParallelSliceMut},
+};
+
+/// Horizontal flip benchmark variant: rayon over rows and, nested, rayon over pixels.
+fn par_par_slicecopy(src: &Image<f32, 3>, dst: &mut Image<f32, 3>) {
+    let row_len = src.cols() * 3;
+    dst.as_slice_mut()
+        .par_chunks_exact_mut(row_len)
+        .zip_eq(src.as_slice().par_chunks_exact(row_len))
+        .for_each(|(out_row, in_row)| {
+            out_row
+                .par_chunks_exact_mut(3)
+                .zip_eq(in_row.par_chunks_exact(3).rev())
+                .for_each(|(out_px, in_px)| out_px.copy_from_slice(in_px))
+        });
+}
+
+/// Horizontal flip: rayon over rows, scalar mirrored copy; pair count rounds up for odd widths.
+fn par_loop_loop(src: &Image<f32, 3>, dst: &mut Image<f32, 3>) {
+    let n = src.cols();
+    dst.as_slice_mut()
+        .par_chunks_exact_mut(n * 3)
+        .zip_eq(src.as_slice().par_chunks_exact(n * 3))
+        .for_each(|(dst_row, src_row)| {
+            for (lo, hi) in (0..(n + 1) / 2).map(|i| (i * 3, (n - 1 - i) * 3)) {
+                for c in 0..3 {
+                    dst_row[lo + c] = src_row[hi + c];
+                    dst_row[hi + c] = src_row[lo + c];
+                }
+            }
+        });
+}
+
+/// Horizontal flip: rayon over rows, 3-value slice copies; pair count rounds up for odd widths.
+fn par_loop_slicecopy(src: &Image<f32, 3>, dst: &mut Image<f32, 3>) {
+    let n = src.cols();
+    dst.as_slice_mut()
+        .par_chunks_exact_mut(n * 3)
+        .zip_eq(src.as_slice().par_chunks_exact(n * 3))
+        .for_each(|(dst_row, src_row)| {
+            for (lo, hi) in (0..(n + 1) / 2).map(|i| (i * 3, (n - 1 - i) * 3)) {
+                dst_row[lo..lo + 3].copy_from_slice(&src_row[hi..hi + 3]);
+                dst_row[hi..hi + 3].copy_from_slice(&src_row[lo..lo + 3]);
+            }
+        });
+}
+
+/// Horizontal flip benchmark variant: rayon over rows, sequential mirrored pixel copy per row.
+fn par_seq_slicecopy(src: &Image<f32, 3>, dst: &mut Image<f32, 3>) {
+    let row_len = src.cols() * 3;
+    dst.as_slice_mut()
+        .par_chunks_exact_mut(row_len)
+        .zip_eq(src.as_slice().par_chunks_exact(row_len))
+        .for_each(|(out_row, in_row)| {
+            out_row
+                .chunks_exact_mut(3)
+                .zip(in_row.chunks_exact(3).rev())
+                .for_each(|(out_px, in_px)| out_px.copy_from_slice(in_px))
+        });
+}
+
+/// Benchmarks every horizontal-flip variant on zeroed f32 3-channel images of three sizes.
+fn bench_flip(c: &mut Criterion) {
+    let mut group = c.benchmark_group("Flip");
+    for (width, height) in [(256, 224), (512, 448), (1024, 896)] {
+        // one throughput element = one pixel
+        group.throughput(criterion::Throughput::Elements((width * height) as u64));
+        let parameter_string = format!("{}x{}", width, height);
+
+        // input image
+        let image_size = [width, height].into();
+        let image = Image::<u8, 3>::new(image_size, vec![0u8; width * height * 3]).unwrap();
+        let image_f32 = image.cast::<f32>().unwrap();
+
+        // output template: each benchmark clones it once, outside the timed loop
+        let output = Image::<f32, 3>::from_size_val(image_size, 0.0).unwrap();
+
+        group.bench_with_input(
+            BenchmarkId::new("par_par_slicecopy", &parameter_string),
+            &(&image_f32, &output),
+            |b, i| {
+                let (src, mut dst) = (i.0, i.1.clone());
+                b.iter(|| black_box(par_par_slicecopy(src, &mut dst)))
+            },
+        );
+
+        group.bench_with_input(
+            BenchmarkId::new("par_loop_loop", &parameter_string),
+            &(&image_f32, &output),
+            |b, i| {
+                let (src, mut dst) = (i.0, i.1.clone());
+                b.iter(|| black_box(par_loop_loop(src, &mut dst)))
+            },
+        );
+
+        group.bench_with_input(
+            BenchmarkId::new("par_loop_slicecopy", &parameter_string),
+            &(&image_f32, &output),
+            |b, i| {
+                let (src, mut dst) = (i.0, i.1.clone());
+                b.iter(|| black_box(par_loop_slicecopy(src, &mut dst)))
+            },
+        );
+
+        group.bench_with_input(
+            BenchmarkId::new("par_seq_slicecopy", &parameter_string),
+            &(&image_f32, &output),
+            |b, i| {
+                let (src, mut dst) = (i.0, i.1.clone());
+                b.iter(|| black_box(par_seq_slicecopy(src, &mut dst)))
+            },
+        );
+
+        group.bench_with_input(
+            BenchmarkId::new("kornia", &parameter_string),
+            &(&image_f32, &output),
+            |b, i| {
+                let (src, mut dst) = (i.0, i.1.clone());
+                b.iter(|| black_box(flip::horizontal_flip(src, &mut dst)))
+            },
+        );
+    }
+    group.finish();
+}
+
+criterion_group!(benches, bench_flip);
+criterion_main!(benches);
diff --git a/crates/kornia-imgproc/src/flip.rs b/crates/kornia-imgproc/src/flip.rs
@@ -1,16 +1,22 @@
 use kornia_core::SafeTensorType;
 use kornia_image::{Image, ImageError};
-use rayon::{iter::ParallelIterator, slice::ParallelSliceMut};
+use rayon::{
+    iter::{IndexedParallelIterator, ParallelIterator},
+    slice::{ParallelSlice, ParallelSliceMut},
+};
 
 /// Flip the input image horizontally.
 ///
 /// # Arguments
 ///
 /// * `src` - The input image with shape (H, W, C).
+/// * `dst` - The output image with shape (H, W, C); overwritten with the flipped result.
 ///
-/// # Returns
+/// Precondition: the input and output images must have the same size.
 ///
-/// The flipped image.
+/// # Errors
+///
+/// Returns [`ImageError::InvalidImageSize`] if the sizes of `src` and `dst` do not match.
 ///
 /// # Example
 ///
@@ -27,43 +33,53 @@ use rayon::{iter::ParallelIterator, slice::ParallelSliceMut};
 /// )
 /// .unwrap();
 ///
-/// let flipped: Image<f32, 3> = horizontal_flip(&image).unwrap();
+/// let mut flipped = Image::<f32, 3>::from_size_val(image.size(), 0.0).unwrap();
 ///
-/// assert_eq!(flipped.size().width, 2);
-/// assert_eq!(flipped.size().height, 3);
+/// horizontal_flip(&image, &mut flipped).unwrap();
 /// ```
-pub fn horizontal_flip<T, const C: usize>(src: &Image<T, C>) -> Result<Image<T, C>, ImageError>
+pub fn horizontal_flip<T, const C: usize>(
+    src: &Image<T, C>,
+    dst: &mut Image<T, C>,
+) -> Result<(), ImageError>
 where
     T: SafeTensorType,
 {
-    let mut dst = src.clone();
+    if src.size() != dst.size() {
+        return Err(ImageError::InvalidImageSize(
+            src.cols(),
+            src.rows(),
+            dst.cols(),
+            dst.rows(),
+        ));
+    }
 
     dst.as_slice_mut()
         .par_chunks_exact_mut(src.cols() * C)
-        .for_each(|row| {
-            let mut i = 0;
-            let mut j = src.cols() - 1;
-            while i < j {
-                for c in 0..C {
-                    row.swap(i * C + c, j * C + c);
-                }
-                i += 1;
-                j -= 1;
-            }
+        .zip_eq(src.as_slice().par_chunks_exact(src.cols() * C))
+        .for_each(|(dst_row, src_row)| {
+            dst_row
+                .chunks_exact_mut(C)
+                .zip(src_row.chunks_exact(C).rev())
+                .for_each(|(dst_pixel, src_pixel)| {
+                    dst_pixel.copy_from_slice(src_pixel);
+                })
         });
 
-    Ok(dst)
+    Ok(())
 }
 
 /// Flip the input image vertically.
 ///
 /// # Arguments
 ///
 /// * `src` - The input image with shape (H, W, C).
+/// * `dst` - The output image with shape (H, W, C); overwritten with the flipped result.
+///
+/// Precondition: the input and output images must have the same size.
 ///
-/// # Returns
+/// # Errors
 ///
-/// The flipped image.
+/// Returns [`ImageError::InvalidImageSize`] if the sizes of `src` and `dst` do not match.
 ///
 /// # Example
 ///
@@ -80,31 +96,40 @@ where
 /// )
 /// .unwrap();
 ///
-/// let flipped: Image<f32, 3> = vertical_flip(&image).unwrap();
+/// let mut flipped = Image::<f32, 3>::from_size_val(image.size(), 0.0).unwrap();
+///
+/// vertical_flip(&image, &mut flipped).unwrap();
 ///
-/// assert_eq!(flipped.size().width, 2);
-/// assert_eq!(flipped.size().height, 3);
 /// ```
-pub fn vertical_flip<T, const C: usize>(src: &Image<T, C>) -> Result<Image<T, C>, ImageError>
+pub fn vertical_flip<T, const C: usize>(
+    src: &Image<T, C>,
+    dst: &mut Image<T, C>,
+) -> Result<(), ImageError>
 where
     T: SafeTensorType,
 {
-    let mut dst = src.clone();
-
-    // TODO: improve this implementation
-    for i in 0..src.cols() {
-        let mut j = src.rows() - 1;
-        for k in 0..src.rows() / 2 {
-            for c in 0..C {
-                let idx_i = i * C + c + k * src.cols() * C;
-                let idx_j = i * C + c + j * src.cols() * C;
-                dst.as_slice_mut().swap(idx_i, idx_j);
-            }
-            j -= 1;
-        }
+    if src.size() != dst.size() {
+        return Err(ImageError::InvalidImageSize(
+            src.cols(),
+            src.rows(),
+            dst.cols(),
+            dst.rows(),
+        ));
     }
 
-    Ok(dst)
+    dst.as_slice_mut()
+        .par_chunks_exact_mut(src.cols() * C)
+        .zip_eq(src.as_slice().par_chunks_exact(src.cols() * C).rev())
+        .for_each(|(dst_row, src_row)| {
+            dst_row
+                .chunks_exact_mut(C)
+                .zip(src_row.chunks_exact(C))
+                .for_each(|(dst_pixel, src_pixel)| {
+                    dst_pixel.copy_from_slice(src_pixel);
+                })
+        });
+
+    Ok(())
 }
 
 #[cfg(test)]
@@ -113,30 +138,42 @@ mod tests {
 
     #[test]
     fn test_hflip() -> Result<(), ImageError> {
-        let image = Image::<_, 1>::new(
-            ImageSize {
-                width: 2,
-                height: 3,
-            },
-            vec![0u8, 1, 2, 3, 4, 5],
+        let image_size = ImageSize {
+            width: 2,
+            height: 3,
+        };
+        let image = Image::<_, 3>::new(
+            image_size,
+            vec![
+                0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+            ],
         )?;
-        let data_expected = vec![1u8, 0, 3, 2, 5, 4];
-        let flipped = super::horizontal_flip(&image)?;
+        let data_expected = vec![
+            3u8, 4, 5, 0, 1, 2, 9, 10, 11, 6, 7, 8, 15, 16, 17, 12, 13, 14,
+        ];
+        let mut flipped = Image::<_, 3>::from_size_val(image_size, 0u8)?;
+        super::horizontal_flip(&image, &mut flipped)?;
         assert_eq!(flipped.as_slice(), &data_expected);
         Ok(())
     }
 
     #[test]
     fn test_vflip() -> Result<(), ImageError> {
-        let image = Image::<_, 1>::new(
-            ImageSize {
-                width: 2,
-                height: 3,
-            },
-            vec![0u8, 1, 2, 3, 4, 5],
+        let image_size = ImageSize {
+            width: 2,
+            height: 3,
+        };
+        let image = Image::<_, 3>::new(
+            image_size,
+            vec![
+                0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
+            ],
         )?;
-        let data_expected = vec![4u8, 5, 2, 3, 0, 1];
-        let flipped = super::vertical_flip(&image)?;
+        let data_expected = vec![
+            12u8, 13, 14, 15, 16, 17, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+        ];
+        let mut flipped = Image::<_, 3>::from_size_val(image_size, 0u8)?;
+        super::vertical_flip(&image, &mut flipped)?;
         assert_eq!(flipped.as_slice(), &data_expected);
         Ok(())
     }

diff --git a/examples/metrics/src/main.rs b/examples/metrics/src/main.rs
@@ -24,7 +24,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     ops::cast_and_scale(&image, &mut image_f32, 1.0 / 255.0)?;
 
     // modify the image to see the changes
-    let image_dirty = imgproc::flip::horizontal_flip(&image_f32)?;
+    let mut image_dirty = Image::<f32, 3>::from_size_val(image.size(), 0.0)?;
+    imgproc::flip::horizontal_flip(&image_f32, &mut image_dirty)?;
 
     // compute the mean squared error (mse) between the original and the modified image
     let mse = imgproc::metrics::mse(&image_f32, &image_dirty)?;