Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor hflip, vflip functions to allow preallocation #117

Merged
merged 6 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions crates/kornia-imgproc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,7 @@ harness = false
[[bench]]
name = "bench_warp"
harness = false

[[bench]]
name = "bench_flip"
harness = false
134 changes: 134 additions & 0 deletions crates/kornia-imgproc/benches/bench_flip.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};

use kornia::image::Image;
use kornia::imgproc::flip;

use rayon::{
iter::{IndexedParallelIterator, ParallelIterator},
slice::{ParallelSlice, ParallelSliceMut},
};

/// Horizontal flip benchmark variant: rows are processed in parallel and the
/// pixels inside each row are copied in parallel as well.
///
/// Each 3-element pixel of a source row is copied into the mirrored pixel
/// position of the matching destination row.
///
/// # Panics
///
/// `zip_eq` panics if `src` and `dst` do not yield the same number of rows.
fn par_par_slicecopy(src: &Image<f32, 3>, dst: &mut Image<f32, 3>) {
    // One row holds `cols` pixels of 3 interleaved channels each.
    let row_stride = src.cols() * 3;

    dst.as_slice_mut()
        .par_chunks_exact_mut(row_stride)
        .zip_eq(src.as_slice().par_chunks_exact(row_stride))
        .for_each(|(out_row, in_row)| {
            // Walk the source pixels back-to-front so they land mirrored.
            let mirrored = in_row.par_chunks_exact(3).rev();
            out_row
                .par_chunks_exact_mut(3)
                .zip_eq(mirrored)
                .for_each(|(out_px, in_px)| out_px.copy_from_slice(in_px));
        });
}

/// Horizontal flip benchmark variant: rows are processed in parallel, and
/// inside each row an index loop copies mirrored pixel pairs one channel at
/// a time.
///
/// For every column pair `(i, n - 1 - i)` the source pixel at one end is
/// written to the destination at the other end. When the width is odd, the
/// centre pixel has no partner and is copied straight across so that every
/// destination element is written.
///
/// # Panics
///
/// `zip_eq` panics if `src` and `dst` do not yield the same number of rows.
fn par_loop_loop(src: &Image<f32, 3>, dst: &mut Image<f32, 3>) {
    let n = src.cols();

    dst.as_slice_mut()
        .par_chunks_exact_mut(n * 3)
        .zip_eq(src.as_slice().par_chunks_exact(n * 3))
        .for_each(|(dst_row, src_row)| {
            for i in 0..n / 2 {
                for c in 0..3 {
                    let (idx_i, idx_j) = (i * 3 + c, (n - 1 - i) * 3 + c);
                    dst_row[idx_i] = src_row[idx_j];
                    dst_row[idx_j] = src_row[idx_i];
                }
            }

            // Odd width: the pair loop above never touches the centre column.
            if n % 2 == 1 {
                let mid = (n / 2) * 3;
                for c in 0..3 {
                    dst_row[mid + c] = src_row[mid + c];
                }
            }
        });
}

/// Horizontal flip benchmark variant: rows are processed in parallel, and
/// inside each row an index loop copies mirrored pixel pairs with
/// `copy_from_slice` on 3-element sub-slices.
///
/// For every column pair `(i, n - 1 - i)` the source pixel at one end is
/// written to the destination at the other end. When the width is odd, the
/// centre pixel has no partner and is copied straight across so that every
/// destination element is written.
///
/// # Panics
///
/// `zip_eq` panics if `src` and `dst` do not yield the same number of rows.
fn par_loop_slicecopy(src: &Image<f32, 3>, dst: &mut Image<f32, 3>) {
    let n = src.cols();

    dst.as_slice_mut()
        .par_chunks_exact_mut(n * 3)
        .zip_eq(src.as_slice().par_chunks_exact(n * 3))
        .for_each(|(dst_row, src_row)| {
            for i in 0..n / 2 {
                let (idx_i, idx_j) = (i * 3, (n - 1 - i) * 3);
                dst_row[idx_i..idx_i + 3].copy_from_slice(&src_row[idx_j..idx_j + 3]);
                dst_row[idx_j..idx_j + 3].copy_from_slice(&src_row[idx_i..idx_i + 3]);
            }

            // Odd width: the pair loop above never touches the centre column.
            if n % 2 == 1 {
                let mid = (n / 2) * 3;
                dst_row[mid..mid + 3].copy_from_slice(&src_row[mid..mid + 3]);
            }
        });
}

/// Horizontal flip benchmark variant: rows are processed in parallel while the
/// pixels inside each row are copied sequentially.
///
/// Each 3-element pixel of a source row is copied into the mirrored pixel
/// position of the matching destination row.
///
/// # Panics
///
/// `zip_eq` panics if `src` and `dst` do not yield the same number of rows.
fn par_seq_slicecopy(src: &Image<f32, 3>, dst: &mut Image<f32, 3>) {
    // One row holds `cols` pixels of 3 interleaved channels each.
    let row_stride = src.cols() * 3;

    dst.as_slice_mut()
        .par_chunks_exact_mut(row_stride)
        .zip_eq(src.as_slice().par_chunks_exact(row_stride))
        .for_each(|(out_row, in_row)| {
            // Walk the source pixels back-to-front so they land mirrored.
            let mirrored = in_row.chunks_exact(3).rev();
            for (out_px, in_px) in out_row.chunks_exact_mut(3).zip(mirrored) {
                out_px.copy_from_slice(in_px);
            }
        });
}

/// Criterion benchmark comparing several horizontal-flip implementations
/// (the local `par_*` variants and `flip::horizontal_flip`) on three image
/// sizes.
///
/// Throughput is reported in elements, where one element is one pixel
/// (`width * height`). The source image is only read by every variant, so it
/// is borrowed rather than cloned; the destination is cloned once per
/// benchmark, outside the timed closure, so each variant writes into its own
/// buffer.
fn bench_flip(c: &mut Criterion) {
    let mut group = c.benchmark_group("Flip");

    for &(width, height) in [(256, 224), (512, 448), (1024, 896)].iter() {
        group.throughput(criterion::Throughput::Elements((width * height) as u64));

        let parameter_string = format!("{}x{}", width, height);

        // input image
        let image_size = [width, height].into();
        let image = Image::<u8, 3>::new(image_size, vec![0u8; width * height * 3]).unwrap();
        let image_f32 = image.clone().cast::<f32>().unwrap();

        // output image
        let output = Image::<f32, 3>::from_size_val(image_size, 0.0).unwrap();

        group.bench_with_input(
            BenchmarkId::new("par_par_slicecopy", &parameter_string),
            &(&image_f32, &output),
            |b, i| {
                let (src, mut dst) = (i.0, i.1.clone());
                b.iter(|| black_box(par_par_slicecopy(src, &mut dst)))
            },
        );

        group.bench_with_input(
            BenchmarkId::new("par_loop_loop", &parameter_string),
            &(&image_f32, &output),
            |b, i| {
                let (src, mut dst) = (i.0, i.1.clone());
                b.iter(|| black_box(par_loop_loop(src, &mut dst)))
            },
        );

        group.bench_with_input(
            BenchmarkId::new("par_loop_slicecopy", &parameter_string),
            &(&image_f32, &output),
            |b, i| {
                let (src, mut dst) = (i.0, i.1.clone());
                b.iter(|| black_box(par_loop_slicecopy(src, &mut dst)))
            },
        );

        group.bench_with_input(
            BenchmarkId::new("par_seq_slicecopy", &parameter_string),
            &(&image_f32, &output),
            |b, i| {
                let (src, mut dst) = (i.0, i.1.clone());
                b.iter(|| black_box(par_seq_slicecopy(src, &mut dst)))
            },
        );

        group.bench_with_input(
            BenchmarkId::new("kornia", &parameter_string),
            &(&image_f32, &output),
            |b, i| {
                let (src, mut dst) = (i.0, i.1.clone());
                b.iter(|| black_box(flip::horizontal_flip(src, &mut dst)))
            },
        );
    }
    group.finish();
}

criterion_group!(benches, bench_flip);
criterion_main!(benches);
147 changes: 92 additions & 55 deletions crates/kornia-imgproc/src/flip.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
use kornia_core::SafeTensorType;
use kornia_image::{Image, ImageError};
use rayon::{iter::ParallelIterator, slice::ParallelSliceMut};
use rayon::{
iter::{IndexedParallelIterator, ParallelIterator},
slice::{ParallelSlice, ParallelSliceMut},
};

/// Flip the input image horizontally.
///
/// # Arguments
///
/// * `src` - The input image with shape (H, W, C).
/// * `dst` - The output image with shape (H, W, C).
///
/// # Returns
/// Precondition: the input and output images must have the same size.
///
/// The flipped image.
/// # Errors
///
/// Returns an error if the sizes of `src` and `dst` do not match.
///
/// # Example
///
Expand All @@ -27,43 +33,53 @@ use rayon::{iter::ParallelIterator, slice::ParallelSliceMut};
/// )
/// .unwrap();
///
/// let flipped: Image<f32, 3> = horizontal_flip(&image).unwrap();
/// let mut flipped = Image::<f32, 3>::from_size_val(image.size(), 0.0).unwrap();
///
/// assert_eq!(flipped.size().width, 2);
/// assert_eq!(flipped.size().height, 3);
/// horizontal_flip(&image, &mut flipped).unwrap();
/// ```
pub fn horizontal_flip<T, const C: usize>(src: &Image<T, C>) -> Result<Image<T, C>, ImageError>
pub fn horizontal_flip<T, const C: usize>(
src: &Image<T, C>,
dst: &mut Image<T, C>,
) -> Result<(), ImageError>
where
T: SafeTensorType,
{
let mut dst = src.clone();
if src.size() != dst.size() {
return Err(ImageError::InvalidImageSize(
src.cols(),
src.rows(),
dst.cols(),
dst.rows(),
));
}

dst.as_slice_mut()
.par_chunks_exact_mut(src.cols() * C)
.for_each(|row| {
let mut i = 0;
let mut j = src.cols() - 1;
while i < j {
for c in 0..C {
row.swap(i * C + c, j * C + c);
}
i += 1;
j -= 1;
}
.zip_eq(src.as_slice().par_chunks_exact(src.cols() * C))
emilmgeorge marked this conversation as resolved.
Show resolved Hide resolved
.for_each(|(dst_row, src_row)| {
dst_row
.chunks_exact_mut(C)
.zip(src_row.chunks_exact(C).rev())
.for_each(|(dst_pixel, src_pixel)| {
dst_pixel.copy_from_slice(src_pixel);
})
});

Ok(dst)
Ok(())
}

/// Flip the input image vertically.
///
/// # Arguments
///
/// * `src` - The input image with shape (H, W, C).
/// * `dst` - The output image with shape (H, W, C).
///
/// Precondition: the input and output images must have the same size.
///
/// # Returns
/// # Errors
///
/// The flipped image.
/// Returns an error if the sizes of `src` and `dst` do not match.
///
/// # Example
///
Expand All @@ -80,31 +96,40 @@ where
/// )
/// .unwrap();
///
/// let flipped: Image<f32, 3> = vertical_flip(&image).unwrap();
/// let mut flipped = Image::<f32, 3>::from_size_val(image.size(), 0.0).unwrap();
///
/// vertical_flip(&image, &mut flipped).unwrap();
///
/// assert_eq!(flipped.size().width, 2);
/// assert_eq!(flipped.size().height, 3);
/// ```
pub fn vertical_flip<T, const C: usize>(src: &Image<T, C>) -> Result<Image<T, C>, ImageError>
pub fn vertical_flip<T, const C: usize>(
src: &Image<T, C>,
dst: &mut Image<T, C>,
) -> Result<(), ImageError>
where
T: SafeTensorType,
{
let mut dst = src.clone();

// TODO: improve this implementation
for i in 0..src.cols() {
let mut j = src.rows() - 1;
for k in 0..src.rows() / 2 {
for c in 0..C {
let idx_i = i * C + c + k * src.cols() * C;
let idx_j = i * C + c + j * src.cols() * C;
dst.as_slice_mut().swap(idx_i, idx_j);
}
j -= 1;
}
if src.size() != dst.size() {
return Err(ImageError::InvalidImageSize(
src.cols(),
src.rows(),
dst.cols(),
dst.rows(),
));
}

Ok(dst)
dst.as_slice_mut()
.par_chunks_exact_mut(src.cols() * C)
.zip_eq(src.as_slice().par_chunks_exact(src.cols() * C).rev())
.for_each(|(dst_row, src_row)| {
dst_row
.chunks_exact_mut(C)
.zip(src_row.chunks_exact(C))
.for_each(|(dst_pixel, src_pixel)| {
dst_pixel.copy_from_slice(src_pixel);
})
});

Ok(())
}

#[cfg(test)]
Expand All @@ -113,30 +138,42 @@ mod tests {

#[test]
fn test_hflip() -> Result<(), ImageError> {
let image = Image::<_, 1>::new(
ImageSize {
width: 2,
height: 3,
},
vec![0u8, 1, 2, 3, 4, 5],
let image_size = ImageSize {
width: 2,
height: 3,
};
let image = Image::<_, 3>::new(
image_size,
vec![
0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
],
)?;
let data_expected = vec![1u8, 0, 3, 2, 5, 4];
let flipped = super::horizontal_flip(&image)?;
let data_expected = vec![
3u8, 4, 5, 0, 1, 2, 9, 10, 11, 6, 7, 8, 15, 16, 17, 12, 13, 14,
];
let mut flipped = Image::<_, 3>::from_size_val(image_size, 0u8)?;
super::horizontal_flip(&image, &mut flipped)?;
assert_eq!(flipped.as_slice(), &data_expected);
Ok(())
}

#[test]
fn test_vflip() -> Result<(), ImageError> {
let image = Image::<_, 1>::new(
ImageSize {
width: 2,
height: 3,
},
vec![0u8, 1, 2, 3, 4, 5],
let image_size = ImageSize {
width: 2,
height: 3,
};
let image = Image::<_, 3>::new(
image_size,
vec![
0u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
],
)?;
let data_expected = vec![4u8, 5, 2, 3, 0, 1];
let flipped = super::vertical_flip(&image)?;
let data_expected = vec![
12u8, 13, 14, 15, 16, 17, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
];
let mut flipped = Image::<_, 3>::from_size_val(image_size, 0u8)?;
super::vertical_flip(&image, &mut flipped)?;
assert_eq!(flipped.as_slice(), &data_expected);
Ok(())
}
Expand Down
3 changes: 2 additions & 1 deletion examples/metrics/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
ops::cast_and_scale(&image, &mut image_f32, 1.0 / 255.0)?;

// modify the image to see the changes
let image_dirty = imgproc::flip::horizontal_flip(&image_f32)?;
let mut image_dirty = Image::<f32, 3>::from_size_val(image.size(), 0.0)?;
imgproc::flip::horizontal_flip(&image_f32, &mut image_dirty)?;

// compute the mean squared error (mse) between the original and the modified image
let mse = imgproc::metrics::mse(&image_f32, &image_dirty)?;
Expand Down
Loading