Skip to content

Commit

Permalink
runtime: use a more efficient swizzling implementation without array_…
Browse files Browse the repository at this point in the history
…chunks_mut
  • Loading branch information
chyyran committed Aug 21, 2024
1 parent f8c0555 commit 5560c1e
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 100 deletions.
87 changes: 0 additions & 87 deletions librashader-runtime/src/array_chunks_mut.rs

This file was deleted.

92 changes: 82 additions & 10 deletions librashader-runtime/src/image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ pub use image::ImageError;
use librashader_common::Size;
use std::marker::PhantomData;

use crate::array_chunks_mut::ArrayChunksMut;
use std::path::Path;

/// An uncompressed raw image ready to upload to GPU buffers.
Expand Down Expand Up @@ -43,20 +42,15 @@ impl PixelFormat for RGBA8 {

impl PixelFormat for BGRA8 {
fn convert(pixels: &mut Vec<u8>) {
assert!(pixels.len() % 4 == 0);
for [r, _g, b, _a] in ArrayChunksMut::new(pixels) {
std::mem::swap(b, r)
}
const BGRA_SWIZZLE: &[usize; 32] = &generate_swizzle([2, 1, 0, 3]);
swizzle_pixels(pixels, BGRA_SWIZZLE);
}
}

impl PixelFormat for ARGB8 {
fn convert(pixels: &mut Vec<u8>) {
assert!(pixels.len() % 4 == 0);
for [r, _g, b, a] in ArrayChunksMut::new(pixels) {
std::mem::swap(r, a); // abgr
std::mem::swap(b, r); // argb
}
const ARGB_SWIZZLE: &[usize; 32] = &generate_swizzle([3, 0, 1, 2]);
swizzle_pixels(pixels, ARGB_SWIZZLE);
}
}

Expand Down Expand Up @@ -97,3 +91,81 @@ impl<P: PixelFormat> Image<P> {
})
}
}

/// Reorders the channels of every 4-byte pixel in `pixels` in place.
///
/// `swizzle` is a 32-entry index table laid out like the output of `generate_swizzle`:
/// entry `i` names the byte, within the same 32-byte chunk, that is moved to position `i`.
/// Its first four entries are used as the per-pixel permutation for any trailing pixels
/// that do not fill a whole 32-byte chunk.
///
/// # Panics
///
/// Panics if `pixels.len()` is not a multiple of 4, or if a table entry indexes outside
/// the chunk (or, for the trailing pixels, outside the pixel) it is applied to.
fn swizzle_pixels(pixels: &mut Vec<u8>, swizzle: &'static [usize; 32]) {
    assert!(pixels.len() % 4 == 0);
    let mut chunks = pixels.chunks_exact_mut(32);

    // Gathering a whole 32-byte chunk through the index table should vectorize
    // better than swapping individual bytes pixel by pixel.
    for chunk in &mut chunks {
        let tmp = swizzle.map(|i| chunk[i]);
        chunk.copy_from_slice(&tmp[..])
    }

    // Fewer than eight pixels are left over. They must be reordered with the same
    // permutation as the full chunks, so take it from the table rather than
    // hard-coding one particular channel order.
    let pixel_swizzle = [swizzle[0], swizzle[1], swizzle[2], swizzle[3]];
    for pixel in chunks.into_remainder().chunks_exact_mut(4) {
        let tmp = pixel_swizzle.map(|i| pixel[i]);
        pixel.copy_from_slice(&tmp[..])
    }
}

/// Expands a 4-channel permutation into a `LEN`-entry byte-index table.
///
/// For each group of four consecutive positions starting at `base`, entry `base + k` is
/// set to `base + swizzle[k]`, so the same permutation is repeated for every group.
///
/// # Panics
///
/// Panics (a compile error when evaluated in a const context) if `LEN` is not a multiple
/// of 4, or if any entry of `swizzle` is greater than 3.
const fn generate_swizzle<const LEN: usize>(swizzle: [usize; 4]) -> [usize; LEN] {
    assert!(LEN % 4 == 0, "length of swizzle must be divisible by 4");

    let mut table = [0usize; LEN];

    // `for` loops and iterators are not usable in a `const fn`, hence the manual counters.
    let mut base = 0;
    while base < LEN {
        // Absolute indices of the four bytes in this group.
        let group = [base, base + 1, base + 2, base + 3];

        let mut lane = 0;
        while lane < 4 {
            table[base + lane] = group[swizzle[lane]];
            lane += 1;
        }

        base += 4;
    }

    table
}

#[cfg(test)]
mod test {
    use crate::image::generate_swizzle;

    /// The identity permutation must map every index to itself.
    #[test]
    pub fn generate_normal_swizzle() {
        #[rustfmt::skip]
        let expected: [usize; 32] = [
            0,  1,  2,  3,
            4,  5,  6,  7,
            8,  9,  10, 11,
            12, 13, 14, 15,
            16, 17, 18, 19,
            20, 21, 22, 23,
            24, 25, 26, 27,
            28, 29, 30, 31
        ];

        let actual = generate_swizzle::<32>([0, 1, 2, 3]);
        assert_eq!(actual, expected)
    }

    /// The `[3, 0, 1, 2]` permutation must be repeated for each group of four indices.
    #[test]
    pub fn generate_argb_swizzle() {
        #[rustfmt::skip]
        let expected: [usize; 32] = [
            3,  0,  1,  2,
            7,  4,  5,  6,
            11, 8,  9,  10,
            15, 12, 13, 14,
            19, 16, 17, 18,
            23, 20, 21, 22,
            27, 24, 25, 26,
            31, 28, 29, 30
        ];

        let actual = generate_swizzle::<32>([3, 0, 1, 2]);
        assert_eq!(actual, expected)
    }
}
3 changes: 0 additions & 3 deletions librashader-runtime/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,3 @@ pub mod render_target;

/// Helpers for handling framebuffers.
pub mod framebuffer;

/// array_chunks_mut polyfill
mod array_chunks_mut;

0 comments on commit 5560c1e

Please sign in to comment.