Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speedup matrix ops #7

Merged
merged 3 commits into from
Aug 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions rustynum-rs/src/num_array/linalg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,30 +42,35 @@ where
);

let rows = lhs.shape()[0];
let cols = if rhs.shape().len() > 1 {
rhs.shape()[1]
let rhs_is_matrix = rhs.shape().len() > 1;
let cols = if rhs_is_matrix { rhs.shape()[1] } else { 1 };

// Transpose the right-hand side matrix if it is indeed a matrix (not a vector)
let rhs_transposed = if rhs_is_matrix {
rhs.transpose()
} else {
1
rhs.clone()
};

let mut result = if rhs.shape().len() > 1 {
let mut result = if rhs_is_matrix {
NumArray::new_with_shape(vec![T::default(); rows * cols], vec![rows, cols])
} else {
NumArray::new(vec![T::default(); rows])
};

for j in 0..cols {
let rhs_col = if rhs.shape().len() > 1 {
rhs.column_slice(j)
let rhs_col = if rhs_is_matrix {
rhs_transposed.row_slice(j) // Use row_slice since rhs is transposed
} else {
rhs.get_data().to_vec()
&rhs.get_data().to_vec()
};

for i in 0..rows {
let lhs_row = lhs.row_slice(i);

let sum = Ops::dot_product(lhs_row, &rhs_col);

if rhs.shape().len() > 1 {
if rhs_is_matrix {
result.set(&[i, j], sum);
} else {
result.set(&[i], sum);
Expand Down
58 changes: 55 additions & 3 deletions rustynum-rs/src/num_array/num_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,29 @@ where
}
}

/// Returns the transpose of a 2D matrix: element `(j, i)` of the result
/// equals element `(i, j)` of `self`.
///
/// # Returns
/// A new `NumArray` with shape `[cols, rows]` holding the transposed data.
///
/// # Panics
/// Panics when `self` is not exactly 2-dimensional.
pub fn transpose(&self) -> Self {
    assert!(
        self.shape.len() == 2,
        "Transpose is only valid for 2D matrices."
    );

    let (rows, cols) = (self.shape[0], self.shape[1]);

    // Walk the output in row-major order: output row `j` gathers the
    // j-th element of every input row, so the result needs no in-place
    // index juggling and is filled by a single pass of pushes.
    let mut flipped = Vec::with_capacity(rows * cols);
    for j in 0..cols {
        for i in 0..rows {
            flipped.push(self.data[i * cols + j]);
        }
    }

    NumArray::new_with_shape(flipped, vec![cols, rows])
}

/// Retrieves a reference to the underlying data vector.
///
/// # Returns
Expand Down Expand Up @@ -617,9 +640,10 @@ where
/// println!("Row slice: {:?}", row_slice);
/// ```
/// Borrows one row of a 2D array as a contiguous slice of the
/// underlying row-major data.
///
/// # Panics
/// Panics unless the array is exactly 2-dimensional; indexing past the
/// last row panics via the slice bounds check.
pub fn row_slice(&self, row: usize) -> &[T] {
    let shape = self.shape();
    assert_eq!(shape.len(), 2, "Only 2D arrays are supported.");
    // Row `row` occupies `width` consecutive elements starting at
    // `row * width` in row-major storage.
    let width = shape[1];
    let offset = row * width;
    &self.data[offset..offset + width]
}

Expand Down Expand Up @@ -829,6 +853,34 @@ mod tests {
assert_eq!(ones_array.get_data(), &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0]);
}

#[test]
fn test_matrix_transpose() {
    // 3x3 row-major input: rows are [1 2 3], [4 5 6], [7 8 9].
    let data: Vec<f32> = (1..=9).map(|v| v as f32).collect();
    let matrix = NumArray32::new_with_shape(data, vec![3, 3]);

    let transposed = matrix.transpose();

    // A square transpose keeps the shape; columns of the input become
    // the rows of the output.
    assert_eq!(transposed.shape(), &[3, 3]);
    assert_eq!(
        transposed.get_data(),
        &[1.0, 4.0, 7.0, 2.0, 5.0, 8.0, 3.0, 6.0, 9.0]
    );
}

#[test]
fn test_non_square_matrix_transpose() {
    // 2x4 input must transpose into a 4x2 output.
    let matrix =
        NumArray32::new_with_shape(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], vec![2, 4]);

    let result = matrix.transpose();

    assert_eq!(result.shape(), &[4, 2]);
    // Each output row pairs one element from each of the two input rows.
    assert_eq!(
        result.get_data(),
        &[1.0, 5.0, 2.0, 6.0, 3.0, 7.0, 4.0, 8.0]
    );
}

#[test]
fn test_reshape_successfully() {
let array = NumArray32::new(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
Expand Down
32 changes: 15 additions & 17 deletions rustynum-rs/src/simd_ops/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::simd::f32x16;
use std::simd::f64x8;
use std::simd::num::SimdFloat;
use std::simd::StdFloat;

const LANES_32: usize = 16;
const LANES_64: usize = 8;
Expand All @@ -16,13 +17,13 @@ impl SimdOps<f32> for f32x16 {
fn dot_product(a: &[f32], b: &[f32]) -> f32 {
assert_eq!(a.len(), b.len());
let len = a.len();
let mut sum = f32x16::splat(0.0);
let sum = f32x16::splat(0.0);

let chunks = len / LANES_32;
for i in 0..chunks {
let a_simd = f32x16::from_slice(&a[i * LANES_32..]);
let b_simd = f32x16::from_slice(&b[i * LANES_32..]);
sum += a_simd * b_simd;
let _ = sum.mul_add(a_simd, b_simd);
}

let mut scalar_sum = sum.reduce_sum();
Expand Down Expand Up @@ -87,25 +88,22 @@ impl SimdOps<f32> for f32x16 {
impl SimdOps<f64> for f64x8 {
fn dot_product(a: &[f64], b: &[f64]) -> f64 {
assert_eq!(a.len(), b.len());
let len = a.len();
let sum = f64x8::splat(0.0);

let (a_extra, a_chunks) = a.as_rchunks();
let (b_extra, b_chunks) = b.as_rchunks();

// These are always true, but for emphasis:
assert_eq!(a_chunks.len(), b_chunks.len());
assert_eq!(a_extra.len(), b_extra.len());

let mut sums = [0.0; LANES_64];
for ((x, y), d) in std::iter::zip(a_extra, b_extra).zip(&mut sums) {
*d = x * y;
let chunks = len / LANES_64;
for i in 0..chunks {
let a_simd = f64x8::from_slice(&a[i * LANES_64..]);
let b_simd = f64x8::from_slice(&b[i * LANES_64..]);
let _ = sum.mul_add(a_simd, b_simd);
}

let mut sums = f64x8::from_array(sums);
std::iter::zip(a_chunks, b_chunks).for_each(|(x, y)| {
sums += f64x8::from_array(*x) * f64x8::from_array(*y);
});
let mut scalar_sum = sum.reduce_sum();
for i in (chunks * LANES_64)..len {
scalar_sum += a[i] * b[i];
}

sums.reduce_sum()
scalar_sum
}

fn sum(a: &[f64]) -> f64 {
Expand Down