Skip to content

Commit

Permalink
Add l1 and l2 norm is (#21)
Browse files Browse the repository at this point in the history
* Add initial support for l1 and l2 norm

* Update code for norm computation

* Expose norm in python bindings

* Add performance test for norm operation

* Add test for l2 norm

* Add broadcasting support for numarray

* Add keep_dims support.

* Fix broadcasting divisions and pass to python bindings

* Fix python test

* Add tutorial for ml prep using rustynum

* Update tutorial and add to docs
  • Loading branch information
IgorSusmelj authored Feb 2, 2025
1 parent e51fe8e commit 91965fe
Show file tree
Hide file tree
Showing 11 changed files with 1,138 additions and 68 deletions.
141 changes: 141 additions & 0 deletions bindings/python/benchmarks/test_performance_norm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import numpy as np
import pytest
import rustynum as rnp


# -----------------------------------------------------------------------------
# Helper functions
# -----------------------------------------------------------------------------
def setup_vector(dtype, size=1000):
"""
Generate a 1D vector as a Python list.
"""
a = np.random.rand(size).astype(dtype)
return a.tolist()


def setup_matrix(dtype, rows=100, cols=100):
"""
Generate a 2D matrix as a Python list.
"""
a = np.random.rand(rows, cols).astype(dtype)
return a.tolist()


# -----------------------------------------------------------------------------
# Norm functions for 1D arrays
# -----------------------------------------------------------------------------
def norm1d_rustynum(a, dtype, p):
"""
Compute the norm (L1 if p==1, L2 if p==2) of a 1D array using RustyNum.
"""
a_rnp = rnp.NumArray(a, dtype=dtype)
# Full reduction; no axis provided means compute the norm over all elements.
return a_rnp.norm(p=p)


def norm1d_numpy(a, dtype, p):
"""
Compute the norm (L1 if p==1, L2 if p==2) of a 1D array using NumPy.
"""
a_np = np.array(a, dtype=dtype)
return np.linalg.norm(a_np, ord=p)


# -----------------------------------------------------------------------------
# Norm functions for 2D arrays (full reduction)
# -----------------------------------------------------------------------------
def norm2d_rustynum(a, dtype, p):
"""
Compute the norm (L1 if p==1, L2 if p==2) of a 2D array using RustyNum.
Providing axis=None ensures a full reduction across all elements.
"""
a_rnp = rnp.NumArray(a, dtype=dtype)
return a_rnp.norm(p=p, axis=None)


def norm2d_numpy(a, dtype, p):
"""
Compute a full reduction norm by flattening the matrix and using NumPy.
"""
a_np = np.array(a, dtype=dtype)
a_flat = a_np.flatten()
return np.linalg.norm(a_flat, ord=p)


# -----------------------------------------------------------------------------
# Benchmark functions for 1D norm
# -----------------------------------------------------------------------------
@pytest.mark.parametrize(
"func,dtype,size,p",
[
(norm1d_rustynum, "float32", 1000, 1),
(norm1d_rustynum, "float64", 1000, 1),
(norm1d_numpy, "float32", 1000, 1),
(norm1d_numpy, "float64", 1000, 1),
(norm1d_rustynum, "float32", 10000, 1),
(norm1d_rustynum, "float64", 10000, 1),
(norm1d_numpy, "float32", 10000, 1),
(norm1d_numpy, "float64", 10000, 1),
(norm1d_rustynum, "float32", 1000, 2),
(norm1d_rustynum, "float64", 1000, 2),
(norm1d_numpy, "float32", 1000, 2),
(norm1d_numpy, "float64", 1000, 2),
(norm1d_rustynum, "float32", 10000, 2),
(norm1d_rustynum, "float64", 10000, 2),
(norm1d_numpy, "float32", 10000, 2),
(norm1d_numpy, "float64", 10000, 2),
],
)
def test_norm_1d(benchmark, func, dtype, size, p):
"""
Benchmark for computing the norm (L1 and L2) on a 1D array.
"""
a = setup_vector(dtype, size)
group_name = f"norm_1d_{dtype}_p{p}"

benchmark.group = group_name
benchmark.extra_info["dtype"] = dtype
benchmark.extra_info["size"] = size
benchmark.extra_info["p"] = p
benchmark(func, a, dtype, p)


# -----------------------------------------------------------------------------
# Benchmark functions for 2D norm
# -----------------------------------------------------------------------------
@pytest.mark.parametrize(
"func,dtype,size,p",
[
(norm2d_rustynum, "float32", (100, 100), 1),
(norm2d_rustynum, "float64", (100, 100), 1),
(norm2d_numpy, "float32", (100, 100), 1),
(norm2d_numpy, "float64", (100, 100), 1),
(norm2d_rustynum, "float32", (500, 500), 1),
(norm2d_rustynum, "float64", (500, 500), 1),
(norm2d_numpy, "float32", (500, 500), 1),
(norm2d_numpy, "float64", (500, 500), 1),
(norm2d_rustynum, "float32", (100, 100), 2),
(norm2d_rustynum, "float64", (100, 100), 2),
(norm2d_numpy, "float32", (100, 100), 2),
(norm2d_numpy, "float64", (100, 100), 2),
(norm2d_rustynum, "float32", (500, 500), 2),
(norm2d_rustynum, "float64", (500, 500), 2),
(norm2d_numpy, "float32", (500, 500), 2),
(norm2d_numpy, "float64", (500, 500), 2),
],
)
def test_norm_2d(benchmark, func, dtype, size, p):
"""
Benchmark for computing the norm (L1 and L2) on a 2D array.
The matrix is flattened in the NumPy version to match the full reduction behavior.
"""
rows, cols = size
a = setup_matrix(dtype, rows, cols)
group_name = f"norm_2d_{dtype}_p{p}_{rows}x{cols}"

benchmark.group = group_name
benchmark.extra_info["dtype"] = dtype
benchmark.extra_info["size"] = size
benchmark.extra_info["p"] = p
benchmark(func, a, dtype, p)
32 changes: 30 additions & 2 deletions bindings/python/rustynum/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# rustynum_py_wrapper/__init__.py
import itertools
import math
from typing import Any, List, Sequence, Tuple, Union
from typing import Any, List, Optional, Sequence, Tuple, Union

from . import _rustynum

Expand Down Expand Up @@ -489,7 +489,10 @@ def __truediv__(self, other: Union["NumArray", float]) -> "NumArray":
if self.dtype != other.dtype:
raise ValueError("dtype mismatch between arrays")
# Use div_array method from bindings for NumArray division
return NumArray(self.inner.div_array(other.inner), dtype=self.dtype)
# Create new NumArray with the original shape
result = self.inner.div_array(other.inner)
# Important: Create NumArray with the original shape preserved
return NumArray(result, dtype=self.dtype)
elif isinstance(other, (int, float)):
# Use div_scalar method from bindings for scalar division
return NumArray(self.inner.div_scalar(other), dtype=self.dtype)
Expand Down Expand Up @@ -616,6 +619,20 @@ def slice(
result = self.inner.slice(axis, start, stop)
return NumArray(result, dtype=self.dtype)

def norm(
self, p: int = 2, axis: Optional[List[int]] = None, keepdims: bool = False
) -> "NumArray":
if self.dtype == "float32":
return NumArray(
_rustynum.norm_f32(self.inner, p, axis, keepdims), dtype="float32"
)
elif self.dtype == "float64":
return NumArray(
_rustynum.norm_f64(self.inner, p, axis, keepdims), dtype="float64"
)
else:
raise ValueError(f"Unsupported dtype for norm: {self.dtype}")


def zeros(shape: List[int], dtype: str = "float32") -> "NumArray":
"""
Expand Down Expand Up @@ -792,3 +809,14 @@ def concatenate(arrays: List["NumArray"], axis: int = 0) -> "NumArray":
)
else:
raise ValueError("Unsupported dtype for concatenation")


def norm(
a: "NumArray", p: int = 2, axis: Optional[List[int]] = None, keepdims: bool = False
) -> "NumArray":
if a.dtype == "float32":
return NumArray(_rustynum.norm_f32(a.inner, p, axis, keepdims), dtype="float32")
elif a.dtype == "float64":
return NumArray(_rustynum.norm_f64(a.inner, p, axis, keepdims), dtype="float64")
else:
raise ValueError(f"Unsupported dtype for norm: {a.dtype}")
94 changes: 85 additions & 9 deletions bindings/python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ impl PyNumArrayF32 {
})
}

fn div_array(&self, other: PyRef<PyNumArrayF32>) -> PyResult<Py<PyAny>> {
fn div_array(&self, other: PyRef<PyNumArrayF32>) -> PyResult<PyNumArrayF32> {
Python::with_gil(|py| {
let result = &self.inner / &other.inner; // Leveraging Rust's Add implementation
Ok(result.get_data().to_object(py)) // Convert Vec<f64> to Python list
let result = &self.inner / &other.inner;
Ok(PyNumArrayF32 { inner: result })
})
}

Expand All @@ -121,6 +121,24 @@ impl PyNumArrayF32 {
})
}

fn norm(
&self,
p: u32,
axis: Option<&PyList>,
keepdims: Option<bool>,
) -> PyResult<PyNumArrayF32> {
Python::with_gil(|py| {
let result = match axis {
Some(axis_list) => {
let axis_vec: Vec<usize> = axis_list.extract()?;
self.inner.norm(p, Some(&axis_vec), keepdims)
}
None => self.inner.norm(p, None, keepdims),
};
Ok(PyNumArrayF32 { inner: result })
})
}

fn tolist(&self, py: Python) -> PyObject {
let list = PyList::new(py, self.inner.get_data());
list.into()
Expand Down Expand Up @@ -270,22 +288,40 @@ impl PyNumArrayF64 {
})
}

fn div_array(&self, other: PyRef<PyNumArrayF64>) -> PyResult<Py<PyAny>> {
fn div_array(&self, other: PyRef<PyNumArrayF64>) -> PyResult<PyNumArrayF64> {
Python::with_gil(|py| {
let result = &self.inner / &other.inner; // Leveraging Rust's Add implementation
Ok(result.get_data().to_object(py)) // Convert Vec<f64> to Python list
let result = &self.inner / &other.inner;
Ok(PyNumArrayF64 { inner: result })
})
}

fn mean_axis(&self, axis: Option<&PyList>) -> PyResult<PyNumArrayF64> {
fn mean_axis(&self, axis: Option<&PyList>) -> PyResult<Py<PyAny>> {
Python::with_gil(|py| {
let result = match axis {
Some(axis_list) => {
let axis_vec: Vec<usize> = axis_list.extract()?; // Convert PyList to Vec<usize>
self.inner.mean_axis(Some(&axis_vec)) // Now correctly passing a slice wrapped in Some
let axis_vec: Vec<usize> = axis_list.extract()?;
self.inner.mean_axis(Some(&axis_vec))
}
None => self.inner.mean_axis(None),
};
Ok(result.get_data().to_object(py))
})
}

fn norm(
&self,
p: u32,
axis: Option<&PyList>,
keepdims: Option<bool>,
) -> PyResult<PyNumArrayF64> {
Python::with_gil(|py| {
let result = match axis {
Some(axis_list) => {
let axis_vec: Vec<usize> = axis_list.extract()?;
self.inner.norm(p, Some(&axis_vec), keepdims)
}
None => self.inner.norm(p, None, keepdims),
};
Ok(PyNumArrayF64 { inner: result })
})
}
Expand Down Expand Up @@ -984,6 +1020,44 @@ fn concatenate_f64(arrays: Vec<PyNumArrayF64>, axis: usize) -> PyResult<PyNumArr
Ok(PyNumArrayF64 { inner: result })
}

#[pyfunction]
fn norm_f32(
a: &PyNumArrayF32,
p: u32,
axis: Option<&PyList>,
keepdims: Option<bool>,
) -> PyResult<PyNumArrayF32> {
Python::with_gil(|py| {
let result = match axis {
Some(axis_list) => {
let axis_vec: Vec<usize> = axis_list.extract()?;
a.inner.norm(p, Some(&axis_vec), keepdims)
}
None => a.inner.norm(p, None, keepdims),
};
Ok(PyNumArrayF32 { inner: result })
})
}

#[pyfunction]
fn norm_f64(
a: &PyNumArrayF64,
p: u32,
axis: Option<&PyList>,
keepdims: Option<bool>,
) -> PyResult<PyNumArrayF64> {
Python::with_gil(|py| {
let result = match axis {
Some(axis_list) => {
let axis_vec: Vec<usize> = axis_list.extract()?;
a.inner.norm(p, Some(&axis_vec), keepdims)
}
None => a.inner.norm(p, None, keepdims),
};
Ok(PyNumArrayF64 { inner: result })
})
}

#[pymodule]
fn _rustynum(py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<PyNumArrayF32>()?;
Expand Down Expand Up @@ -1019,6 +1093,8 @@ fn _rustynum(py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(log_f64, m)?)?;
m.add_function(wrap_pyfunction!(sigmoid_f64, m)?)?;
m.add_function(wrap_pyfunction!(concatenate_f64, m)?)?;
m.add_function(wrap_pyfunction!(norm_f32, m)?)?;
m.add_function(wrap_pyfunction!(norm_f64, m)?)?;

Ok(())
}
Loading

0 comments on commit 91965fe

Please sign in to comment.