Add l1 and l2 norm is (#21)

* Add initial support for l1 and l2 norm * Update code for norm computation * Expose norm in python bindings * Add performance test for norm operation * Add test for l2 norm * Add broadcasting support for numarray * Add keep_dims support. * Fix broadcasting divisions and pass to python bindings * Fix python test * Add tutorial for ml prep using rustynum * Update tutorial and add to docs
IgorSusmelj · Feb 2, 2025 · 91965fe · 91965fe
1 parent e51fe8e
commit 91965fe
Show file tree

Hide file tree

Showing 11 changed files with 1,138 additions and 68 deletions.
diff --git a/bindings/python/benchmarks/test_performance_norm.py b/bindings/python/benchmarks/test_performance_norm.py
@@ -0,0 +1,141 @@
+import numpy as np
+import pytest
+import rustynum as rnp
+
+
+# -----------------------------------------------------------------------------
+# Helper functions
+# -----------------------------------------------------------------------------
+def setup_vector(dtype, size=1000):
+    """
+    Generate a 1D vector as a Python list.
+    """
+    a = np.random.rand(size).astype(dtype)
+    return a.tolist()
+
+
+def setup_matrix(dtype, rows=100, cols=100):
+    """
+    Generate a 2D matrix as a Python list.
+    """
+    a = np.random.rand(rows, cols).astype(dtype)
+    return a.tolist()
+
+
+# -----------------------------------------------------------------------------
+# Norm functions for 1D arrays
+# -----------------------------------------------------------------------------
+def norm1d_rustynum(a, dtype, p):
+    """
+    Compute the norm (L1 if p==1, L2 if p==2) of a 1D array using RustyNum.
+    """
+    a_rnp = rnp.NumArray(a, dtype=dtype)
+    # Full reduction; no axis provided means compute the norm over all elements.
+    return a_rnp.norm(p=p)
+
+
+def norm1d_numpy(a, dtype, p):
+    """
+    Compute the norm (L1 if p==1, L2 if p==2) of a 1D array using NumPy.
+    """
+    a_np = np.array(a, dtype=dtype)
+    return np.linalg.norm(a_np, ord=p)
+
+
+# -----------------------------------------------------------------------------
+# Norm functions for 2D arrays (full reduction)
+# -----------------------------------------------------------------------------
+def norm2d_rustynum(a, dtype, p):
+    """
+    Compute the norm (L1 if p==1, L2 if p==2) of a 2D array using RustyNum.
+    Providing axis=None ensures a full reduction across all elements.
+    """
+    a_rnp = rnp.NumArray(a, dtype=dtype)
+    return a_rnp.norm(p=p, axis=None)
+
+
+def norm2d_numpy(a, dtype, p):
+    """
+    Compute a full reduction norm by flattening the matrix and using NumPy.
+    """
+    a_np = np.array(a, dtype=dtype)
+    a_flat = a_np.flatten()
+    return np.linalg.norm(a_flat, ord=p)
+
+
+# -----------------------------------------------------------------------------
+# Benchmark functions for 1D norm
+# -----------------------------------------------------------------------------
+@pytest.mark.parametrize(
+    "func,dtype,size,p",
+    [
+        (norm1d_rustynum, "float32", 1000, 1),
+        (norm1d_rustynum, "float64", 1000, 1),
+        (norm1d_numpy, "float32", 1000, 1),
+        (norm1d_numpy, "float64", 1000, 1),
+        (norm1d_rustynum, "float32", 10000, 1),
+        (norm1d_rustynum, "float64", 10000, 1),
+        (norm1d_numpy, "float32", 10000, 1),
+        (norm1d_numpy, "float64", 10000, 1),
+        (norm1d_rustynum, "float32", 1000, 2),
+        (norm1d_rustynum, "float64", 1000, 2),
+        (norm1d_numpy, "float32", 1000, 2),
+        (norm1d_numpy, "float64", 1000, 2),
+        (norm1d_rustynum, "float32", 10000, 2),
+        (norm1d_rustynum, "float64", 10000, 2),
+        (norm1d_numpy, "float32", 10000, 2),
+        (norm1d_numpy, "float64", 10000, 2),
+    ],
+)
+def test_norm_1d(benchmark, func, dtype, size, p):
+    """
+    Benchmark for computing the norm (L1 and L2) on a 1D array.
+    """
+    a = setup_vector(dtype, size)
+    group_name = f"norm_1d_{dtype}_p{p}"
+
+    benchmark.group = group_name
+    benchmark.extra_info["dtype"] = dtype
+    benchmark.extra_info["size"] = size
+    benchmark.extra_info["p"] = p
+    benchmark(func, a, dtype, p)
+
+
+# -----------------------------------------------------------------------------
+# Benchmark functions for 2D norm
+# -----------------------------------------------------------------------------
+@pytest.mark.parametrize(
+    "func,dtype,size,p",
+    [
+        (norm2d_rustynum, "float32", (100, 100), 1),
+        (norm2d_rustynum, "float64", (100, 100), 1),
+        (norm2d_numpy, "float32", (100, 100), 1),
+        (norm2d_numpy, "float64", (100, 100), 1),
+        (norm2d_rustynum, "float32", (500, 500), 1),
+        (norm2d_rustynum, "float64", (500, 500), 1),
+        (norm2d_numpy, "float32", (500, 500), 1),
+        (norm2d_numpy, "float64", (500, 500), 1),
+        (norm2d_rustynum, "float32", (100, 100), 2),
+        (norm2d_rustynum, "float64", (100, 100), 2),
+        (norm2d_numpy, "float32", (100, 100), 2),
+        (norm2d_numpy, "float64", (100, 100), 2),
+        (norm2d_rustynum, "float32", (500, 500), 2),
+        (norm2d_rustynum, "float64", (500, 500), 2),
+        (norm2d_numpy, "float32", (500, 500), 2),
+        (norm2d_numpy, "float64", (500, 500), 2),
+    ],
+)
+def test_norm_2d(benchmark, func, dtype, size, p):
+    """
+    Benchmark for computing the norm (L1 and L2) on a 2D array.
+    The matrix is flattened in the NumPy version to match the full reduction behavior.
+    """
+    rows, cols = size
+    a = setup_matrix(dtype, rows, cols)
+    group_name = f"norm_2d_{dtype}_p{p}_{rows}x{cols}"
+
+    benchmark.group = group_name
+    benchmark.extra_info["dtype"] = dtype
+    benchmark.extra_info["size"] = size
+    benchmark.extra_info["p"] = p
+    benchmark(func, a, dtype, p)
diff --git a/bindings/python/rustynum/__init__.py b/bindings/python/rustynum/__init__.py
@@ -1,7 +1,7 @@
 # rustynum_py_wrapper/__init__.py
 import itertools
 import math
-from typing import Any, List, Sequence, Tuple, Union
+from typing import Any, List, Optional, Sequence, Tuple, Union
 
 from . import _rustynum
 
@@ -489,7 +489,10 @@ def __truediv__(self, other: Union["NumArray", float]) -> "NumArray":
             if self.dtype != other.dtype:
                 raise ValueError("dtype mismatch between arrays")
             # Use div_array method from bindings for NumArray division
-            return NumArray(self.inner.div_array(other.inner), dtype=self.dtype)
+            # Create new NumArray with the original shape
+            result = self.inner.div_array(other.inner)
+            # Important: Create NumArray with the original shape preserved
+            return NumArray(result, dtype=self.dtype)
         elif isinstance(other, (int, float)):
             # Use div_scalar method from bindings for scalar division
             return NumArray(self.inner.div_scalar(other), dtype=self.dtype)
@@ -616,6 +619,20 @@ def slice(
         result = self.inner.slice(axis, start, stop)
         return NumArray(result, dtype=self.dtype)
 
+    def norm(
+        self, p: int = 2, axis: Optional[List[int]] = None, keepdims: bool = False
+    ) -> "NumArray":
+        if self.dtype == "float32":
+            return NumArray(
+                _rustynum.norm_f32(self.inner, p, axis, keepdims), dtype="float32"
+            )
+        elif self.dtype == "float64":
+            return NumArray(
+                _rustynum.norm_f64(self.inner, p, axis, keepdims), dtype="float64"
+            )
+        else:
+            raise ValueError(f"Unsupported dtype for norm: {self.dtype}")
+
 
 def zeros(shape: List[int], dtype: str = "float32") -> "NumArray":
     """
@@ -792,3 +809,14 @@ def concatenate(arrays: List["NumArray"], axis: int = 0) -> "NumArray":
         )
     else:
         raise ValueError("Unsupported dtype for concatenation")
+
+
+def norm(
+    a: "NumArray", p: int = 2, axis: Optional[List[int]] = None, keepdims: bool = False
+) -> "NumArray":
+    if a.dtype == "float32":
+        return NumArray(_rustynum.norm_f32(a.inner, p, axis, keepdims), dtype="float32")
+    elif a.dtype == "float64":
+        return NumArray(_rustynum.norm_f64(a.inner, p, axis, keepdims), dtype="float64")
+    else:
+        raise ValueError(f"Unsupported dtype for norm: {a.dtype}")
diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
@@ -101,10 +101,10 @@ impl PyNumArrayF32 {
         })
     }
 
-    fn div_array(&self, other: PyRef<PyNumArrayF32>) -> PyResult<Py<PyAny>> {
+    fn div_array(&self, other: PyRef<PyNumArrayF32>) -> PyResult<PyNumArrayF32> {
         Python::with_gil(|py| {
-            let result = &self.inner / &other.inner; // Leveraging Rust's Add implementation
-            Ok(result.get_data().to_object(py)) // Convert Vec<f64> to Python list
+            let result = &self.inner / &other.inner;
+            Ok(PyNumArrayF32 { inner: result })
         })
     }
 
@@ -121,6 +121,24 @@ impl PyNumArrayF32 {
         })
     }
 
+    fn norm(
+        &self,
+        p: u32,
+        axis: Option<&PyList>,
+        keepdims: Option<bool>,
+    ) -> PyResult<PyNumArrayF32> {
+        Python::with_gil(|py| {
+            let result = match axis {
+                Some(axis_list) => {
+                    let axis_vec: Vec<usize> = axis_list.extract()?;
+                    self.inner.norm(p, Some(&axis_vec), keepdims)
+                }
+                None => self.inner.norm(p, None, keepdims),
+            };
+            Ok(PyNumArrayF32 { inner: result })
+        })
+    }
+
     fn tolist(&self, py: Python) -> PyObject {
         let list = PyList::new(py, self.inner.get_data());
         list.into()
@@ -270,22 +288,40 @@ impl PyNumArrayF64 {
         })
     }
 
-    fn div_array(&self, other: PyRef<PyNumArrayF64>) -> PyResult<Py<PyAny>> {
+    fn div_array(&self, other: PyRef<PyNumArrayF64>) -> PyResult<PyNumArrayF64> {
         Python::with_gil(|py| {
-            let result = &self.inner / &other.inner; // Leveraging Rust's Add implementation
-            Ok(result.get_data().to_object(py)) // Convert Vec<f64> to Python list
+            let result = &self.inner / &other.inner;
+            Ok(PyNumArrayF64 { inner: result })
         })
     }
 
-    fn mean_axis(&self, axis: Option<&PyList>) -> PyResult<PyNumArrayF64> {
+    fn mean_axis(&self, axis: Option<&PyList>) -> PyResult<Py<PyAny>> {
         Python::with_gil(|py| {
             let result = match axis {
                 Some(axis_list) => {
-                    let axis_vec: Vec<usize> = axis_list.extract()?; // Convert PyList to Vec<usize>
-                    self.inner.mean_axis(Some(&axis_vec)) // Now correctly passing a slice wrapped in Some
+                    let axis_vec: Vec<usize> = axis_list.extract()?;
+                    self.inner.mean_axis(Some(&axis_vec))
                 }
                 None => self.inner.mean_axis(None),
             };
+            Ok(result.get_data().to_object(py))
+        })
+    }
+
+    fn norm(
+        &self,
+        p: u32,
+        axis: Option<&PyList>,
+        keepdims: Option<bool>,
+    ) -> PyResult<PyNumArrayF64> {
+        Python::with_gil(|py| {
+            let result = match axis {
+                Some(axis_list) => {
+                    let axis_vec: Vec<usize> = axis_list.extract()?;
+                    self.inner.norm(p, Some(&axis_vec), keepdims)
+                }
+                None => self.inner.norm(p, None, keepdims),
+            };
             Ok(PyNumArrayF64 { inner: result })
         })
     }
@@ -984,6 +1020,44 @@ fn concatenate_f64(arrays: Vec<PyNumArrayF64>, axis: usize) -> PyResult<PyNumArr
     Ok(PyNumArrayF64 { inner: result })
 }
 
+#[pyfunction]
+fn norm_f32(
+    a: &PyNumArrayF32,
+    p: u32,
+    axis: Option<&PyList>,
+    keepdims: Option<bool>,
+) -> PyResult<PyNumArrayF32> {
+    Python::with_gil(|py| {
+        let result = match axis {
+            Some(axis_list) => {
+                let axis_vec: Vec<usize> = axis_list.extract()?;
+                a.inner.norm(p, Some(&axis_vec), keepdims)
+            }
+            None => a.inner.norm(p, None, keepdims),
+        };
+        Ok(PyNumArrayF32 { inner: result })
+    })
+}
+
+#[pyfunction]
+fn norm_f64(
+    a: &PyNumArrayF64,
+    p: u32,
+    axis: Option<&PyList>,
+    keepdims: Option<bool>,
+) -> PyResult<PyNumArrayF64> {
+    Python::with_gil(|py| {
+        let result = match axis {
+            Some(axis_list) => {
+                let axis_vec: Vec<usize> = axis_list.extract()?;
+                a.inner.norm(p, Some(&axis_vec), keepdims)
+            }
+            None => a.inner.norm(p, None, keepdims),
+        };
+        Ok(PyNumArrayF64 { inner: result })
+    })
+}
+
 #[pymodule]
 fn _rustynum(py: Python, m: &PyModule) -> PyResult<()> {
     m.add_class::<PyNumArrayF32>()?;
@@ -1019,6 +1093,8 @@ fn _rustynum(py: Python, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(log_f64, m)?)?;
     m.add_function(wrap_pyfunction!(sigmoid_f64, m)?)?;
     m.add_function(wrap_pyfunction!(concatenate_f64, m)?)?;
+    m.add_function(wrap_pyfunction!(norm_f32, m)?)?;
+    m.add_function(wrap_pyfunction!(norm_f64, m)?)?;
 
     Ok(())
 }