Use np.float32 in more python doc examples

Includes all examples in the python readme, as well as examples for symbol codes.
bamler-lab · Aug 31, 2024 · 06cd7a2
1 parent 8520814
commit 06cd7a2
Show file tree

Hide file tree

Showing 7 changed files with 59 additions and 24 deletions.
diff --git a/README-python.md b/README-python.md
@@ -105,8 +105,8 @@ import numpy as np
 
 # Same message as above, but a complex entropy model consisting of two parts:
 message = np.array([6,   10,   -4,   2,   5,    2, 1, 0, 2], dtype=np.int32)
-means   = np.array([2.3,  6.1, -8.5, 4.1, 1.3], dtype=np.float64)
-stds    = np.array([6.2,  5.3,  3.8, 3.2, 4.7], dtype=np.float64)
+means   = np.array([2.3,  6.1, -8.5, 4.1, 1.3], dtype=np.float32)
+stds    = np.array([6.2,  5.3,  3.8, 3.2, 4.7], dtype=np.float32)
 entropy_model1 = constriction.stream.model.QuantizedGaussian(-50, 50)
 entropy_model2 = constriction.stream.model.Categorical(
     np.array([0.2, 0.5, 0.3], dtype=np.float32), # Probabilities of the symbols 0,1,2.

diff --git a/src/pybindings/mod.rs b/src/pybindings/mod.rs
@@ -116,8 +116,8 @@ use crate::NanError;
 ///
 /// # Same message as above, but a complex entropy model consisting of two parts:
 /// message = np.array([6,   10,   -4,   2,   5,    2, 1, 0, 2], dtype=np.int32)
-/// means   = np.array([2.3,  6.1, -8.5, 4.1, 1.3], dtype=np.float64)
-/// stds    = np.array([6.2,  5.3,  3.8, 3.2, 4.7], dtype=np.float64)
+/// means   = np.array([2.3,  6.1, -8.5, 4.1, 1.3], dtype=np.float32)
+/// stds    = np.array([6.2,  5.3,  3.8, 3.2, 4.7], dtype=np.float32)
 /// entropy_model1 = constriction.stream.model.QuantizedGaussian(-50, 50)
 /// entropy_model2 = constriction.stream.model.Categorical(
 ///     np.array([0.2, 0.5, 0.3], dtype=np.float32), # Probabilities of the symbols 0,1,2.
@@ -259,7 +259,7 @@ fn init_stream(py: Python<'_>, module: &PyModule) -> PyResult<()> {
 /// import numpy as np
 ///
 /// # Define an entropy model over the (implied) alphabet {0, 1, 2, 3}:
-/// probabils = np.array([0.3, 0.2, 0.4, 0.1], dtype=np.float64)
+/// probabils = np.array([0.3, 0.2, 0.4, 0.1], dtype=np.float32)
 ///
 /// # Encode some example message, using the same model for each symbol here:
 /// message = [1, 3, 2, 3, 0, 1, 3, 0, 2, 1, 1, 3, 3, 1, 2, 0, 1, 3, 1]
@@ -291,7 +291,7 @@ fn init_stream(py: Python<'_>, module: &PyModule) -> PyResult<()> {
 /// import numpy as np
 ///
 /// # Define an entropy model over the (implied) alphabet {0, 1, 2, 3}:
-/// probabils = np.array([0.3, 0.2, 0.4, 0.1], dtype=np.float64)
+/// probabils = np.array([0.3, 0.2, 0.4, 0.1], dtype=np.float32)
 ///
 /// # Encode some example message, using the same model for each symbol here:
 /// message = [1, 3, 2, 3, 0, 1, 3, 0, 2, 1, 1, 3, 3, 1, 2, 0, 1, 3, 1]
@@ -332,12 +332,12 @@ pub type PyReadonlyFloatArray2<'py> = PyReadonlyFloatArray<'py, numpy::Ix2>;
 
 impl<'py, D: ndarray::Dimension> FromPyObject<'py> for PyReadonlyFloatArray<'py, D> {
     fn extract(ob: &'py PyAny) -> PyResult<Self> {
-        if let Ok(x) = ob.extract::<PyReadonlyArray<'_, f32, D>>() {
-            Ok(PyReadonlyFloatArray::F32(x))
+        if let Ok(x) = ob.extract::<PyReadonlyArray<'_, f64, D>>() {
+            Ok(PyReadonlyFloatArray::F64(x))
         } else {
             // This should also return a well crafted error in case it fails.
-            ob.extract::<PyReadonlyArray<'_, f64, D>>()
-                .map(PyReadonlyFloatArray::F64)
+            ob.extract::<PyReadonlyArray<'_, f32, D>>()
+                .map(PyReadonlyFloatArray::F32)
         }
     }
 }

diff --git a/src/pybindings/symbol/huffman.rs b/src/pybindings/symbol/huffman.rs
@@ -2,7 +2,10 @@ use std::prelude::v1::*;
 
 use pyo3::prelude::*;
 
-use crate::{pybindings::PyReadonlyFloatArray1, symbol::huffman};
+use crate::{
+    pybindings::{PyReadonlyFloatArray, PyReadonlyFloatArray1},
+    symbol::huffman,
+};
 
 pub fn init_module(_py: Python<'_>, module: &PyModule) -> PyResult<()> {
     module.add_class::<EncoderHuffmanTree>()?;
@@ -12,9 +15,9 @@ pub fn init_module(_py: Python<'_>, module: &PyModule) -> PyResult<()> {
 
 /// A Huffman tree that can be used for encoding data.
 ///
-/// Expects a single argument `probabilities`, which is a rank-1 numpy array with
-/// `dtype=np.float64` that specifies the probabilities of each one of the symbols in the
-/// range `{0, 1, ..., len(probabilities)-1}`. All probabilities must be nonnegative and
+/// Expects a single argument `probabilities`, which is a rank-1 numpy array with `dtype`
+/// `np.float32` or `np.float64` that specifies the probabilities of each one of the symbols in
+/// the range `{0, 1, ..., len(probabilities)-1}`. All probabilities must be nonnegative and
 /// finite, but probabilities do not need to add up to one since only the ratios of
 /// probabilities will affect the shape of the constructed Huffman tree (note, however, that
 /// rescaling probabilities can, in edge cases, affect the shape of the Huffman tree due
@@ -34,19 +37,28 @@ impl EncoderHuffmanTree {
     #[new]
     #[pyo3(text_signature = "(self, probabilities)")]
     pub fn new(probabilities: PyReadonlyFloatArray1<'_>) -> PyResult<Self> {
-        let inner = huffman::EncoderHuffmanTree::from_float_probabilities::<f64, _>(
-            probabilities.cast_f64()?.as_array(),
-        )?;
+        let inner = match probabilities {
+            PyReadonlyFloatArray::F32(probabilities) => {
+                huffman::EncoderHuffmanTree::from_float_probabilities::<f32, _>(
+                    probabilities.as_array(),
+                )
+            }
+            PyReadonlyFloatArray::F64(probabilities) => {
+                huffman::EncoderHuffmanTree::from_float_probabilities::<f64, _>(
+                    probabilities.as_array(),
+                )
+            }
+        }?;
 
         Ok(Self { inner })
     }
 }
 
 /// A Huffman tree that can be used for decoding data.
 ///
-/// Expects a single argument `probabilities`, which is a rank-1 numpy array with
-/// `dtype=np.float64` that specifies the probabilities of each one of the symbols in the
-/// range `{0, 1, ..., len(probabilities)-1}`. All probabilities must be nonnegative and
+/// Expects a single argument `probabilities`, which is a rank-1 numpy array with `dtype`
+/// `np.float32` or `np.float64` that specifies the probabilities of each one of the symbols in
+/// the range `{0, 1, ..., len(probabilities)-1}`. All probabilities must be nonnegative and
 /// finite, but probabilities do not need to add up to one since only the ratios of
 /// probabilities will affect the shape of the constructed Huffman tree (note, however, that
 /// rescaling probabilities can, in edge cases, affect the shape of the Huffman tree due
@@ -66,9 +78,18 @@ impl DecoderHuffmanTree {
     #[new]
     #[pyo3(text_signature = "(self, probabilities)")]
     pub fn new(probabilities: PyReadonlyFloatArray1<'_>) -> PyResult<Self> {
-        let inner = huffman::DecoderHuffmanTree::from_float_probabilities::<f64, _>(
-            probabilities.cast_f64()?.as_array(),
-        )?;
+        let inner = match probabilities {
+            PyReadonlyFloatArray::F32(probabilities) => {
+                huffman::DecoderHuffmanTree::from_float_probabilities::<f32, _>(
+                    probabilities.as_array(),
+                )
+            }
+            PyReadonlyFloatArray::F64(probabilities) => {
+                huffman::DecoderHuffmanTree::from_float_probabilities::<f64, _>(
+                    probabilities.as_array(),
+                )
+            }
+        }?;
 
         Ok(Self { inner })
     }

diff --git a/tests/python/test_docexamples.py b/tests/python/test_docexamples.py
@@ -108,6 +108,7 @@ def test_module_example3():
     compressed = encoder.get_compressed()
     print(f"compressed representation: {compressed}")
     print(f"(in binary: {[bin(word) for word in compressed]})")
+    assert np.all(compressed == np.array([3176507208], dtype=np.uint32))
 
     decoder = constriction.stream.queue.RangeDecoder(compressed)
     decoded_part1 = decoder.decode(entropy_model1, means, stds)
@@ -1007,6 +1008,8 @@ def test_huffman1():
     compressed, bitrate = encoder.get_compressed()
     print(compressed, bitrate)  # (prints: [3756389791, 61358], 48)
     print(f"(in binary: {[bin(word) for word in compressed]}")
+    assert np.all(compressed == np.array([3756389791, 61358], dtype=np.uint32))
+    assert bitrate == 48
 
     # Decode the message
     decoder = constriction.symbol.QueueDecoder(compressed)
@@ -1033,8 +1036,10 @@ def test_huffman2():
 
     # Obtain the compressed representation and the bitrate:
     compressed, bitrate = coder.get_compressed()
-    print(compressed, bitrate)  # (prints: [[2818274807, 129455] 48)
+    print(compressed, bitrate)  # (prints: [2818274807, 129455] 48)
     print(f"(in binary: {[bin(word) for word in compressed]}")
+    assert np.all(compressed == np.array([2818274807, 129455], dtype=np.uint32))
+    assert bitrate == 48
 
     # Decode the message (we could explicitly construct a decoder:
     # `decoder = constritcion.symbol.StackCoder(compressed)`

diff --git a/tests/python/test_docexamples_f32.py b/tests/python/test_docexamples_f32.py
@@ -18,6 +18,7 @@ def test_module_example1():
     compressed = encoder.get_compressed()
     print(f"compressed representation: {compressed}")
     print(f"(in binary: {[bin(word) for word in compressed]})")
+    assert np.all(compressed == np.array([3114258274, 357938615], dtype=np.uint32))
 
     decoder = constriction.stream.stack.AnsCoder(compressed)
     decoded = decoder.decode(entropy_model, 9)  # (decodes 9 symbols)
@@ -38,6 +39,7 @@ def test_module_example2():
     compressed = encoder.get_compressed()
     print(f"compressed representation: {compressed}")
     print(f"(in binary: {[bin(word) for word in compressed]})")
+    assert np.all(compressed == np.array([2682585243, 513522013], dtype=np.uint32))
 
     decoder = constriction.stream.queue.RangeDecoder(
         compressed)  # <--CHANGED LINE
@@ -108,6 +110,7 @@ def test_module_example3():
     compressed = encoder.get_compressed()
     print(f"compressed representation: {compressed}")
     print(f"(in binary: {[bin(word) for word in compressed]})")
+    assert np.all(compressed == np.array([3176507206], dtype=np.uint32))
 
     decoder = constriction.stream.queue.RangeDecoder(compressed)
     decoded_part1 = decoder.decode(entropy_model1, means, stds)
@@ -1007,6 +1010,8 @@ def test_huffman1():
     compressed, bitrate = encoder.get_compressed()
     print(compressed, bitrate)  # (prints: [3756389791, 61358], 48)
     print(f"(in binary: {[bin(word) for word in compressed]}")
+    assert np.all(compressed == np.array([3756389791, 61358], dtype=np.uint32))
+    assert bitrate == 48
 
     # Decode the message
     decoder = constriction.symbol.QueueDecoder(compressed)
@@ -1035,6 +1040,8 @@ def test_huffman2():
     compressed, bitrate = coder.get_compressed()
     print(compressed, bitrate)  # (prints: [[2818274807, 129455] 48)
     print(f"(in binary: {[bin(word) for word in compressed]}")
+    assert np.all(compressed == np.array([2818274807, 129455], dtype=np.uint32))
+    assert bitrate == 48
 
     # Decode the message (we could explicitly construct a decoder:
     # `decoder = constritcion.symbol.StackCoder(compressed)`

diff --git a/tests/python/test_lazy_f32.py b/tests/python/test_lazy_f32.py
@@ -51,6 +51,7 @@ def test_module_example3():
     compressed = encoder.get_compressed()
     print(f"compressed representation: {compressed}")
     print(f"(in binary: {[bin(word) for word in compressed]})")
+    assert np.all(compressed == np.array([3176507206], dtype=np.uint32))
 
     decoder = constriction.stream.queue.RangeDecoder(compressed)
     decoded_part1 = decoder.decode(entropy_model1, means, stds)

diff --git a/tests/python/test_lazy_f64.py b/tests/python/test_lazy_f64.py
@@ -24,6 +24,7 @@ def test_module_example3():
     compressed = encoder.get_compressed()
     print(f"compressed representation: {compressed}")
     print(f"(in binary: {[bin(word) for word in compressed]})")
+    assert np.all(compressed == np.array([3176507208], dtype=np.uint32))
 
     decoder = constriction.stream.queue.RangeDecoder(compressed)
     decoded_part1 = decoder.decode(entropy_model1, means, stds)