Commit
Document ..._fast and ..._perfect
Also
- fixes doc links that broke when I split up `model.rs` into a subtree
  of internal modules.
- Renames
  `LazyContiguousCategoricalEntropyModel::from_floating_point_probabilities`
  to
  `LazyContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`
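The `..._fast` suffix distinguishes this constructor from the `..._perfect` variants documented elsewhere in this commit. As a purely hypothetical illustration of that tradeoff (this is *not* constriction's actual implementation; the function name and logic below are invented for exposition), a "fast" constructor can scale floating-point probabilities to fixed-point weights in a single pass, while a "perfect" constructor would additionally optimize the rounding to minimize KL divergence:

```rust
// Hypothetical sketch only; NOT constriction's actual implementation.
// A "fast" constructor can simply scale floating-point probabilities to
// integer weights that sum to 1 << PRECISION, fixing up rounding error on the
// last symbol. A "perfect" constructor would instead search for the quantized
// weights that minimize the KL divergence to the input distribution.
fn quantize_fast(probabilities: &[f64], precision: u32) -> Vec<u32> {
    let total = 1u64 << precision;
    // Clamp each weight to at least 1 so no symbol gets zero probability
    // ("leaky" quantization); a real implementation handles underflow of the
    // final fix-up more carefully than this sketch does.
    let mut weights: Vec<u32> = probabilities
        .iter()
        .map(|&p| ((p * total as f64) as u32).max(1))
        .collect();
    let sum: u64 = weights.iter().map(|&w| u64::from(w)).sum();
    let last = weights.last_mut().expect("at least one symbol");
    *last = (i64::from(*last) + total as i64 - sum as i64) as u32;
    weights
}

fn main() {
    let weights = quantize_fast(&[0.5, 0.25, 0.25], 12);
    // The quantized weights exactly exhaust the fixed-point budget 2^12.
    assert_eq!(weights.iter().map(|&w| u64::from(w)).sum::<u64>(), 1 << 12);
    println!("{weights:?}");
}
```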
robamler committed Aug 26, 2024
1 parent 29c69e6 commit 3a07233
Showing 12 changed files with 299 additions and 173 deletions.
4 changes: 2 additions & 2 deletions src/pybindings/stream/model.rs
@@ -354,7 +354,7 @@ where
{
if lazy {
let model =
DefaultLazyContiguousCategoricalEntropyModel::from_floating_point_probabilities(
DefaultLazyContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast(
probabilities.to_vec(),
None,
)?;
@@ -451,7 +451,7 @@ impl LazyCategorical {
as Arc<dyn internals::Model>,
Some(probabilities) => {
let model =
DefaultLazyContiguousCategoricalEntropyModel::from_floating_point_probabilities(
DefaultLazyContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast(
probabilities.cast_f32()?.to_vec()?,
normalization,
).unwrap();
6 changes: 3 additions & 3 deletions src/pybindings/stream/model/internals.rs
@@ -441,7 +441,7 @@ where
parameterize_categorical_with_model_builder(
probabilities,
|probabilities| {
DefaultLazyContiguousCategoricalEntropyModel::from_floating_point_probabilities(
DefaultLazyContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast(
probabilities,
None,
)
@@ -573,7 +573,7 @@ impl Model for UnparameterizedLazyCategoricalDistribution {
if reverse {
for probabilities in probabilities.chunks_exact(range).rev() {
let model =
DefaultLazyContiguousCategoricalEntropyModel::from_floating_point_probabilities(
DefaultLazyContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast(
probabilities,
None,
).unwrap();
@@ -582,7 +582,7 @@ impl Model for UnparameterizedLazyCategoricalDistribution {
} else {
for probabilities in probabilities.chunks_exact(range) {
let model =
DefaultLazyContiguousCategoricalEntropyModel::from_floating_point_probabilities(
DefaultLazyContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast(
probabilities,
None,
).unwrap();
71 changes: 40 additions & 31 deletions src/stream/mod.rs
@@ -115,10 +115,11 @@
//! branches and a smaller internal coder state. Empirically, our decoding benchmarks in
//! the file `benches/lookup.rs` run more than twice as fast with an `AnsCoder` than with
//! a `RangeDecoder`. However, please note that (i) these benchmarks use the highly
//! optimized [lookup models](model::LookupDecoderModel); if you use other entropy
//! models then these will likely be the computational bottleneck, not the coder; (ii)
//! future versions of `constriction` may introduce further run-time optimizations; and
//! (iii) while *decoding* is more than two times faster with ANS, *encoding* is somewhat
//! optimized lookup models (see [`ContiguousLookupDecoderModel`] and
//! [`NonContiguousLookupDecoderModel`]); if you use other entropy models then these will
//! likely be the computational bottleneck, not the coder; (ii) future versions of
//! `constriction` may introduce further run-time optimizations; and (iii) while
//! *decoding* is more than two times faster with ANS, *encoding* is somewhat
//! (~&nbsp;10&nbsp;%) faster with Range Coding (this *might* be because encoding with
//! ANS, unlike decoding, involves an integer division, which is a surprisingly slow
//! operation on most hardware).
@@ -155,12 +156,12 @@
//!
//! *The near-optimal compression performance* of stream codes is to be seen in contrast to
//! symbol codes (see module [`symbol`](crate::symbol)), such as the well-known [Huffman
//! code](crate::symbol::huffman). Symbol codes do not amortize over symbols.
//! Instead, they map each symbol to a fixed sequence of bits of integer length (a
//! "codeword"). This leads to a typical overhead of 0.5&nbsp;bits *per symbol* in the best
//! case, and to an overhead of almost 1&nbsp;bit per symbol for entropy models with very
//! low (≪&nbsp;1&nbsp;bit of) entropy per symbol, which is common for deep learning based
//! entropy models. Stream codes do not suffer from this overhead.
//! code](crate::symbol::huffman). Symbol codes do not amortize over symbols. Instead, they
//! map each symbol to a fixed sequence of bits of integer length (a "codeword"). This leads
//! to a typical overhead of 0.5&nbsp;bits *per symbol* in the best case, and to an overhead
//! of almost 1&nbsp;bit per symbol for entropy models with very low (≪&nbsp;1&nbsp;bit of)
//! entropy per symbol, which is common for deep learning based entropy models. Stream codes
//! do not suffer from this overhead.
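The per-symbol overhead claimed in the doc comment above can be verified with a small, self-contained calculation (an illustration added here, not part of the diff):

```rust
// Why symbol codes waste bits on low-entropy symbols: a codeword must be at
// least one whole bit long, but a symbol with probability 0.99 carries only
// -log2(0.99) ≈ 0.0145 bits of information, so the per-symbol overhead
// approaches 1 bit. Stream codes amortize over symbols and avoid this.
fn information_content_bits(p: f64) -> f64 {
    -p.log2()
}

fn main() {
    let optimal = information_content_bits(0.99);
    // The shortest possible codeword in any symbol code is 1 bit.
    let symbol_code_overhead = 1.0 - optimal;
    assert!(optimal < 0.015);
    assert!(symbol_code_overhead > 0.98);
    println!("optimal: {optimal:.4} bits/symbol, overhead: {symbol_code_overhead:.4} bits/symbol");
}
```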
//!
//! *The computational efficiency* of stream codes is to be seen in contrast to block codes.
//! Block codes are symbol codes that operate on blocks of several consecutive symbols at
@@ -190,9 +191,10 @@
//! API](https://bamler-lab.github.io/constriction/apidoc/python/). The "default" presets
//! provide very near-optimal compression effectiveness for most conceivable applications
//! and high runtime performance on typical (64&nbsp;bit) desktop computers. However, the
//! "default" presets are *not* recommended for a [`LookupDecoderModel`] as their high
//! numerical precision would lead to enormous lookup tables (~&nbsp;67&nbsp;MB), which
//! would take a considerable time to build and likely lead to extremely poor caching.
//! "default" presets are *not* recommended for a [`ContiguousLookupDecoderModel`] or
//! [`NonContiguousLookupDecoderModel`] as their high numerical precision would lead to
//! enormous lookup tables (~&nbsp;67&nbsp;MB), which would take a considerable time to
//! build and likely lead to extremely poor caching.
//! - entropy *coders* with "default" presets: [`DefaultAnsCoder`],
//! [`DefaultRangeEncoder`], [`DefaultRangeDecoder`], and [`DefaultChainCoder`];
//! - entropy *models* with "default" presets: [`DefaultLeakyQuantizer`],
@@ -203,14 +205,13 @@
//! efficiency and memory consumption. The "small" presets use a lower numerical precision
//! and a smaller state and word size than the "default" presets. The lower numerical
//! precision makes it possible to use the highly runtime efficient
//! [`LookupDecoderModel`], the smaller state size reduces the memory overhead of jump
//! tables for random access, and the smaller word size may be advantageous on some
//! embedded devices.
//! [`ContiguousLookupDecoderModel`] or [`NonContiguousLookupDecoderModel`], the smaller
//! state size reduces the memory overhead of jump tables for random access, and the
//! smaller word size may be advantageous on some embedded devices.
//! - entropy *coders* with "small" presets: [`SmallAnsCoder`], [`SmallRangeEncoder`],
//! [`SmallRangeDecoder`], and [`SmallChainCoder`];
//! - entropy *models* with "small" presets: [`SmallContiguousLookupDecoderModel`],
//! [`SmallNonContiguousLookupDecoderModel`],
//! [`SmallContiguousCategoricalEntropyModel`],
//! - entropy *models* with "small" presets: [`ContiguousLookupDecoderModel`],
//! [`NonContiguousLookupDecoderModel`], [`SmallContiguousCategoricalEntropyModel`],
//! [`SmallNonContiguousCategoricalEncoderModel`],
//! [`SmallNonContiguousCategoricalDecoderModel`], and [`SmallLeakyQuantizer`].
//!
@@ -268,8 +269,9 @@
//! representing probabilities in fixed-point arithmetic. Must not be zero or larger than
//! `Probability::BITS`. A small `PRECISION` will lead to compression overhead due to poor
//! approximations of the true probability distribution. A large `PRECISION` will lead to
//! a large memory overhead if you use a [`LookupDecoderModel`], and it can make decoding
//! with a [`LeakilyQuantizedDistribution`] slow.
//! a large memory overhead if you use a [`ContiguousLookupDecoderModel`] or
//! [`NonContiguousLookupDecoderModel`], and it can make decoding with a
//! [`LeakilyQuantizedDistribution`] slow.
//! - The "default" preset sets `PRECISION = 24`.
//! - The "small" preset sets `PRECISION = 12`.
//!
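The "~&nbsp;67&nbsp;MB" figure in the doc comment above follows directly from the lookup table having `2^PRECISION` entries. A quick self-contained check (the entry sizes of 4 and 2 bytes are assumptions for illustration, not constriction's actual memory layout):

```rust
// A lookup decoder model stores one entry per possible quantile, i.e.
// 2^PRECISION entries, so table size grows exponentially in PRECISION.
fn lookup_table_bytes(precision: u32, bytes_per_entry: u64) -> u64 {
    (1u64 << precision) * bytes_per_entry
}

fn main() {
    // "default" preset: PRECISION = 24 -> 2^24 * 4 bytes ≈ 67 MB
    assert_eq!(lookup_table_bytes(24, 4), 67_108_864);
    // "small" preset: PRECISION = 12 -> 2^12 * 2 bytes = 8 KB
    assert_eq!(lookup_table_bytes(12, 2), 8_192);
    println!("default: {} bytes, small: {} bytes",
        lookup_table_bytes(24, 4), lookup_table_bytes(12, 2));
}
```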
@@ -282,26 +284,33 @@
//! [`DefaultRangeDecoder`]: queue::DefaultRangeDecoder
//! [`DefaultChainCoder`]: chain::DefaultChainCoder
//! [`DefaultLeakyQuantizer`]: model::DefaultLeakyQuantizer
//! [`DefaultContiguousCategoricalEntropyModel`]: model::DefaultContiguousCategoricalEntropyModel
//! [`DefaultNonContiguousCategoricalEncoderModel`]: model::DefaultNonContiguousCategoricalEncoderModel
//! [`DefaultNonContiguousCategoricalDecoderModel`]: model::DefaultNonContiguousCategoricalDecoderModel
//! [`DefaultContiguousCategoricalEntropyModel`]:
//! model::DefaultContiguousCategoricalEntropyModel
//! [`DefaultNonContiguousCategoricalEncoderModel`]:
//! model::DefaultNonContiguousCategoricalEncoderModel
//! [`DefaultNonContiguousCategoricalDecoderModel`]:
//! model::DefaultNonContiguousCategoricalDecoderModel
//! [`SmallAnsCoder`]: stack::SmallAnsCoder
//! [`SmallRangeEncoder`]: queue::SmallRangeEncoder
//! [`SmallRangeDecoder`]: queue::SmallRangeDecoder
//! [`SmallChainCoder`]: chain::SmallChainCoder
//! [`SmallLeakyQuantizer`]: model::SmallLeakyQuantizer
//! [`SmallContiguousLookupDecoderModel`]: model::SmallContiguousLookupDecoderModel
//! [`SmallNonContiguousLookupDecoderModel`]: model::SmallNonContiguousLookupDecoderModel
//! [`SmallContiguousCategoricalEntropyModel`]: model::SmallContiguousCategoricalEntropyModel
//! [`SmallNonContiguousCategoricalEncoderModel`]: model::SmallNonContiguousCategoricalEncoderModel
//! [`SmallNonContiguousCategoricalDecoderModel`]: model::SmallNonContiguousCategoricalDecoderModel
//! [`ContiguousLookupDecoderModel`]: model::ContiguousLookupDecoderModel
//! [`NonContiguousLookupDecoderModel`]: model::NonContiguousLookupDecoderModel
//! [`SmallContiguousCategoricalEntropyModel`]:
//! model::SmallContiguousCategoricalEntropyModel
//! [`SmallNonContiguousCategoricalEncoderModel`]:
//! model::SmallNonContiguousCategoricalEncoderModel
//! [`SmallNonContiguousCategoricalDecoderModel`]:
//! model::SmallNonContiguousCategoricalDecoderModel
//! [`AnsCoder`]: stack::AnsCoder
//! [`AnsCoder::from_binary`]: stack::AnsCoder::from_binary
//! [`ChainCoder`]: chain::ChainCoder
//! [`Cursor`]: crate::backends::Cursor
//! [`backends`]: crate::backends
//! [Deflate]: https://en.wikipedia.org/wiki/Deflate
//! [`LookupDecoderModel`]: model::LookupDecoderModel
//! [`ContiguousLookupDecoderModel`]: model::ContiguousLookupDecoderModel
//! [`NonContiguousLookupDecoderModel`]: model::NonContiguousLookupDecoderModel
//! [`LeakilyQuantizedDistribution`]: model::LeakilyQuantizedDistribution

#![allow(clippy::type_complexity)]
@@ -995,7 +1004,7 @@ pub trait Decode<const PRECISION: usize>: Code {
/// accidental misuse in this regard. We provide the ability to pass the `DecoderModel`
/// by value as an opportunity for micro-optimizations when dealing with models that can
/// be cheaply copied (see, e.g.,
/// [`LookupDecoderModel::as_view`](model::LookupDecoderModel::as_view)).
/// [`ContiguousLookupDecoderModel::as_view`](crate::stream::model::ContiguousLookupDecoderModel::as_view)).
///
/// If you want to decode each symbol with its individual entropy model, then consider
/// calling [`decode_symbols`] instead. If you just want to decode a single symbol, then
13 changes: 7 additions & 6 deletions src/stream/model.rs
@@ -41,14 +41,14 @@
//! *decoding* of i.i.d. data; these types build up a lookup table with `2^PRECISION`
//! entries (one entry per
//! possible *quantile*) and are therefore only recommended to be used with relatively
//! small `PRECISION`. See [`SmallContiguousLookupDecoderModel`] and
//! [`SmallNonContiguousLookupDecoderModel`].
//! small `PRECISION`. See [`ContiguousLookupDecoderModel`] and
//! [`NonContiguousLookupDecoderModel`].
//!
//! # Examples
//!
//! See [`LeakyQuantizer`](LeakyQuantizer#examples), [`ContiguousCategoricalEntropyModel`],
//! [`NonContiguousCategoricalEncoderModel`], [`NonContiguousCategoricalDecoderModel`], and
//! [`LookupDecoderModel`].
//! [`ContiguousLookupDecoderModel`] or [`NonContiguousLookupDecoderModel`].
//!
//! TODO: direct links to "Examples" sections.
//!
@@ -199,7 +199,8 @@ use crate::{BitArray, NonZeroBitArray};
/// e.g., [`Encode::encode_iid_symbols`](super::Encode::encode_iid_symbols)). This will
/// allow users to call your function either with a reference to an entropy model (all
/// shared references implement `Copy`), or with some cheaply copyable entropy model such
/// as a view to a lookup model (see [`LookupDecoderModel::as_view`]).
/// as a view to a lookup model (see [`ContiguousLookupDecoderModel::as_view`] or
/// [`NonContiguousLookupDecoderModel::as_view`]).
///
/// # See Also
///
@@ -491,7 +492,7 @@ pub trait IterableEntropyModel<'m, const PRECISION: usize>: EntropyModel<PRECISI
/// This method may be used, e.g., to export the model into a serializable format. It is
/// also used internally by constructors that create a different but equivalent
/// representation of the same entropy model (e.g., to construct a
/// [`LookupDecoderModel`] from some `EncoderModel`).
/// [`ContiguousLookupDecoderModel`] or [`NonContiguousLookupDecoderModel`] from some `EncoderModel`).
///
/// # Example
///
@@ -795,7 +796,7 @@ pub trait IterableEntropyModel<'m, const PRECISION: usize>: EntropyModel<PRECISI
/// Creates a [`DecoderModel`] from this `EntropyModel`
///
/// This is a fallback method that should only be used if no more specialized
/// conversions are available. It generates a [`LookupDecoderModel`] that makes no
/// conversions are available. It generates a [`ContiguousLookupDecoderModel`] or [`NonContiguousLookupDecoderModel`] that makes no
/// assumption about contiguity of the support. Thus, before calling this method first
/// check if the `Self` type has some inherent method with a name like
/// `to_lookup_decoder_model`. If it does, that method probably returns a