From 9a8956592532aca56ea41b3936ce81172e9bf0df Mon Sep 17 00:00:00 2001 From: Robert Bamler Date: Wed, 28 Aug 2024 19:58:39 +0200 Subject: [PATCH] Document lookup decoder models Includes several new doc tests. --- src/backends.rs | 2 +- src/stream/model.rs | 2 +- src/stream/model/categorical/contiguous.rs | 3 +- .../model/categorical/lazy_contiguous.rs | 11 +- .../model/categorical/lookup_contiguous.rs | 381 ++++++++++++++++-- .../model/categorical/lookup_noncontiguous.rs | 331 +++++++++++++-- .../model/categorical/non_contiguous.rs | 14 +- 7 files changed, 666 insertions(+), 78 deletions(-) diff --git a/src/backends.rs b/src/backends.rs index ea062d3..6a686a9 100644 --- a/src/backends.rs +++ b/src/backends.rs @@ -1023,7 +1023,7 @@ impl Seek for Reverse { /// ); /// // Encoding *a few* more symbols works ... /// cursor_coder.encode_iid_symbols_reverse(65..75, &model).unwrap(); -/// // ... but at some point we'll run out of buffer space. +/// // ... but at some point we'll run out of buffer space: /// assert_eq!( /// cursor_coder.encode_iid_symbols_reverse(50..65, &model), /// Err(CoderError::Backend(constriction::backends::BoundedWriteError::OutOfSpace)) diff --git a/src/stream/model.rs b/src/stream/model.rs index 8c782c5..4ae968b 100644 --- a/src/stream/model.rs +++ b/src/stream/model.rs @@ -354,7 +354,7 @@ pub trait EncoderModel: EntropyModel { /// let probabilities = vec![1u32 << 21, 1 << 23, 1 << 22, 1 << 21]; /// let model = DefaultNonContiguousCategoricalEncoderModel // "Default" uses `PRECISION = 24` /// ::from_symbols_and_nonzero_fixed_point_probabilities( - /// symbols.iter().copied(), &probabilities, false) + /// symbols.iter().copied(), probabilities.iter().copied(), false) /// .unwrap(); /// /// assert_eq!(model.floating_point_probability::('a'), 0.125); diff --git a/src/stream/model/categorical/contiguous.rs b/src/stream/model/categorical/contiguous.rs index 768fda4..e9a634a 100644 --- a/src/stream/model/categorical/contiguous.rs +++ 
b/src/stream/model/categorical/contiguous.rs @@ -400,7 +400,8 @@ impl /// assert_eq!(probabilities.iter().sum::(), 1 << 24); /// /// let model = DefaultContiguousCategoricalEntropyModel - /// ::from_nonzero_fixed_point_probabilities(&probabilities, false).unwrap(); + /// ::from_nonzero_fixed_point_probabilities( + /// probabilities.iter().copied(), false).unwrap(); /// let symbol_table = model.floating_point_symbol_table::().collect::>(); /// assert_eq!( /// symbol_table, diff --git a/src/stream/model/categorical/lazy_contiguous.rs b/src/stream/model/categorical/lazy_contiguous.rs index 6c8c8bf..acd8ac3 100644 --- a/src/stream/model/categorical/lazy_contiguous.rs +++ b/src/stream/model/categorical/lazy_contiguous.rs @@ -44,13 +44,11 @@ pub type SmallLazyContiguousCategoricalEntropyModel> = /// /// # When Should I Use This Type of Entropy Model? /// -/// - Use this type if you want to encode or decode only a few (or even just a single) +/// - Use this type if you want to encode or decode only very few (or even just a single) /// symbol with the same categorical distribution. /// - Use [`ContiguousCategoricalEntropyModel`], [`NonContiguousCategoricalEncoderModel`], -/// or [`NonContiguousCategoricalDecoderModel`] if you want to encode many symbols with -/// the same categorical distribution (more precisely, if the number of encoded or decoded -/// symbols is on the order of, or larger than, the square root of the size of the -/// alphabet, i.e., the support of the model). These models precalculate the fixed-point +/// or [`NonContiguousCategoricalDecoderModel`] if you want to encode several symbols with +/// the same categorical distribution. These models precalculate the fixed-point /// approximation of the entire cumulative distribution function at model construction. 
/// - Use [`ContiguousLookupDecoderModel`] or [`NonContiguousLookupDecoderModel`] (together /// with a small `Probability` data type, see [discussion of presets]) for decoding a @@ -72,7 +70,7 @@ pub type SmallLazyContiguousCategoricalEntropyModel> = /// [`NonContiguousCategoricalDecoderModel`]. /// - encoding a symbol (calling [`EncoderModel::left_cumulative_and_probability`]): /// - runtime cost: `Θ(1)` (cheaper than for [`NonContiguousCategoricalEncoderModel`] -/// since it compiles to a simiple array lookup rather than a `HashMap` lookup) +/// since it compiles to a simple array lookup rather than a `HashMap` lookup) /// - memory footprint: no heap allocations, constant stack space. /// - decoding a symbol (calling [`DecoderModel::quantile_function`]): /// - runtime cost: `Θ(log(N))` (both expected and worst-case; probably slightly cheaper @@ -101,7 +99,6 @@ pub type SmallLazyContiguousCategoricalEntropyModel> = /// [compatibility table for `ContiguousCategoricalEntropyModel`]: /// crate::stream::model::ContiguousCategoricalEntropyModel#compatibility-table /// [discussion of presets]: crate::stream#presets - #[derive(Debug, Clone, Copy)] pub struct LazyContiguousCategoricalEntropyModel { /// Invariants: diff --git a/src/stream/model/categorical/lookup_contiguous.rs b/src/stream/model/categorical/lookup_contiguous.rs index 99978c4..6c5bad7 100644 --- a/src/stream/model/categorical/lookup_contiguous.rs +++ b/src/stream/model/categorical/lookup_contiguous.rs @@ -13,8 +13,145 @@ use super::{ }; /// A tabularized [`DecoderModel`] that is optimized for fast decoding of i.i.d. 
symbols +/// over a contiguous alphabet of symbols (i.e., `{0, 1, ..., n-1}`) /// -/// TODO: documentation +/// The default type parameters correspond to the "small" [preset], i.e., they allow +/// decoding with a [`SmallAnsCoder`] or a [`SmallRangeDecoder`] (as well as with a +/// [`DefaultAnsCoder`] or a [`DefaultRangeDecoder`], since you can always use a "bigger" +/// coder on a "smaller" model). Increasing the const generic `PRECISION` by much beyond its +/// default value is not recommended because the size of the lookup table grows +/// exponentially in `PRECISION`, thus increasing both memory consumption and runtime (due +/// to reduced cache locality). +/// +/// # See also +/// +/// - [`NonContiguousLookupDecoderModel`] +/// +/// # Example +/// +/// ## Full typical usage example +/// +/// ``` +/// use constriction::stream::{ +/// model::{ +/// ContiguousLookupDecoderModel, IterableEntropyModel, +/// SmallContiguousCategoricalEntropyModel, +/// }, +/// queue::{SmallRangeDecoder, SmallRangeEncoder}, +/// Decode, Encode, +/// }; +/// +/// // Let's first encode some message. We use a `SmallContiguousCategoricalEntropyModel` +/// // for encoding, so that we can decode with a lookup model later. 
+/// let message = [2, 1, 3, 0, 0, 2, 0, 2, 1, 0, 2]; +/// let floating_point_probabilities = [0.4f32, 0.2, 0.1, 0.3]; +/// let encoder_model = +/// SmallContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect( +/// &floating_point_probabilities, +/// ) +/// .unwrap(); +/// let mut encoder = SmallRangeEncoder::new(); +/// encoder.encode_iid_symbols(message, &encoder_model); +/// +/// // Note: we could construct a matching `decoder` and `lookup_decoder_model` as follows: +/// // let mut decoder = encoder.into_decoder().unwrap(); +/// // let lookup_decoder_model = encoder_model.to_lookup_decoder_model(); +/// # { // (actually run this in the doc test to be sure) +/// # let mut decoder = encoder.clone().into_decoder().unwrap(); +/// # let lookup_decoder_model = encoder_model.to_lookup_decoder_model(); +/// # } +/// // But in a more realistic compression setup, we'd want to serialize the compressed bit string +/// // (and possibly the model) to a file and read it back. So let's simulate this here: +/// +/// let compressed = encoder.get_compressed(); +/// let fixed_point_probabilities = encoder_model +/// .symbol_table() +/// .map(|(_symbol, _cdf, probability)| probability.get()) +/// .collect::>(); +/// +/// // ... write `compressed` and `fixed_point_probabilities` to a file and read them back ... +/// +/// let lookup_decoder_model = +/// ContiguousLookupDecoderModel::::from_nonzero_fixed_point_probabilities( +/// &fixed_point_probabilities, +/// false, +/// ) +/// .unwrap(); +/// let mut decoder = SmallRangeDecoder::from_compressed(compressed).unwrap(); +/// +/// let reconstructed = decoder +/// .decode_iid_symbols(11, &lookup_decoder_model) +/// .collect::, _>>() +/// .unwrap(); +/// +/// assert_eq!(&reconstructed[..], message); +/// ``` +/// +/// ## Compatibility with "default" entropy coders +/// +/// The above example uses coders with the "small" [preset] to demonstrate typical usage of +/// lookup decoder models. 
However, lookup models are also compatible with coders with the +/// "default" preset (you can always use a "smaller" model with a "larger" coder; so you +/// could, e.g., encode part of a message with a model that uses the "default" preset and +/// another part of the message with a model that uses the "small" preset so it can be +/// decoded with a lookup model). +/// +/// ``` +/// // Same imports, `message`, and `floating_point_probabilities` as in the example above ... +/// # use constriction::stream::{ +/// # model::{ +/// # ContiguousLookupDecoderModel, IterableEntropyModel, +/// # SmallContiguousCategoricalEntropyModel, +/// # }, +/// # queue::{DefaultRangeDecoder, DefaultRangeEncoder}, +/// # Decode, Encode, +/// # }; +/// # +/// # let message = [2, 1, 3, 0, 0, 2, 0, 2, 1, 0, 2]; +/// # let floating_point_probabilities = [0.4f32, 0.2, 0.1, 0.3]; +/// +/// let encoder_model = +/// SmallContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect( +/// &floating_point_probabilities, +/// ) +/// .unwrap(); // We're using a "small" encoder model again ... +/// let mut encoder = DefaultRangeEncoder::new(); // ... but now with a "default" coder. +/// encoder.encode_iid_symbols(message, &encoder_model); +/// +/// // ... obtain `compressed` and `fixed_point_probabilities` as in the example above ... 
+/// # let compressed = encoder.get_compressed(); +/// # let fixed_point_probabilities = encoder_model +/// # .symbol_table() +/// # .map(|(_symbol, _cdf, probability)| probability.get()) +/// # .collect::>(); +/// +/// // Then decode with the same lookup model as before, but now with a "default" decoder: +/// let lookup_decoder_model = +/// ContiguousLookupDecoderModel::::from_nonzero_fixed_point_probabilities( +/// &fixed_point_probabilities, +/// false, +/// ) +/// .unwrap(); +/// let mut decoder = DefaultRangeDecoder::from_compressed(compressed).unwrap(); +/// +/// let reconstructed = decoder +/// .decode_iid_symbols(11, &lookup_decoder_model) +/// .collect::, _>>() +/// .unwrap(); +/// +/// assert_eq!(&reconstructed[..], message); +/// ``` +/// +/// You can also use an [`AnsCoder`] instead of a range coder of course. +/// +/// [`AnsCoder`]: crate::stream::stack::AnsCoder +/// [`SmallAnsCoder`]: crate::stream::stack::SmallAnsCoder +/// [`SmallRangeDecoder`]: crate::stream::queue::SmallRangeDecoder +/// [`DefaultAnsCoder`]: crate::stream::stack::DefaultAnsCoder +/// [`DefaultRangeDecoder`]: crate::stream::queue::DefaultRangeDecoder +/// [`NonContiguousLookupDecoderModel`]: +/// crate::stream::model::NonContiguousLookupDecoderModel +/// [preset]: crate::stream#presets #[derive(Debug, Clone, Copy)] pub struct ContiguousLookupDecoderModel< Probability = u16, @@ -42,16 +179,116 @@ where Probability: BitArray + Into, usize: AsPrimitive, { + /// Constructs a lookup table for decoding whose PMF approximates given `probabilities`. + /// + /// Similar to [`from_floating_point_probabilities_fast`], but the resulting + /// (fixed-point precision) model typically approximates the provided floating point + /// `probabilities` slightly better. + /// + /// See [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`] + /// for a detailed comparison between `..._fast` and `..._perfect` constructors of + /// categorical entropy models. 
However, note that, different to the case with + /// non-lookup models, using this `..._perfect` variant of the constructor may be + /// justified in more cases because + /// - lookup decoder models use a smaller `PRECISION` by default, so the differences in + /// bit rate between the `..._fast` and the `..._perfect` constructor are more + /// pronounced; and + /// - lookup decoder models should only be used if you expect to use them to decode + /// *lots of* symbols anyway, so an increased upfront cost for construction is less of + /// an issue. + /// + /// # Example + /// + /// ``` + /// use constriction::stream::{ + /// model::ContiguousLookupDecoderModel, + /// stack::SmallAnsCoder, + /// Decode, Code, + /// }; + /// + /// let probabilities = [0.3f32, 0.1, 0.4, 0.2]; + /// let decoder_model = ContiguousLookupDecoderModel:: + /// ::from_floating_point_probabilities_perfect(&probabilities).unwrap(); + /// + /// let compressed = [0x956Eu16, 0x0155]; // (imagine this was read from a file) + /// let expected = [1, 0, 0, 2, 2, 3, 2, 2, 0]; + /// let mut coder = SmallAnsCoder::from_compressed_slice(&compressed).unwrap(); + /// + /// let reconstructed = coder + /// .decode_iid_symbols(9, &decoder_model).collect::, _>>().unwrap(); + /// assert!(coder.is_empty()); + /// assert_eq!(reconstructed, expected); + /// ``` + /// + /// [`from_floating_point_probabilities_fast`]: + /// Self::from_floating_point_probabilities_fast + /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`]: + /// crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect #[allow(clippy::result_unit_err)] - pub fn from_floating_point_probabilities_contiguous_fast( + pub fn from_floating_point_probabilities_perfect(probabilities: &[F]) -> Result + where + F: FloatCore + core::iter::Sum + Into, + Probability: Into + AsPrimitive, + f64: AsPrimitive, + usize: AsPrimitive, + { + let slots = perfectly_quantized_probabilities::<_, _, 
PRECISION>(probabilities)?; + Self::from_nonzero_fixed_point_probabilities( + slots.into_iter().map(|slot| slot.weight), + false, + ) + } + + /// Faster but less accurate variant of [`from_floating_point_probabilities_perfect`] + /// + /// Semantics are analogous to + /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`], + /// except that this method constructs a *lookup table*, i.e., a model that takes + /// considerably more runtime to build but, once built, is optimized for very fast + /// decoding of lots i.i.d. symbols. + /// + /// # See also + /// + /// - [`from_floating_point_probabilities_perfect`], which can be slower but + /// typically approximates the provided `probabilities` better, which may be a good + /// trade-off in the kind of situations that lookup decoder models are intended for. + /// + /// # Example + /// + /// ``` + /// use constriction::stream::{ + /// model::ContiguousLookupDecoderModel, + /// stack::SmallAnsCoder, + /// Decode, Code, + /// }; + /// + /// let probabilities = [0.3f32, 0.1, 0.4, 0.2]; + /// let decoder_model = ContiguousLookupDecoderModel:: + /// ::from_floating_point_probabilities_fast(&probabilities, None).unwrap(); + /// + /// let compressed = [0xF592u16, 0x0133]; // (imagine this was read from a file) + /// let expected = [1, 0, 0, 2, 2, 3, 2, 2, 0]; + /// let mut coder = SmallAnsCoder::from_compressed_slice(&compressed).unwrap(); + /// + /// let reconstructed = coder + /// .decode_iid_symbols(9, &decoder_model).collect::, _>>().unwrap(); + /// assert!(coder.is_empty()); + /// assert_eq!(reconstructed, expected); + /// ``` + /// + /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`]: + /// crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast + /// [`from_floating_point_probabilities_perfect`]: + /// Self::from_floating_point_probabilities_perfect + #[allow(clippy::result_unit_err)] + pub fn 
from_floating_point_probabilities_fast( probabilities: &[F], normalization: Option, ) -> Result where F: FloatCore + core::iter::Sum + AsPrimitive, Probability: AsPrimitive, - f64: AsPrimitive, - usize: AsPrimitive + AsPrimitive, + usize: AsPrimitive, { generic_static_asserts!( (Probability: BitArray; const PRECISION: usize); @@ -82,50 +319,77 @@ where }) } - #[allow(clippy::result_unit_err)] - pub fn from_floating_point_probabilities_contiguous_perfect( - probabilities: &[F], - ) -> Result - where - F: FloatCore + core::iter::Sum + Into, - Probability: Into + AsPrimitive, - f64: AsPrimitive, - usize: AsPrimitive, - { - let slots = perfectly_quantized_probabilities::<_, _, PRECISION>(probabilities)?; - Self::from_nonzero_fixed_point_probabilities_contiguous( - slots.into_iter().map(|slot| slot.weight), - false, - ) - } - - /// Create a `LookupDecoderModel` over a contiguous range of symbols. + /// Deprecated constructor. + /// + /// This constructor has been deprecated in constriction version 0.4.0, and it will be + /// removed in constriction version 0.5.0. + /// + /// # Upgrade Instructions + /// + /// Call either of the following two constructors instead: + /// - [`from_floating_point_probabilities_perfect`] (referred to as `..._perfect` in the + /// following); or + /// - [`from_floating_point_probabilities_fast`] (referred to as `..._fast` in the + /// following). + /// + /// Both constructors approximate the given (floating-point) probability distribution in + /// fixed point arithmetic, and both construct a valid (exactly invertible) model that + /// is guaranteed to assign a nonzero probability to all symbols. The `..._perfect` + /// constructor finds the best possible approximation of the provided fixed-point + /// probability distribution (thus leading to the lowest bit rates), while the + /// `..._fast` constructor is faster but may find a *slightly* imperfect approximation. 
+ /// Note that, since lookup models use a smaller fixed-point `PRECISION` than other + /// models (e.g., [`ContiguousCategoricalEntropyModel`]), the difference in bit rate + /// between the two models is more pronounced. + /// + /// Note that the `..._fast` constructor breaks binary compatibility with `constriction` + /// version <= 0.3.5. If you need to be able to exchange binary compressed data with a + /// program that uses a lookup decoder model or a categorical entropy model from + /// `constriction` version <= 0.3.5, then call + /// [`from_floating_point_probabilities_perfect`]. + /// + /// # Compatibility Table /// - /// TODO: example + /// (Lookup decoder models can only be used for decoding; in the following table, + /// "encoding" refers to [`ContiguousCategoricalEntropyModel`]) + /// + /// | constructor used for encoding →
↓ constructor used for decoding ↓ | [legacy](ContiguousCategoricalEntropyModel::from_floating_point_probabilities) | [`..._perfect`](ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect) | [`..._fast`](ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast) | + /// | --------------------: | --------------- | --------------- | --------------- | + /// | **legacy (this one)** | ✅ compatible | ✅ compatible | ❌ incompatible | + /// | **[`..._perfect`]** | ✅ compatible | ✅ compatible | ❌ incompatible | + /// | **[`..._fast`]** | ❌ incompatible | ❌ incompatible | ✅ compatible | + /// + /// [`from_floating_point_probabilities_perfect`]: + /// Self::from_floating_point_probabilities_perfect + /// [`..._perfect`]: Self::from_floating_point_probabilities_perfect + /// [`from_floating_point_probabilities_fast`]: + /// Self::from_floating_point_probabilities_fast + /// [`..._fast`]: Self::from_floating_point_probabilities_fast #[deprecated( since = "0.4.0", - note = "Please use `from_symbols_and_floating_point_probabilities_fast` or \ - `from_symbols_and_floating_point_probabilities_perfect` instead. See documentation for \ + note = "Please use `from_floating_point_probabilities_perfect` or \ + `from_floating_point_probabilities_fast` instead. See documentation for \ detailed upgrade instructions." )] #[allow(clippy::result_unit_err)] - pub fn from_floating_point_probabilities_contiguous(probabilities: &[F]) -> Result + pub fn from_floating_point_probabilities(probabilities: &[F]) -> Result where F: FloatCore + core::iter::Sum + Into, Probability: Into + AsPrimitive, f64: AsPrimitive, usize: AsPrimitive, { - Self::from_floating_point_probabilities_contiguous_perfect(probabilities) + Self::from_floating_point_probabilities_perfect(probabilities) } - /// Create a `LookupDecoderModel` over a contiguous range of symbols using fixed point arighmetic. 
+ /// Create a `ContiguousLookupDecoderModel` from pre-calculated fixed-point + /// probabilities. /// /// # Example /// - /// See [`ContiguousLookupDecoderModel`]. + /// See [type level documentation](Self#example). #[allow(clippy::result_unit_err)] - pub fn from_nonzero_fixed_point_probabilities_contiguous( + pub fn from_nonzero_fixed_point_probabilities( probabilities: I, infer_last_probability: bool, ) -> Result @@ -176,12 +440,53 @@ where /// Makes a very cheap shallow copy of the model that can be used much like a shared /// reference. /// - /// The returned `LookupDecoderModel` implements `Copy`, which is a requirement for some - /// methods, such as [`Decode::decode_iid_symbols`]. These methods could also accept a - /// shared reference to a `NonContiguousCategoricalDecoderModel` (since all references - /// to entropy models are also entropy models, and all shared references implement - /// `Copy`), but passing a *view* instead may be slightly more efficient because it - /// avoids one level of dereferencing. + /// The returned `ContiguousLookupDecoderModel` implements `Copy`, which is a + /// requirement for some methods, such as [`Decode::decode_iid_symbols`]. These methods + /// could also accept a shared reference to a `NonContiguousCategoricalDecoderModel` + /// (since all references to entropy models are also entropy models, and all shared + /// references implement `Copy`), but passing a *view* instead may be slightly more + /// efficient because it avoids one level of dereferencing. 
+ /// + /// # Example + /// + /// ``` + /// use constriction::stream::{ + /// model::{ + /// ContiguousLookupDecoderModel, IterableEntropyModel, + /// SmallContiguousCategoricalEntropyModel, + /// }, + /// queue::SmallRangeDecoder, + /// Decode, Encode, + /// }; + /// + /// let expected = [2, 1, 3, 0, 0, 2, 0, 2, 1, 0, 2]; + /// let probabilities = [0.4f32, 0.2, 0.1, 0.3]; + /// let decoder_model = ContiguousLookupDecoderModel:: + /// ::from_floating_point_probabilities_perfect(&probabilities).unwrap(); + /// + /// let compressed = [0xA78Cu16, 0xA856]; // (imagine this was read from a file) + /// let mut decoder = SmallRangeDecoder::from_compressed(&compressed).unwrap(); + /// + /// // We can decode symbols by passing a shared reference to `decoder_model`: + /// let reconstructed = decoder + /// .decode_iid_symbols(11, &decoder_model) + /// .collect::, _>>() + /// .unwrap(); + /// assert_eq!(&reconstructed[..], &expected); + /// + /// // However, `decode_iid_symbols` internally calls `decode_symbol` multiple times. + /// // If we encode lots of symbols then it's slightly cheaper to first get a view of + /// // the decoder model, which we can then pass by value: + /// let mut decoder = SmallRangeDecoder::from_compressed(&compressed).unwrap(); // (same as before) + /// let decoder_model_view = decoder_model.as_view(); + /// let reconstructed2 = decoder + /// .decode_iid_symbols(11, decoder_model_view) + /// .collect::, _>>() + /// .unwrap(); + /// assert_eq!(&reconstructed2[..], &expected); + /// + /// // `decoder_model_view` can be used again here if necessary (since it implements `Copy`). + /// ``` /// /// [`Decode::decode_iid_symbols`]: crate::stream::Decode::decode_iid_symbols pub fn as_view( @@ -210,8 +515,8 @@ where /// Converts a lookup model into a non-lookup model. /// - /// This drops the lookup table, so its only conceivable use case is to - /// free memory while still holding on to the model. 
+ /// This drops the lookup table, so its only conceivable use case is to free memory + /// while still holding on to (a slower variant of) the model. /// /// # See also /// diff --git a/src/stream/model/categorical/lookup_noncontiguous.rs b/src/stream/model/categorical/lookup_noncontiguous.rs index 4c3bb91..6cb3bb5 100644 --- a/src/stream/model/categorical/lookup_noncontiguous.rs +++ b/src/stream/model/categorical/lookup_noncontiguous.rs @@ -13,8 +13,141 @@ use super::{ }; /// A tabularized [`DecoderModel`] that is optimized for fast decoding of i.i.d. symbols +/// over an arbitrary (i.e., not necessarily contiguous) alphabet of symbols /// -/// TODO: documentation +/// The default type parameters correspond to the "small" [preset], i.e., they allow +/// decoding with a [`SmallAnsCoder`] or a [`SmallRangeDecoder`] (as well as with a +/// [`DefaultAnsCoder`] or a [`DefaultRangeDecoder`], since you can always use a "bigger" +/// coder on a "smaller" model). Increasing the const generic `PRECISION` by much beyond its +/// default value is not recommended because the size of the lookup table grows +/// exponentially in `PRECISION`, thus increasing both memory consumption and runtime (due +/// to reduced cache locality). +/// +/// # See also +/// +/// - [`ContiguousLookupDecoderModel`] +/// +/// # Example +/// +/// ## Full typical usage example +/// +/// ``` +/// use constriction::stream::{ +/// model::{ +/// EncoderModel, NonContiguousLookupDecoderModel, IterableEntropyModel, +/// SmallNonContiguousCategoricalEncoderModel, +/// }, +/// queue::{SmallRangeDecoder, SmallRangeEncoder}, +/// Decode, Encode, +/// }; +/// +/// // Let's first encode some message. We use a `SmallNonContiguousCategoricalEncoderModel` +/// // for encoding, so that we can decode with a lookup model later. +/// let message = "Mississippi"; +/// let symbols = ['M', 'i', 's', 'p']; +/// let floating_point_probabilities = [1.0f32 / 11., 4. / 11., 4. / 11., 2. 
/ 11.]; +/// let encoder_model = SmallNonContiguousCategoricalEncoderModel +/// ::from_symbols_and_floating_point_probabilities_perfect( +/// symbols.iter().cloned(), +/// &floating_point_probabilities, +/// ) +/// .unwrap(); +/// let mut encoder = SmallRangeEncoder::new(); +/// encoder.encode_iid_symbols(message.chars(), &encoder_model); +/// +/// let compressed = encoder.get_compressed(); +/// let fixed_point_probabilities = symbols +/// .iter() +/// .map(|symbol| encoder_model.left_cumulative_and_probability(symbol).unwrap().1.get()) +/// .collect::>(); +/// +/// // ... write `compressed` and `fixed_point_probabilities` to a file and read them back ... +/// +/// let lookup_decoder_model = +/// NonContiguousLookupDecoderModel::<_, u16, _, _>::from_symbols_and_nonzero_fixed_point_probabilities( +/// symbols.iter().cloned(), +/// &fixed_point_probabilities, +/// false, +/// ) +/// .unwrap(); +/// let mut decoder = SmallRangeDecoder::from_compressed(compressed).unwrap(); +/// +/// let reconstructed = decoder +/// .decode_iid_symbols(11, &lookup_decoder_model) +/// .collect::>() +/// .unwrap(); +/// +/// assert_eq!(&reconstructed, message); +/// ``` +/// +/// ## Compatibility with "default" entropy coders +/// +/// The above example uses coders with the "small" [preset] to demonstrate typical usage of +/// lookup decoder models. However, lookup models are also compatible with coders with the +/// "default" preset (you can always use a "smaller" model with a "larger" coder; so you +/// could, e.g., encode part of a message with a model that uses the "default" preset and +/// another part of the message with a model that uses the "small" preset so it can be +/// decoded with a lookup model). +/// +/// ``` +/// // Same imports, `message`, `symbols` and `floating_point_probabilities` as above ... 
+/// # use constriction::stream::{ +/// # model::{ +/// # EncoderModel, NonContiguousLookupDecoderModel, IterableEntropyModel, +/// # SmallNonContiguousCategoricalEncoderModel, +/// # }, +/// # queue::{DefaultRangeDecoder, DefaultRangeEncoder}, +/// # Decode, Encode, +/// # }; +/// # +/// # let message = "Mississippi"; +/// # let symbols = ['M', 'i', 's', 'p']; +/// # let floating_point_probabilities = [1.0f32 / 11., 4. / 11., 4. / 11., 2. / 11.]; +/// +/// let encoder_model = SmallNonContiguousCategoricalEncoderModel +/// ::from_symbols_and_floating_point_probabilities_perfect( +/// symbols.iter().cloned(), +/// &floating_point_probabilities, +/// ) +/// .unwrap(); // We're using a "small" encoder model again ... +/// let mut encoder = DefaultRangeEncoder::new(); // ... but now with a "default" coder. +/// encoder.encode_iid_symbols(message.chars(), &encoder_model); +/// +/// // ... obtain `compressed` and `fixed_point_probabilities` as in the example above ... +/// # let compressed = encoder.get_compressed(); +/// # let fixed_point_probabilities = symbols +/// # .iter() +/// # .map(|symbol| encoder_model.left_cumulative_and_probability(symbol).unwrap().1.get()) +/// # .collect::>(); +/// +/// // Then decode with the same lookup model as before, but now with a "default" decoder: +/// let lookup_decoder_model = +/// NonContiguousLookupDecoderModel::<_, u16, _, _>::from_symbols_and_nonzero_fixed_point_probabilities( +/// symbols.iter().cloned(), +/// &fixed_point_probabilities, +/// false, +/// ) +/// .unwrap(); +/// let mut decoder = DefaultRangeDecoder::from_compressed(compressed).unwrap(); +/// +/// let reconstructed = decoder +/// .decode_iid_symbols(11, &lookup_decoder_model) +/// .collect::>() +/// .unwrap(); +/// +/// assert_eq!(&reconstructed, message); +/// ``` +/// +/// You can also use an [`AnsCoder`] instead of a range coder of course. 
+/// +/// [`AnsCoder`]: crate::stream::stack::AnsCoder +/// [`SmallAnsCoder`]: crate::stream::stack::SmallAnsCoder +/// [`SmallRangeDecoder`]: crate::stream::queue::SmallRangeDecoder +/// [`DefaultAnsCoder`]: crate::stream::stack::DefaultAnsCoder +/// [`DefaultRangeDecoder`]: crate::stream::queue::DefaultRangeDecoder +/// [`ContiguousLookupDecoderModel`]: +/// crate::stream::model::ContiguousLookupDecoderModel +/// [preset]: crate::stream#presets #[derive(Debug, Clone, Copy)] pub struct NonContiguousLookupDecoderModel< Symbol, @@ -50,6 +183,121 @@ where Symbol: Clone, usize: AsPrimitive, { + /// Constructs a lookup table for decoding whose PMF approximates given `probabilities`. + /// + /// Similar to [`from_symbols_and_floating_point_probabilities_fast`], but the resulting + /// (fixed-point precision) model typically approximates the provided floating point + /// `probabilities` slightly better. + /// + /// See [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`] + /// for a detailed comparison between `..._fast` and `..._perfect` constructors of + /// categorical entropy models. However, note that, different to the case with + /// non-lookup models, using this `..._perfect` variant of the constructor may be + /// justified in more cases because + /// - lookup decoder models use a smaller `PRECISION` by default, so the differences in + /// bit rate between the `..._fast` and the `..._perfect` constructor are more + /// pronounced; and + /// - lookup decoder models should only be used if you expect to use them to decode + /// *lots of* symbols anyway, so an increased upfront cost for construction is less of + /// an issue. 
+ /// + /// # Example + /// + /// ``` + /// use constriction::stream::{ + /// model::NonContiguousLookupDecoderModel, + /// stack::SmallAnsCoder, + /// Decode, Code, + /// }; + /// + /// let probabilities = [0.3f32, 0.1, 0.4, 0.2]; + /// let symbols = ['a', 'b', 'x', 'y']; + /// let decoder_model = NonContiguousLookupDecoderModel::<_, u16, _, _> + /// ::from_symbols_and_floating_point_probabilities_perfect( + /// symbols.iter().copied(), + /// &probabilities + /// ).unwrap(); + /// + /// let compressed = [0x956Eu16, 0x0155]; // (imagine this was read from a file) + /// let expected = ['b', 'a', 'a', 'x', 'x', 'y', 'x', 'x', 'a']; + /// let mut coder = SmallAnsCoder::from_compressed_slice(&compressed).unwrap(); + /// + /// let reconstructed = coder + /// .decode_iid_symbols(9, &decoder_model).collect::, _>>().unwrap(); + /// assert!(coder.is_empty()); + /// assert_eq!(reconstructed, expected); + /// ``` + /// + /// [`from_symbols_and_floating_point_probabilities_fast`]: + /// Self::from_symbols_and_floating_point_probabilities_fast + /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect`]: + /// crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_perfect + #[allow(clippy::result_unit_err)] + pub fn from_symbols_and_floating_point_probabilities_perfect( + symbols: impl IntoIterator, + probabilities: &[F], + ) -> Result + where + F: FloatCore + core::iter::Sum + Into, + Probability: Into + AsPrimitive, + f64: AsPrimitive, + usize: AsPrimitive, + { + let slots = perfectly_quantized_probabilities::<_, _, PRECISION>(probabilities)?; + Self::from_symbols_and_nonzero_fixed_point_probabilities( + symbols, + slots.into_iter().map(|slot| slot.weight), + false, + ) + } + + /// Faster but less accurate variant of + /// [`from_symbols_and_floating_point_probabilities_perfect`] + /// + /// Semantics are analogous to + /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`], + /// except 
that this method constructs a *lookup table*, i.e., a model that takes + /// considerably more runtime to build but, once built, is optimized for very fast + /// decoding of lots of i.i.d. symbols. + /// + /// # See also + /// + /// - [`from_symbols_and_floating_point_probabilities_perfect`], which can be slower but + /// typically approximates the provided `probabilities` better, which may be a good + /// trade-off in the kind of situations that lookup decoder models are intended for. + /// + /// # Example + /// + /// ``` + /// use constriction::stream::{ + /// model::NonContiguousLookupDecoderModel, + /// stack::SmallAnsCoder, + /// Decode, Code, + /// }; + /// + /// let probabilities = [0.3f32, 0.1, 0.4, 0.2]; + /// let symbols = ['a', 'b', 'x', 'y']; + /// let decoder_model = NonContiguousLookupDecoderModel::<_, u16, _, _> + /// ::from_symbols_and_floating_point_probabilities_fast( + /// symbols.iter().copied(), + /// &probabilities, + /// None, + /// ).unwrap(); + /// + /// let compressed = [0xF592u16, 0x0133]; // (imagine this was read from a file) + /// let expected = ['b', 'a', 'a', 'x', 'x', 'y', 'x', 'x', 'a']; + /// let mut coder = SmallAnsCoder::from_compressed_slice(&compressed).unwrap(); + /// + /// let reconstructed = coder + /// .decode_iid_symbols(9, &decoder_model).collect::, _>>().unwrap(); + /// assert!(coder.is_empty()); + /// assert_eq!(reconstructed, expected); + /// ``` + /// + /// [`ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast`]: + /// crate::stream::model::ContiguousCategoricalEntropyModel::from_floating_point_probabilities_fast + /// [`from_symbols_and_floating_point_probabilities_perfect`]: + /// Self::from_symbols_and_floating_point_probabilities_perfect #[allow(clippy::result_unit_err)] pub fn from_symbols_and_floating_point_probabilities_fast( symbols: impl IntoIterator, probabilities: &[F], @@ -83,28 +331,56 @@ where Ok(Self::from_symbol_table(symbol_table)) } - #[allow(clippy::result_unit_err)] - pub fn
from_symbols_and_floating_point_probabilities_perfect( - symbols: impl IntoIterator, - probabilities: &[F], - ) -> Result - where - F: FloatCore + core::iter::Sum + Into, - Probability: Into + AsPrimitive, - f64: AsPrimitive, - usize: AsPrimitive, - { - let slots = perfectly_quantized_probabilities::<_, _, PRECISION>(probabilities)?; - Self::from_symbols_and_nonzero_fixed_point_probabilities( - symbols, - slots.into_iter().map(|slot| slot.weight), - false, - ) - } - - /// Create a `LookupDecoderModel` over arbitrary symbols. + /// Deprecated constructor. + /// + /// This constructor has been deprecated in constriction version 0.4.0, and it will be + /// removed in constriction version 0.5.0. + /// + /// # Upgrade Instructions + /// + /// Call either of the following two constructors instead: + /// - [`from_symbols_and_floating_point_probabilities_perfect`] (referred to as + /// `..._perfect` in the following); or + /// - [`from_symbols_and_floating_point_probabilities_fast`] (referred to as `..._fast` + /// in the following). + /// + /// Both constructors approximate the given (floating-point) probability distribution in + /// fixed point arithmetic, and both construct a valid (exactly invertible) model that + /// is guaranteed to assign a nonzero probability to all symbols. The `..._perfect` + /// constructor finds the best possible fixed-point approximation of the provided + /// probability distribution (thus leading to the lowest bit rates), while the + /// `..._fast` constructor is faster but may find a *slightly* imperfect approximation. + /// Note that, since lookup models use a smaller fixed-point `PRECISION` than other + /// models (e.g., [`NonContiguousCategoricalDecoderModel`]), the difference in bit rate + /// between the two models is more pronounced. + /// + /// Note that the `..._fast` constructor breaks binary compatibility with `constriction` + /// version <= 0.3.5.
If you need to be able to exchange binary compressed data with a + /// program that uses a lookup decoder model or a categorical entropy model from + /// `constriction` version <= 0.3.5, then call + /// [`from_symbols_and_floating_point_probabilities_perfect`]. + /// + /// # Compatibility Table + /// + /// (Lookup decoder models can only be used for decoding; in the following table, + /// "encoding" refers to [`NonContiguousCategoricalEncoderModel`]) + /// + /// | constructor used for encoding →
↓ constructor used for decoding ↓ | [legacy](crate::stream::model::NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities) | [`..._perfect`](crate::stream::model::NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities_perfect) | [`..._fast`](crate::stream::model::NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities_fast) | + /// | --------------------: | --------------- | --------------- | --------------- | + /// | **legacy (this one)** | ✅ compatible | ✅ compatible | ❌ incompatible | + /// | **[`..._perfect`]** | ✅ compatible | ✅ compatible | ❌ incompatible | + /// | **[`..._fast`]** | ❌ incompatible | ❌ incompatible | ✅ compatible | /// - /// TODO: example + /// [`from_symbols_and_floating_point_probabilities_perfect`]: + /// Self::from_symbols_and_floating_point_probabilities_perfect + /// [`..._perfect`]: Self::from_symbols_and_floating_point_probabilities_perfect + /// [`from_symbols_and_floating_point_probabilities_fast`]: + /// Self::from_symbols_and_floating_point_probabilities_fast + /// [`..._fast`]: Self::from_symbols_and_floating_point_probabilities_fast + /// [`NonContiguousCategoricalEncoderModel`]: + /// crate::stream::model::NonContiguousCategoricalEncoderModel + /// [`NonContiguousCategoricalDecoderModel`]: + /// crate::stream::model::NonContiguousCategoricalDecoderModel #[deprecated( since = "0.4.0", note = "Please use `from_symbols_and_floating_point_probabilities_fast` or \ @@ -128,9 +404,12 @@ where ) } - /// Create a `LookupDecoderModel` over arbitrary symbols. + /// Create a `NonContiguousLookupDecoderModel` from pre-calculated fixed-point + /// probabilities. /// - /// TODO: example + /// # Example + /// + /// See [type level documentation](Self). #[allow(clippy::result_unit_err)] pub fn from_symbols_and_nonzero_fixed_point_probabilities( symbols: S, @@ -271,8 +550,8 @@ where /// Converts a lookup model into a non-lookup model. 
/// - /// This drops the lookup table, so its only conceivable use case is to - /// free memory while still holding on to the model. + /// This drops the lookup table, so its only conceivable use case is to free memory + /// while still holding on to (a slower variant of) the model. /// /// # See also /// diff --git a/src/stream/model/categorical/non_contiguous.rs b/src/stream/model/categorical/non_contiguous.rs index 6d81659..c8e5f9b 100644 --- a/src/stream/model/categorical/non_contiguous.rs +++ b/src/stream/model/categorical/non_contiguous.rs @@ -252,7 +252,10 @@ where /// /// # Compatibility Table /// - /// | constructor used for encoding →
↓ constructor used for decoding ↓ | legacy
(this one) | [`..._perfect`] | [`..._fast`] | + /// (In the following table, "encoding" refers to + /// [`NonContiguousCategoricalEncoderModel`]) + /// + /// | constructor used for encoding →
↓ constructor used for decoding ↓ | [legacy](NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities) | [`..._perfect`](NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities_perfect) | [`..._fast`](NonContiguousCategoricalEncoderModel::from_symbols_and_floating_point_probabilities_fast) | /// | --------------------: | --------------- | --------------- | --------------- | /// | **legacy (this one)** | ✅ compatible | ✅ compatible | ❌ incompatible | /// | **[`..._perfect`]** | ✅ compatible | ✅ compatible | ❌ incompatible | /// | **[`..._fast`]** | ❌ incompatible | ❌ incompatible | ✅ compatible | @@ -890,11 +893,14 @@ where /// /// # Compatibility Table /// + /// (In the following table, "decoding" refers to + /// [`NonContiguousCategoricalDecoderModel`]) + /// /// | constructor used for encoding →
↓ constructor used for decoding ↓ | legacy
(this one) | [`..._perfect`] | [`..._fast`] | /// | --------------------: | --------------- | --------------- | --------------- | - /// | **legacy (this one)** | ✅ compatible | ✅ compatible | ❌ incompatible | - /// | **[`..._perfect`]** | ✅ compatible | ✅ compatible | ❌ incompatible | - /// | **[`..._fast`]** | ❌ incompatible | ❌ incompatible | ✅ compatible | + /// | **[legacy](NonContiguousCategoricalDecoderModel::from_symbols_and_floating_point_probabilities)** | ✅ compatible | ✅ compatible | ❌ incompatible | + /// | **[`..._perfect`](NonContiguousCategoricalDecoderModel::from_symbols_and_floating_point_probabilities_perfect)** | ✅ compatible | ✅ compatible | ❌ incompatible | + /// | **[`..._fast`](NonContiguousCategoricalDecoderModel::from_symbols_and_floating_point_probabilities_fast)** | ❌ incompatible | ❌ incompatible | ✅ compatible | /// /// [`from_symbols_and_floating_point_probabilities_perfect`]: /// Self::from_symbols_and_floating_point_probabilities_perfect