diff --git a/src/lib.rs b/src/lib.rs index c6d3f62..e948e3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,19 +46,19 @@ //! [ronomon/deduplication](https://github.com/ronomon/deduplication) //! repository, written by Joran Dirk Greef. That variation makes several //! changes to the original algorithm, primarily to accomodate JavaScript. The -//! Rust version of this variation is found in the `ronomon` module in this +//! Rust version of this variation is found in the [`ronomon`] module in this //! crate. //! //! For a canonical implementation of the algorithm as described in the 2016 -//! paper, see the `v2016` crate. +//! paper, see the [`v2016`] module. //! //! For a canonical implementation of the algorithm as described in the 2020 -//! paper, see the `v2020` crate. This implementation produces identical cut +//! paper, see the [`v2020`] module. This implementation produces identical cut //! points as the 2016 version, but does so a bit faster. //! -//! If you are using this crate for the first time, the `v2020` implementation +//! If you are using this crate for the first time, the [`v2020`] implementation //! would be the most appropriate. It uses 64-bit hash values and tends to be -//! faster than both the `ronomon` and `v2016` versions. +//! faster than both the [`ronomon`] and [`v2016`] versions. //! //! ## Examples //! @@ -116,7 +116,7 @@ //! ## Large Data //! //! If processing very large files, the streaming version of the chunkers in the -//! `v2016` and `v2020` modules may be a suitable approach. They both allocate a +//! [`v2016`] and [`v2020`] modules may be a suitable approach. They both allocate a //! byte vector equal to the maximum chunk size, draining and resizing the //! vector as chunks are found. However, using a crate such as `memmap2` can be //! significantly faster than the streaming chunkers. 
See the examples in the diff --git a/src/v2020/async_stream_cdc.rs b/src/v2020/async_stream_cdc.rs index 276c7c7..10cc376 100644 --- a/src/v2020/async_stream_cdc.rs +++ b/src/v2020/async_stream_cdc.rs @@ -23,15 +23,15 @@ use async_stream::try_stream; /// An async-streamable version of the FastCDC chunker implementation from 2020 /// with streaming support. /// -/// Use `new` to construct an instance, and then `as_stream` to produce an async -/// [Stream] of the chunks. +/// Use `new` to construct an instance, and then [`as_stream`](AsyncStreamCDC::as_stream) +/// to produce an async [Stream] of the chunks. /// /// Both `futures` and `tokio`-based [AsyncRead] inputs are supported via /// feature flags. But, if necessary you can also use the /// [`async_compat`](https://docs.rs/async-compat/latest/async_compat/) crate to /// adapt your inputs as circumstances may require. /// -/// Note that this struct allocates a `Vec` of `max_size` bytes to act as a +/// Note that this struct allocates a [`Vec`] of `max_size` bytes to act as a /// buffer when reading from the source and finding chunk boundaries. /// /// ```no_run @@ -80,7 +80,7 @@ pub struct AsyncStreamCDC { impl AsyncStreamCDC { /// - /// Construct a `StreamCDC` that will process bytes from the given source. + /// Construct an [`AsyncStreamCDC`] that will process bytes from the given source. /// /// Uses chunk size normalization level 1 by default. /// @@ -89,7 +89,7 @@ impl AsyncStreamCDC { } /// - /// Create a new `StreamCDC` with the given normalization level. + /// Create a new [`AsyncStreamCDC`] with the given normalization level. /// pub fn with_level( source: R, diff --git a/src/v2020/mod.rs b/src/v2020/mod.rs index f651977..b97ee9e 100644 --- a/src/v2020/mod.rs +++ b/src/v2020/mod.rs @@ -14,23 +14,23 @@ //! Apple M1 show about a 20% improvement, but results may vary depending on CPU //! architecture, file size, chunk size, etc. //! -//! 
There are two ways in which to use the `FastCDC` struct defined in this -//! module. One is to simply invoke `cut()` while managing your own `start` and -//! `remaining` values. The other is to use the struct as an `Iterator` that -//! yields `Chunk` structs which represent the offset and size of the chunks. -//! Note that attempting to use both `cut()` and `Iterator` on the same -//! `FastCDC` instance will yield incorrect results. +//! There are two ways in which to use the [`FastCDC`] struct defined in this +//! module. One is to simply invoke [`cut()`](FastCDC::cut) while managing your own `start` and +//! `remaining` values. The other is to use the struct as an [`Iterator`] that +//! yields [`Chunk`] structs which represent the offset and size of the chunks. +//! Note that attempting to use both [`cut()`](FastCDC::cut) and [`Iterator`] on the same +//! [`FastCDC`] instance will yield incorrect results. //! -//! Note that the `cut()` function returns the 64-bit hash of the chunk, which +//! Note that the [`cut()`](FastCDC::cut) function returns the 64-bit hash of the chunk, which //! may be useful in scenarios involving chunk size prediction using historical //! data, such as in RapidCDC or SuperCDC. This hash value is also given in the -//! `hash` field of the `Chunk` struct. While this value has rather low entropy, +//! `hash` field of the [`Chunk`] struct. While this value has rather low entropy, //! it is computationally cost-free and can be put to some use with additional //! record keeping. //! -//! The `StreamCDC` implementation is similar to `FastCDC` except that it will -//! read data from a `Read` into an internal buffer of `max_size` and produce -//! `ChunkData` values from the `Iterator`. +//! The [`StreamCDC`] implementation is similar to [`FastCDC`] except that it will +//! read data from a [`Read`] into an internal buffer of `max_size` and produce +//! [`ChunkData`] values from the [`Iterator`]. 
use std::fmt; use std::io::Read; @@ -300,7 +300,7 @@ pub fn cut( /// /// Note that lower levels of normalization will result in a larger range of /// generated chunk sizes. It may be beneficial to widen the minimum/maximum -/// chunk size values given to the `FastCDC` constructor in that case. +/// chunk size values given to the [`FastCDC`] constructor in that case. /// /// Note that higher levels of normalization may result in the final chunk of /// data being smaller than the minimum chunk size, which results in a hash @@ -336,7 +336,7 @@ impl fmt::Display for Normalization { } /// -/// Represents a chunk returned from the FastCDC iterator. +/// Represents a chunk returned from the [`FastCDC`] iterator. /// #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] pub struct Chunk { @@ -351,8 +351,8 @@ pub struct Chunk { /// /// The FastCDC chunker implementation from 2020. /// -/// Use `new` to construct an instance, and then iterate over the `Chunk`s via -/// the `Iterator` trait. +/// Use `new` to construct an instance, and then iterate over the [`Chunk`]s via +/// the [`Iterator`] trait. /// /// This example reads a file into memory and splits it into chunks that are /// roughly 16 KB in size. The minimum and maximum sizes are the absolute limit @@ -388,7 +388,7 @@ pub struct FastCDC<'a> { impl<'a> FastCDC<'a> { /// - /// Construct a `FastCDC` that will process the given slice of bytes. + /// Construct a [`FastCDC`] that will process the given slice of bytes. /// /// Uses chunk size normalization level 1 by default. /// @@ -397,7 +397,7 @@ impl<'a> FastCDC<'a> { } /// - /// Create a new `FastCDC` with the given normalization level. + /// Create a new [`FastCDC`] with the given normalization level. /// pub fn with_level( source: &'a [u8], @@ -494,7 +494,7 @@ impl<'a> Iterator for FastCDC<'a> { } /// -/// The error type returned from the `StreamCDC` iterator. +/// The error type returned from the [`StreamCDC`] iterator. 
/// #[derive(Debug)] pub enum Error { @@ -531,7 +531,7 @@ impl From for std::io::Error { } /// -/// Represents a chunk returned from the StreamCDC iterator. +/// Represents a chunk returned from the [`StreamCDC`] iterator. /// #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub struct ChunkData { @@ -548,10 +548,10 @@ pub struct ChunkData { /// /// The FastCDC chunker implementation from 2020 with streaming support. /// -/// Use `new` to construct an instance, and then iterate over the `ChunkData`s -/// via the `Iterator` trait. +/// Use `new` to construct an instance, and then iterate over the [`ChunkData`]s +/// via the [`Iterator`] trait. /// -/// Note that this struct allocates a `Vec` of `max_size` bytes to act as a +/// Note that this struct allocates a [`Vec`] of `max_size` bytes to act as a /// buffer when reading from the source and finding chunk boundaries. /// /// ```no_run @@ -589,7 +589,7 @@ pub struct StreamCDC { impl StreamCDC { /// - /// Construct a `StreamCDC` that will process bytes from the given source. + /// Construct a [`StreamCDC`] that will process bytes from the given source. /// /// Uses chunk size normalization level 1 by default. /// @@ -598,7 +598,7 @@ impl StreamCDC { } /// - /// Create a new `StreamCDC` with the given normalization level. + /// Create a new [`StreamCDC`] with the given normalization level. /// pub fn with_level( source: R,