Add comments

a16z · Nov 7, 2024 · 854c0ab · 854c0ab
1 parent cdedfd3
commit 854c0ab
Show file tree

Hide file tree

Showing 13 changed files with 347 additions and 90 deletions.
diff --git a/jolt-core/Cargo.toml b/jolt-core/Cargo.toml
@@ -87,7 +87,7 @@ name = "commit"
 harness = false
 
 [[bench]]
-name = "polynomial"
+name = "binding"
 harness = false
 
 [[bench]]

diff --git a/jolt-core/benches/polynomial.rs → jolt-core/benches/binding.rs b/jolt-core/benches/polynomial.rs → jolt-core/benches/binding.rs
diff --git a/jolt-core/src/jolt/instruction/div.rs b/jolt-core/src/jolt/instruction/div.rs
@@ -76,7 +76,7 @@ impl<const WORD_SIZE: usize> VirtualInstructionSequence for DIVInstruction<WORD_
                 rd_post_val: Some(q),
             },
             memory_state: None,
-            advice_value: Some(quotient), // What should advice value be here?
+            advice_value: Some(quotient),
         });
 
         let r = ADVICEInstruction::<WORD_SIZE>(remainder).lookup_entry();
@@ -96,7 +96,7 @@ impl<const WORD_SIZE: usize> VirtualInstructionSequence for DIVInstruction<WORD_
                 rd_post_val: Some(r),
             },
             memory_state: None,
-            advice_value: Some(remainder), // What should advice value be here?
+            advice_value: Some(remainder),
         });
 
         let is_valid: u64 = AssertValidSignedRemainderInstruction::<WORD_SIZE>(r, y).lookup_entry();

diff --git a/jolt-core/src/jolt/vm/instruction_lookups.rs b/jolt-core/src/jolt/vm/instruction_lookups.rs
@@ -256,6 +256,7 @@ where
             (memory_flags, read_write_leaves),
             (
                 init_final_leaves,
+                // # init = # subtables; # final = # memories
                 Self::NUM_SUBTABLES + preprocessing.num_memories,
             ),
         )
@@ -447,8 +448,10 @@ where
             .collect()
     }
 
-    /// Checks that the claimed multiset hashes (output by grand product) are consistent with the
-    /// openings given by `read_write_openings` and `init_final_openings`.
+    /// Checks that the claims output by the grand products are consistent with the openings of
+    /// the polynomials comprising the input layers.
+    ///
+    ///
     fn check_fingerprints(
         preprocessing: &Self::Preprocessing,
         read_write_claim: F,
@@ -486,22 +489,55 @@ where
             .iter()
             .map(|tuple| tuple.3.unwrap())
             .collect();
+        // For the toggled grand product, the flags in the input layer are padded with 1s,
+        // while the fingerprints are padded with 0s, so that all subsequent padding layers
+        // are all 0s.
+        // To see why this is the case, observe that the input layer's gates will output
+        // flag * fingerprint + 1 - flag = 1 * 0 + 1 - 1 = 0.
+        // Then all subsequent layers will output gate values 0 * 0 = 0.
         read_write_flags.resize(read_write_flags.len().next_power_of_two(), F::one());
+
+        // Let r' := r_read_write_batch_index
+        // and r'':= r_read_write_opening.
+        //
+        // Let k denote the batch size.
+        //
+        // The `read_write_flags` vector above contains the evaluations of the k individual
+        // flag MLEs at r''.
+        //
+        // What we want to compute is the evaluation of the MLE of ALL the flags, concatenated together,
+        // at (r', r''):
+        //
+        // flags(r', r'') = \sum_j eq(r', j) * flag_j(r'')
+        //
+        // where flag_j(r'') is what we already have in `read_write_flags`.
         let combined_flags: F = read_write_flags
             .iter()
             .zip(EqPolynomial::evals(r_read_write_batch_index).iter())
             .map(|(flag, eq_eval)| *flag * eq_eval)
             .sum();
+        // Similar thing for the fingerprints:
+        //
+        // fingerprints(r', r'') = \sum_j eq(r', j) * (t_j(r'') * \gamma^2 + v_j(r'') * \gamma + a_j(r'') - \tau)
         let combined_read_write_fingerprint: F = read_write_tuples
             .iter()
             .zip(EqPolynomial::evals(r_read_write_batch_index).iter())
             .map(|(tuple, eq_eval)| Self::fingerprint(tuple, gamma, tau) * eq_eval)
             .sum();
+
+        // Now we combine flags(r', r'') and fingerprints(r', r'') to obtain the evaluation of the
+        // multi-*quadratic* extension W of the input layer at (r', r'')
+        //
+        // W(r', r'') = flags(r', r'') * fingerprints(r', r'') + 1 - flags(r', r'')
+        //
+        // and this should equal the claim output by the read-write grand product.
         assert_eq!(
             combined_flags * combined_read_write_fingerprint + F::one() - combined_flags,
             read_write_claim
         );
 
+        // The init-final grand product isn't toggled using flags (it's just a "normal" grand product)
+        // so we combine the openings the normal way.
         let combined_init_final_fingerprint: F = init_final_tuples
             .iter()
             .zip(EqPolynomial::evals(r_init_final_batch_index).iter())

diff --git a/jolt-core/src/jolt/vm/mod.rs b/jolt-core/src/jolt/vm/mod.rs
@@ -245,6 +245,12 @@ impl<F: JoltField> JoltPolynomials<F> {
             .zip(trace_comitments.into_iter())
             .for_each(|(dest, src)| *dest = src);
 
+        println!(
+            "# commitments: {} + {}",
+            commitments.read_write_values().len(),
+            commitments.init_final_values().len(),
+        );
+
         commitments.bytecode.t_final =
             PCS::commit(&self.bytecode.t_final, &preprocessing.generators);
         (
@@ -366,6 +372,7 @@ where
     ) {
         let trace_length = trace.len();
         let padded_trace_length = trace_length.next_power_of_two();
+        println!("Trace length: {}", trace_length);
 
         JoltTraceStep::pad(&mut trace);
 

diff --git a/jolt-core/src/lasso/memory_checking.rs b/jolt-core/src/lasso/memory_checking.rs
@@ -199,7 +199,7 @@ pub trait Initializable<T, Preprocessing>: StructuredPolynomialData<T> + Default
     }
 }
 
-// Empty struct to represent that no preprocessing data is used.
+/// Empty struct to represent that no preprocessing data is used.
 pub struct NoPreprocessing;
 
 pub trait MemoryCheckingProver<F, PCS, ProofTranscript>
@@ -254,6 +254,9 @@ where
         let init_final_batch_size =
             multiset_hashes.init_hashes.len() + multiset_hashes.final_hashes.len();
 
+        // For a batch size of k, the first log2(k) elements of `r_read_write`/`r_init_final`
+        // form the point at which the output layer's MLE is evaluated. The remaining elements
+        // then form the point at which the leaf layer's polynomials are evaluated.
         let (_, r_read_write_opening) =
             r_read_write.split_at(read_write_batch_size.next_power_of_two().log_2());
         let (_, r_init_final_opening) =
@@ -569,6 +572,9 @@ where
             transcript,
             Some(pcs_setup),
         );
+        // For a batch size of k, the first log2(k) elements of `r_read_write`/`r_init_final`
+        // form the point at which the output layer's MLE is evaluated. The remaining elements
+        // then form the point at which the leaf layer's polynomials are evaluated.
         let (r_read_write_batch_index, r_read_write_opening) =
             r_read_write.split_at(read_write_batch_size.next_power_of_two().log_2());
 
@@ -665,8 +671,8 @@ where
         exogenous_openings: &Self::ExogenousOpenings,
     ) -> Vec<Self::MemoryTuple>;
 
-    /// Checks that the claimed multiset hashes (output by grand product) are consistent with the
-    /// openings given by `read_write_openings` and `init_final_openings`.
+    /// Checks that the claims output by the grand products are consistent with the openings of
+    /// the polynomials comprising the input layers.
     fn check_fingerprints(
         preprocessing: &Self::Preprocessing,
         read_write_claim: F,
@@ -712,6 +718,8 @@ where
             r_init_final_batch_index.len().pow2()
         );
 
+        // `r_read_write_batch_index`/`r_init_final_batch_index` are used to
+        // combine the k claims in the batch into a single claim.
         let combined_read_write_hash: F = read_write_hashes
             .iter()
             .zip(EqPolynomial::evals(r_read_write_batch_index).iter())

diff --git a/jolt-core/src/lib.rs b/jolt-core/src/lib.rs
@@ -10,8 +10,6 @@
 #![allow(long_running_const_eval)]
 #![allow(clippy::len_without_is_empty)]
 #![allow(type_alias_bounds)]
-#![feature(coroutines)]
-#![feature(iter_from_coroutine)]
 
 #[cfg(feature = "host")]
 pub mod benches;

diff --git a/jolt-core/src/poly/dense_interleaved_poly.rs b/jolt-core/src/poly/dense_interleaved_poly.rs
@@ -12,10 +12,26 @@ use rayon::{prelude::*, slice::Chunks};
 use super::dense_mlpoly::DensePolynomial;
 use super::{split_eq_poly::SplitEqPolynomial, unipoly::UniPoly};
 
+/// Represents a single layer of a grand product circuit.
+/// A layer is assumed to be arranged in "interleaved" order, i.e. the natural
+/// order in the visual representation of the circuit:
+///      Λ        Λ        Λ        Λ
+///     / \      / \      / \      /  \
+///   L0   R0  L1   R1  L2   R2  L3   R3   <- This is layer would be represented as [L0, R0, L1, R1, L2, R2, L3, R3]
+///                                           (as opposed to e.g. [L0, L1, L2, L3, R0, R1, R2, R3])
 #[derive(Default, Debug, Clone)]
 pub struct DenseInterleavedPolynomial<F: JoltField> {
+    /// The coefficients for the "left" and "right" polynomials comprising a
+    /// dense grand product layer.
+    /// The coefficients are in interleaved order:
+    /// [L0, R0, L1, R1, L2, R2, L3, R3, ...]
     pub(crate) coeffs: Vec<F>,
+    /// The effective length of `coeffs`. When binding, we update this length
+    /// instead of truncating `coeffs`, which incurs the cost of dropping the
+    /// truncated values.
     len: usize,
+    /// A reused buffer where bound values are written to during `bind`.
+    /// With every bind, `coeffs` and `binding_scratch_space` are swapped.
     binding_scratch_space: Vec<F>,
 }
 
@@ -36,7 +52,7 @@ impl<F: JoltField> DenseInterleavedPolynomial<F> {
         Self {
             coeffs,
             len,
-            binding_scratch_space: unsafe_allocate_zero_vec(len),
+            binding_scratch_space: unsafe_allocate_zero_vec(len.next_multiple_of(4) / 2),
         }
     }
 
@@ -87,11 +103,8 @@ impl<F: JoltField> DenseInterleavedPolynomial<F> {
 }
 
 impl<F: JoltField> Bindable<F> for DenseInterleavedPolynomial<F> {
-    /// Incrementally binds a variable of this batched layer's polynomials.
-    /// Even though each layer is backed by a single Vec<F>, it represents two polynomials
-    /// one for the left nodes in the circuit, one for the right nodes in the circuit.
-    /// These two polynomials' coefficients are interleaved into one Vec<F>. To preserve
-    /// this interleaved order, we bind values like this:
+    /// Incrementally binds a variable of the interleaved left and right polynomials.
+    /// To preserve the interleaved order of coefficients, we bind values like this:
     ///   0'  1'     2'  3'
     ///   |\ |\      |\ |\
     ///   | \| \     | \| \
@@ -105,6 +118,9 @@ impl<F: JoltField> Bindable<F> for DenseInterleavedPolynomial<F> {
         let (mut left_before_binding, mut right_before_binding) = self.uninterleave();
 
         let padded_len = self.len.next_multiple_of(4);
+        // In order to parallelize binding while obeying Rust ownership rules, we
+        // must write to a different vector than we are reading from. `binding_scratch_space`
+        // serves this purpose.
         self.binding_scratch_space
             .par_chunks_mut(2)
             .zip(self.coeffs[..self.len].par_chunks(4))
@@ -121,6 +137,8 @@ impl<F: JoltField> Bindable<F> for DenseInterleavedPolynomial<F> {
             });
 
         self.len = padded_len / 2;
+        // Point `self.coeffs` to the bound coefficients, and `self.coeffs` will serve as the
+        // binding scratch space in the next invocation of `bind`.
         std::mem::swap(&mut self.coeffs, &mut self.binding_scratch_space);
 
         #[cfg(test)]
@@ -155,13 +173,6 @@ pub fn bind_left_and_right<F: JoltField>(left: &mut Vec<F>, right: &mut Vec<F>,
     *right = right_poly.Z[..right.len() / 2].to_vec();
 }
 
-/// Represents a single layer of a batched grand product circuit.
-/// A layer is assumed to be arranged in "interleaved" order, i.e. the natural
-/// order in the visual representation of the circuit:
-///      Λ        Λ        Λ        Λ
-///     / \      / \      / \      / \
-///   L0   R0  L1   R1  L2   R2  L3   R3   <- This is layer would be represented as [L0, R0, L1, R1, L2, R2, L3, R3]
-///                                           (as opposed to e.g. [L0, L1, L2, L3, R0, R1, R2, R3])
 impl<F: JoltField, ProofTranscript: Transcript> BatchedGrandProductLayer<F, ProofTranscript>
     for DenseInterleavedPolynomial<F>
 {
@@ -190,15 +201,20 @@ impl<F: JoltField, ProofTranscript: Transcript> BatchedCubicSumcheck<F, ProofTra
     ///
     /// Computing these evaluations requires processing pairs of adjacent coefficients of
     /// `eq`, `left`, and `right`.
-    /// Recall that the `left` and `right` polynomials are interleaved in each layer of `self.layers`,
-    /// so we process each layer 4 values at a time:
-    ///                  layer = [L, R, L, R, L, R, ...]
+    /// Recall that the `left` and `right` polynomials are interleaved in `self.coeffs`,
+    /// so we process 4 values at a time:
+    ///                 coeffs = [L, R, L, R, L, R, ...]
     ///                           |  |  |  |
     ///    left(0, 0, 0, ..., x_b=0) |  |  right(0, 0, 0, ..., x_b=1)
     ///     right(0, 0, 0, ..., x_b=0)  left(0, 0, 0, ..., x_b=1)
     #[tracing::instrument(skip_all, name = "DenseInterleavedPolynomial::compute_cubic")]
     fn compute_cubic(&self, eq_poly: &SplitEqPolynomial<F>, previous_round_claim: F) -> UniPoly<F> {
+        // We use the Dao-Thaler optimization for the EQ polynomial, so there are two cases we
+        // must handle. For details, refer to Section 2.2 of https://eprint.iacr.org/2024/1210.pdf
         let cubic_evals = if eq_poly.E1_len == 1 {
+            // If `eq_poly.E1` has been fully bound, we compute the cubic polynomial as we
+            // would without the Dao-Thaler optimization, using the standard linear-time
+            // sumcheck algorithm.
             self.par_chunks(4)
                 .zip(eq_poly.E2.par_chunks(2))
                 .map(|(layer_chunk, eq_chunk)| {
@@ -238,6 +254,22 @@ impl<F: JoltField, ProofTranscript: Transcript> BatchedCubicSumcheck<F, ProofTra
                     |sum, evals| (sum.0 + evals.0, sum.1 + evals.1, sum.2 + evals.2),
                 )
         } else {
+            // If `eq_poly.E1` has NOT been fully bound, we compute the cubic polynomial
+            // using the nested summation approach described in Section 2.2 of https://eprint.iacr.org/2024/1210.pdf
+            //
+            // Note, however, that we reverse the inner/outer summation compared to the
+            // description in the paper. I.e. instead of:
+            //
+            // \sum_x1 ((1 - j) * E1[0, x1] + j * E1[1, x1]) * (\sum_x2 E2[x2] * \prod_k ((1 - j) * P_k(0 || x1 || x2) + j * P_k(1 || x1 || x2)))
+            //
+            // we do:
+            //
+            // \sum_x2 E2[x2] * (\sum_x1 ((1 - j) * E1[0, x1] + j * E1[1, x1]) * \prod_k ((1 - j) * P_k(0 || x1 || x2) + j * P_k(1 || x1 || x2)))
+            //
+            // because it has better memory locality.
+
+            // We start by computing the E1 evals:
+            // (1 - j) * E1[0, x1] + j * E1[1, x1]
             let E1_evals: Vec<_> = eq_poly.E1[..eq_poly.E1_len]
                 .par_chunks(2)
                 .map(|E1_chunk| {
@@ -254,6 +286,8 @@ impl<F: JoltField, ProofTranscript: Transcript> BatchedCubicSumcheck<F, ProofTra
                 .par_iter()
                 .zip(self.par_chunks(chunk_size))
                 .map(|(E2_eval, P_x2)| {
+                    // The for-loop below corresponds to the inner sum:
+                    // \sum_x1 ((1 - j) * E1[0, x1] + j * E1[1, x1]) * \prod_k ((1 - j) * P_k(0 || x1 || x2) + j * P_k(1 || x1 || x2))
                     let mut inner_sum = (F::zero(), F::zero(), F::zero());
                     for (E1_evals, P_chunk) in E1_evals.iter().zip(P_x2.chunks(4)) {
                         let left = (
@@ -278,6 +312,7 @@ impl<F: JoltField, ProofTranscript: Transcript> BatchedCubicSumcheck<F, ProofTra
                         inner_sum.2 += E1_evals.2 * left_eval_3 * right_eval_3;
                     }
 
+                    // Multiply the inner sum by E2[x2]
                     (
                         *E2_eval * inner_sum.0,
                         *E2_eval * inner_sum.1,