From 854c0abece524a114b1e442213fce2af1f11aefe Mon Sep 17 00:00:00 2001 From: Michael Zhu Date: Thu, 7 Nov 2024 17:21:55 -0500 Subject: [PATCH] Add comments --- jolt-core/Cargo.toml | 2 +- .../benches/{polynomial.rs => binding.rs} | 0 jolt-core/src/jolt/instruction/div.rs | 4 +- jolt-core/src/jolt/vm/instruction_lookups.rs | 40 +++++- jolt-core/src/jolt/vm/mod.rs | 7 + jolt-core/src/lasso/memory_checking.rs | 14 +- jolt-core/src/lib.rs | 2 - jolt-core/src/poly/dense_interleaved_poly.rs | 67 +++++++--- jolt-core/src/poly/sparse_interleaved_poly.rs | 121 ++++++++++++++---- jolt-core/src/subprotocols/grand_product.rs | 30 +++-- .../src/subprotocols/grand_product_quarks.rs | 64 ++++++--- .../src/subprotocols/sparse_grand_product.rs | 84 ++++++++++-- jolt-core/src/utils/sol_types.rs | 2 +- 13 files changed, 347 insertions(+), 90 deletions(-) rename jolt-core/benches/{polynomial.rs => binding.rs} (100%) diff --git a/jolt-core/Cargo.toml b/jolt-core/Cargo.toml index 15aedbd09..a08331b22 100644 --- a/jolt-core/Cargo.toml +++ b/jolt-core/Cargo.toml @@ -87,7 +87,7 @@ name = "commit" harness = false [[bench]] -name = "polynomial" +name = "binding" harness = false [[bench]] diff --git a/jolt-core/benches/polynomial.rs b/jolt-core/benches/binding.rs similarity index 100% rename from jolt-core/benches/polynomial.rs rename to jolt-core/benches/binding.rs diff --git a/jolt-core/src/jolt/instruction/div.rs b/jolt-core/src/jolt/instruction/div.rs index 616f17d85..9b7e7e601 100644 --- a/jolt-core/src/jolt/instruction/div.rs +++ b/jolt-core/src/jolt/instruction/div.rs @@ -76,7 +76,7 @@ impl VirtualInstructionSequence for DIVInstruction(remainder).lookup_entry(); @@ -96,7 +96,7 @@ impl VirtualInstructionSequence for DIVInstruction(r, y).lookup_entry(); diff --git a/jolt-core/src/jolt/vm/instruction_lookups.rs b/jolt-core/src/jolt/vm/instruction_lookups.rs index abb954dce..846929bc1 100644 --- a/jolt-core/src/jolt/vm/instruction_lookups.rs +++ 
b/jolt-core/src/jolt/vm/instruction_lookups.rs @@ -256,6 +256,7 @@ where (memory_flags, read_write_leaves), ( init_final_leaves, + // # init = # subtables; # final = # memories Self::NUM_SUBTABLES + preprocessing.num_memories, ), ) @@ -447,8 +448,10 @@ where .collect() } - /// Checks that the claimed multiset hashes (output by grand product) are consistent with the - /// openings given by `read_write_openings` and `init_final_openings`. + /// Checks that the claims output by the grand products are consistent with the openings of + /// the polynomials comprising the input layers. + /// + /// fn check_fingerprints( preprocessing: &Self::Preprocessing, read_write_claim: F, @@ -486,22 +489,55 @@ where .iter() .map(|tuple| tuple.3.unwrap()) .collect(); + // For the toggled grand product, the flags in the input layer are padded with 1s, + // while the fingerprints are padded with 0s, so that all subsequent padding layers + // are all 0s. + // To see why this is the case, observe that the input layer's gates will output + // flag * fingerprint + 1 - flag = 1 * 0 + 1 - 1 = 0. + // Then all subsequent layers will output gate values 0 * 0 = 0. read_write_flags.resize(read_write_flags.len().next_power_of_two(), F::one()); + + // Let r' := r_read_write_batch_index + // and r'':= r_read_write_opening. + // + // Let k denote the batch size. + // + // The `read_write_flags` vector above contains the evaluations of the k individual + // flag MLEs at r''. + // + // What we want to compute is the evaluation of the MLE of ALL the flags, concatenated together, + // at (r', r''): + // + // flags(r', r'') = \sum_j eq(r', j) * flag_j(r'') + // + // where flag_j(r'') is what we already have in `read_write_flags`. 
let combined_flags: F = read_write_flags .iter() .zip(EqPolynomial::evals(r_read_write_batch_index).iter()) .map(|(flag, eq_eval)| *flag * eq_eval) .sum(); + // Similar thing for the fingerprints: + // + // fingerprints(r', r'') = \sum_j eq(r', j) * (t_j(r'') * \gamma^2 + v_j(r'') * \gamma + a_j(r'') - \tau) let combined_read_write_fingerprint: F = read_write_tuples .iter() .zip(EqPolynomial::evals(r_read_write_batch_index).iter()) .map(|(tuple, eq_eval)| Self::fingerprint(tuple, gamma, tau) * eq_eval) .sum(); + + // Now we combine flags(r', r'') and fingerprints(r', r'') to obtain the evaluation of the + // multi-*quadratic* extension W of the input layer at (r', r'') + // + // W(r', r'') = flags(r', r'') * fingerprints(r', r'') + 1 - flags(r', r'') + // + // and this should equal the claim output by the read-write grand product. assert_eq!( combined_flags * combined_read_write_fingerprint + F::one() - combined_flags, read_write_claim ); + // The init-final grand product isn't toggled using flags (it's just a "normal" grand product) + // so we combine the openings the normal way. 
let combined_init_final_fingerprint: F = init_final_tuples .iter() .zip(EqPolynomial::evals(r_init_final_batch_index).iter()) diff --git a/jolt-core/src/jolt/vm/mod.rs b/jolt-core/src/jolt/vm/mod.rs index 11f224f80..b1aac8825 100644 --- a/jolt-core/src/jolt/vm/mod.rs +++ b/jolt-core/src/jolt/vm/mod.rs @@ -245,6 +245,12 @@ impl JoltPolynomials { .zip(trace_comitments.into_iter()) .for_each(|(dest, src)| *dest = src); + println!( + "# commitments: {} + {}", + commitments.read_write_values().len(), + commitments.init_final_values().len(), + ); + commitments.bytecode.t_final = PCS::commit(&self.bytecode.t_final, &preprocessing.generators); ( @@ -366,6 +372,7 @@ where ) { let trace_length = trace.len(); let padded_trace_length = trace_length.next_power_of_two(); + println!("Trace length: {}", trace_length); JoltTraceStep::pad(&mut trace); diff --git a/jolt-core/src/lasso/memory_checking.rs b/jolt-core/src/lasso/memory_checking.rs index 67cb07ecb..9e3fc121e 100644 --- a/jolt-core/src/lasso/memory_checking.rs +++ b/jolt-core/src/lasso/memory_checking.rs @@ -199,7 +199,7 @@ pub trait Initializable: StructuredPolynomialData + Default } } -// Empty struct to represent that no preprocessing data is used. +/// Empty struct to represent that no preprocessing data is used. pub struct NoPreprocessing; pub trait MemoryCheckingProver @@ -254,6 +254,9 @@ where let init_final_batch_size = multiset_hashes.init_hashes.len() + multiset_hashes.final_hashes.len(); + // For a batch size of k, the first log2(k) elements of `r_read_write`/`r_init_final` + // form the point at which the output layer's MLE is evaluated. The remaining elements + // then form the point at which the leaf layer's polynomials are evaluated. 
let (_, r_read_write_opening) = r_read_write.split_at(read_write_batch_size.next_power_of_two().log_2()); let (_, r_init_final_opening) = @@ -569,6 +572,9 @@ where transcript, Some(pcs_setup), ); + // For a batch size of k, the first log2(k) elements of `r_read_write`/`r_init_final` + // form the point at which the output layer's MLE is evaluated. The remaining elements + // then form the point at which the leaf layer's polynomials are evaluated. let (r_read_write_batch_index, r_read_write_opening) = r_read_write.split_at(read_write_batch_size.next_power_of_two().log_2()); @@ -665,8 +671,8 @@ where exogenous_openings: &Self::ExogenousOpenings, ) -> Vec; - /// Checks that the claimed multiset hashes (output by grand product) are consistent with the - /// openings given by `read_write_openings` and `init_final_openings`. + /// Checks that the claims output by the grand products are consistent with the openings of + /// the polynomials comprising the input layers. fn check_fingerprints( preprocessing: &Self::Preprocessing, read_write_claim: F, @@ -712,6 +718,8 @@ where r_init_final_batch_index.len().pow2() ); + // `r_read_write_batch_index`/`r_init_final_batch_index` are used to + // combine the k claims in the batch into a single claim. 
let combined_read_write_hash: F = read_write_hashes .iter() .zip(EqPolynomial::evals(r_read_write_batch_index).iter()) diff --git a/jolt-core/src/lib.rs b/jolt-core/src/lib.rs index 7c9091ab6..86559a07c 100644 --- a/jolt-core/src/lib.rs +++ b/jolt-core/src/lib.rs @@ -10,8 +10,6 @@ #![allow(long_running_const_eval)] #![allow(clippy::len_without_is_empty)] #![allow(type_alias_bounds)] -#![feature(coroutines)] -#![feature(iter_from_coroutine)] #[cfg(feature = "host")] pub mod benches; diff --git a/jolt-core/src/poly/dense_interleaved_poly.rs b/jolt-core/src/poly/dense_interleaved_poly.rs index af5a5a48f..361fc5cfe 100644 --- a/jolt-core/src/poly/dense_interleaved_poly.rs +++ b/jolt-core/src/poly/dense_interleaved_poly.rs @@ -12,10 +12,26 @@ use rayon::{prelude::*, slice::Chunks}; use super::dense_mlpoly::DensePolynomial; use super::{split_eq_poly::SplitEqPolynomial, unipoly::UniPoly}; +/// Represents a single layer of a grand product circuit. +/// A layer is assumed to be arranged in "interleaved" order, i.e. the natural +/// order in the visual representation of the circuit: +/// Λ Λ Λ Λ +/// / \ / \ / \ / \ +/// L0 R0 L1 R1 L2 R2 L3 R3 <- This layer would be represented as [L0, R0, L1, R1, L2, R2, L3, R3] +/// (as opposed to e.g. [L0, L1, L2, L3, R0, R1, R2, R3]) #[derive(Default, Debug, Clone)] pub struct DenseInterleavedPolynomial { + /// The coefficients for the "left" and "right" polynomials comprising a + /// dense grand product layer. + /// The coefficients are in interleaved order: + /// [L0, R0, L1, R1, L2, R2, L3, R3, ...] pub(crate) coeffs: Vec, + /// The effective length of `coeffs`. When binding, we update this length + /// instead of truncating `coeffs`, which incurs the cost of dropping the + /// truncated values. len: usize, + /// A reused buffer where bound values are written to during `bind`. + /// With every bind, `coeffs` and `binding_scratch_space` are swapped.
binding_scratch_space: Vec, } @@ -36,7 +52,7 @@ impl DenseInterleavedPolynomial { Self { coeffs, len, - binding_scratch_space: unsafe_allocate_zero_vec(len), + binding_scratch_space: unsafe_allocate_zero_vec(len.next_multiple_of(4) / 2), } } @@ -87,11 +103,8 @@ impl DenseInterleavedPolynomial { } impl Bindable for DenseInterleavedPolynomial { - /// Incrementally binds a variable of this batched layer's polynomials. - /// Even though each layer is backed by a single Vec, it represents two polynomials - /// one for the left nodes in the circuit, one for the right nodes in the circuit. - /// These two polynomials' coefficients are interleaved into one Vec. To preserve - /// this interleaved order, we bind values like this: + /// Incrementally binds a variable of the interleaved left and right polynomials. + /// To preserve the interleaved order of coefficients, we bind values like this: /// 0' 1' 2' 3' /// |\ |\ |\ |\ /// | \| \ | \| \ @@ -105,6 +118,9 @@ impl Bindable for DenseInterleavedPolynomial { let (mut left_before_binding, mut right_before_binding) = self.uninterleave(); let padded_len = self.len.next_multiple_of(4); + // In order to parallelize binding while obeying Rust ownership rules, we + // must write to a different vector than we are reading from. `binding_scratch_space` + // serves this purpose. self.binding_scratch_space .par_chunks_mut(2) .zip(self.coeffs[..self.len].par_chunks(4)) @@ -121,6 +137,8 @@ impl Bindable for DenseInterleavedPolynomial { }); self.len = padded_len / 2; + // Point `self.coeffs` to the bound coefficients; the buffer it previously held becomes + // `binding_scratch_space` for the next invocation of `bind`. std::mem::swap(&mut self.coeffs, &mut self.binding_scratch_space); #[cfg(test)] @@ -155,13 +173,6 @@ pub fn bind_left_and_right(left: &mut Vec, right: &mut Vec, *right = right_poly.Z[..right.len() / 2].to_vec(); } -/// Represents a single layer of a batched grand product circuit.
-/// A layer is assumed to be arranged in "interleaved" order, i.e. the natural -/// order in the visual representation of the circuit: -/// Λ Λ Λ Λ -/// / \ / \ / \ / \ -/// L0 R0 L1 R1 L2 R2 L3 R3 <- This is layer would be represented as [L0, R0, L1, R1, L2, R2, L3, R3] -/// (as opposed to e.g. [L0, L1, L2, L3, R0, R1, R2, R3]) impl BatchedGrandProductLayer for DenseInterleavedPolynomial { @@ -190,15 +201,20 @@ impl BatchedCubicSumcheck, previous_round_claim: F) -> UniPoly { + // We use the Dao-Thaler optimization for the EQ polynomial, so there are two cases we + // must handle. For details, refer to Section 2.2 of https://eprint.iacr.org/2024/1210.pdf let cubic_evals = if eq_poly.E1_len == 1 { + // If `eq_poly.E1` has been fully bound, we compute the cubic polynomial as we + // would without the Dao-Thaler optimization, using the standard linear-time + // sumcheck algorithm. self.par_chunks(4) .zip(eq_poly.E2.par_chunks(2)) .map(|(layer_chunk, eq_chunk)| { @@ -238,6 +254,22 @@ impl BatchedCubicSumcheck = eq_poly.E1[..eq_poly.E1_len] .par_chunks(2) .map(|E1_chunk| { @@ -254,6 +286,8 @@ impl BatchedCubicSumcheck BatchedCubicSumcheck From<(usize, F)> for SparseCoefficient { } } +/// Represents a single layer of a sparse grand product circuit. +/// A layer is assumed to be arranged in "interleaved" order, i.e. the natural +/// order in the visual representation of the circuit: +/// Λ Λ Λ Λ +/// / \ / \ / \ / \ +/// L0 R0 L1 R1 L2 R2 L3 R3 <- This layer would be represented as [L0, R0, L1, R1, L2, R2, L3, R3] +/// (as opposed to e.g.
[L0, L1, L2, L3, R0, R1, R2, R3]) +/// +/// Where SparseInterleavedPolynomial differs from DenseInterleavedPolynomial +/// is that many of the coefficients are expected to be 1s, so the circuit may +/// look something like this: +/// Λ Λ Λ Λ +/// / \ / \ / \ / \ +/// 1 R0 1 1 L2 1 1 1 +/// +/// Instead of materializing all the 1s, we use a sparse vector to represent the layer, +/// where each element of the vector contains the index and value of a non-one coefficient. +/// So the above layer would be represented by: +/// vec![(1, R0), (4, L2)] (except with `SparseCoefficient` structs, not tuples) +/// +/// In the context of a batched grand product (see sparse_grand_product.rs), there +/// are k of these sparse vectors, where k is the batch size. +/// For the first log2(n) rounds of binding, these k vectors can be processed in parallel. +/// After that, they are "coalesced" into a single DenseInterleavedPolynomial for the +/// remaining rounds of binding. #[derive(Default, Debug, Clone)] pub struct SparseInterleavedPolynomial { + /// A vector of sparse vectors representing the coefficients in a batched grand product + /// layer, where batch size = coeffs.len(). pub(crate) coeffs: Vec>>, + /// Once `coeffs` cannot be bound further (i.e. binding would require processing values + /// in different vectors), we switch to using `coalesced` to represent the grand product + /// layer. See `SparseInterleavedPolynomial::coalesce()`. pub(crate) coalesced: Option>, + /// The length of the layer if it were represented by a single dense vector. 
pub(crate) dense_len: usize, } @@ -64,6 +95,8 @@ impl SparseInterleavedPolynomial { .for_each(|sparse_coeff| coalesced[sparse_coeff.index] = sparse_coeff.value); Self { dense_len, + // The batch size is implied by coeffs.len(), so we must initialize this + // vector: coeffs: vec![vec![]; batch_size], coalesced: Some(DenseInterleavedPolynomial::new(coalesced)), } @@ -80,19 +113,17 @@ impl SparseInterleavedPolynomial { self.coeffs.len() } + /// Converts a `SparseInterleavedPolynomial` into the equivalent `DensePolynomial`. pub fn to_dense(&self) -> DensePolynomial { if let Some(coalesced) = &self.coalesced { DensePolynomial::new_padded(coalesced.coeffs[..coalesced.len()].to_vec()) } else { - let mut dense_layer = vec![F::one(); self.dense_len]; - for coeff in self.coeffs.iter().flatten() { - dense_layer[coeff.index] = coeff.value; - } - DensePolynomial::new_padded(dense_layer) + DensePolynomial::new_padded(self.coalesce()) } } #[tracing::instrument(skip_all, name = "SparseInterleavedPolynomial::coalesce")] + /// Coalesces a `SparseInterleavedPolynomial` into a `DenseInterleavedPolynomial`. pub fn coalesce(&self) -> Vec { if let Some(coalesced) = &self.coalesced { coalesced.coeffs.clone() @@ -152,6 +183,8 @@ impl SparseInterleavedPolynomial { Self::new(coeffs, left.len() + right.len()) } + /// Uninterleaves a `SparseInterleavedPolynomial` into two vectors + /// containing the left and right coefficients. pub fn uninterleave(&self) -> (Vec, Vec) { if let Some(coalesced) = &self.coalesced { coalesced.uninterleave() @@ -170,12 +203,11 @@ impl SparseInterleavedPolynomial { } } - pub fn par_blocks(&self) -> impl ParallelIterator]> { - self.coeffs - .par_iter() - .flat_map(|segment| segment.par_chunk_by(|x, y| x.index / 4 == y.index / 4)) - } - + /// Computes the grand product layer output by this one. 
+ /// L0' R0' L1' R1' <- Output layer + /// Λ Λ Λ Λ + /// / \ / \ / \ / \ + /// L0 R0 L1 R1 L2 R2 L3 R3 <- This layer #[tracing::instrument(skip_all, name = "SparseInterleavedPolynomial::layer_output")] pub fn layer_output(&self) -> Self { if let Some(coalesced) = &self.coalesced { @@ -211,8 +243,8 @@ impl SparseInterleavedPolynomial { } impl Bindable for SparseInterleavedPolynomial { - /// Incrementally binds a variable of this batched layer's polynomials. - /// If `self` is dense, we bind as in `BatchedDenseGrandProductLayer`, + /// Incrementally binds a variable of the interleaved left and right polynomials. + /// If `self` is coalesced, we invoke `DenseInterleavedPolynomial::bind`, /// processing nodes 4 at a time to preserve the interleaved order: /// 0' 1' 2' 3' /// |\ |\ |\ |\ @@ -221,8 +253,9 @@ impl Bindable for SparseInterleavedPolynomial { /// | |\ \ | |\ \ /// 0 1 2 3 4 5 6 7 /// Left nodes have even indices, right nodes have odd indices. - /// If `self` is sparse, we basically do the same thing but with more - /// cases to check 😬 + /// + /// If `self` is not coalesced, we basically do the same thing but with the + /// sparse vectors in `self.coeffs`, and many more cases to check 😬 #[tracing::instrument(skip_all, name = "SparseInterleavedPolynomial::bind")] fn bind(&mut self, r: F) { #[cfg(test)] @@ -390,13 +423,15 @@ impl BatchedCubicSumcheck, previous_round_claim: F) -> UniPoly { if let Some(coalesced) = &self.coalesced { @@ -407,7 +442,13 @@ impl BatchedCubicSumcheck = eq_poly .E2 .par_chunks(2) @@ -420,7 +461,8 @@ impl BatchedCubicSumcheck BatchedCubicSumcheck BatchedCubicSumcheck = eq_poly.E1[..eq_poly.E1_len] .par_chunks(2) .map(|E1_chunk| { @@ -488,6 +536,7 @@ impl BatchedCubicSumcheck BatchedCubicSumcheck BatchedCubicSumcheck, ProofTranscript: Transcript, { - pub layers: Vec>, + pub gkr_layers: Vec>, pub quark_proof: Option>, } @@ -81,6 +81,8 @@ where ) -> (BatchedGrandProductProof, Vec) { let mut proof_layers = 
Vec::with_capacity(self.num_layers()); + // Evaluate the MLE of the output layer at a random point to reduce the outputs to + // a single claim. let outputs = self.claimed_outputs(); transcript.append_scalars(&outputs); let output_mle = DensePolynomial::new_padded(outputs); @@ -93,7 +95,7 @@ where ( BatchedGrandProductProof { - layers: proof_layers, + gkr_layers: proof_layers, quark_proof: None, }, r, @@ -101,7 +103,7 @@ where } /// Verifies that the `sumcheck_claim` output by sumcheck verification is consistent - /// with the `left_claims` and `right_claims` of corresponding `BatchedGrandProductLayerProof`. + /// with the `left_claim` and `right_claim` of corresponding `BatchedGrandProductLayerProof`. /// This function may be overridden if the layer isn't just multiplication gates, e.g. in the /// case of `ToggledBatchedGrandProduct`. fn verify_sumcheck_claim( @@ -132,9 +134,9 @@ where transcript: &mut ProofTranscript, r_start: Vec, ) -> (F, Vec) { - // We allow a non empty start in this function call because the quark hybrid form provides prespecified random for - // most of the positions and then we proceed with GKR on the remaining layers using the preset random values. - // For default thaler '13 layered grand products this should be empty. + // `r_start` is the random point at which the MLE of the first layer of the grand product is evaluated. + // In the case of the Quarks hybrid grand product, this is obtained from the Quarks grand product sumcheck. + // In the case of Thaler'13 GKR-based grand products, this is from Fiat-Shamir. let mut r_grand_product = r_start.clone(); let fixed_at_start = r_start.len(); @@ -175,12 +177,14 @@ where transcript: &mut ProofTranscript, _setup: Option<&PCS::Setup>, ) -> (F, Vec) { + // Evaluate the MLE of the output layer at a random point to reduce the outputs to + // a single claim. 
transcript.append_scalars(claimed_outputs); let r: Vec = transcript.challenge_vector(claimed_outputs.len().next_power_of_two().log_2()); let claim = DensePolynomial::new_padded(claimed_outputs.to_vec()).evaluate(&r); - Self::verify_layers(&proof.layers, claim, transcript, r) + Self::verify_layers(&proof.gkr_layers, claim, transcript, r) } } @@ -229,12 +233,12 @@ where /// A batched grand product circuit. /// Note that the circuit roots are not included in `self.layers` -/// o -/// / \ -/// o o <- layers[layers.len() - 1] -/// / \ / \ -/// o o o o <- layers[layers.len() - 2] -/// ... +/// o o +/// / \ / \ +/// o o o o <- layers[layers.len() - 1] +/// / \ / \ / \ / \ +/// o o o o o o o o <- layers[layers.len() - 2] +/// ... ... pub struct BatchedDenseGrandProduct { layers: Vec>, } diff --git a/jolt-core/src/subprotocols/grand_product_quarks.rs b/jolt-core/src/subprotocols/grand_product_quarks.rs index 44491bbcf..6096b0734 100644 --- a/jolt-core/src/subprotocols/grand_product_quarks.rs +++ b/jolt-core/src/subprotocols/grand_product_quarks.rs @@ -47,7 +47,8 @@ pub enum QuarkHybridLayerDepth { } impl QuarkHybridLayerDepth { - // The depth in the product tree of the GKR grand product at which the hybrid scheme will switch to using quarks grand product proofs + /// The depth in the binary tree of the GKR grand product at which the hybrid scheme + /// will switch to using Quarks Section 5 grand product argument. 
pub fn get_crossover_depth(&self) -> usize { match self { QuarkHybridLayerDepth::Min => 0, @@ -99,7 +100,7 @@ where crossover }; - // Taken 1 to 1 from the code in the BatchedDenseGrandProductLayer implementation + // Taken 1 to 1 from the code in the BatchedDenseGrandProduct implementation let mut layers = Vec::>::new(); layers.push(DenseInterleavedPolynomial::new(leaves)); @@ -109,7 +110,7 @@ where layers.push(new_layer); } - // If the tree depth is too small we return no quark poly and all base layers + // If the tree depth is too small we just do the GKR grand product if tree_depth <= num_layers { return Self { batch_size, @@ -129,10 +130,9 @@ where _marker: PhantomData, } } - /// The number of layers in the grand product, in this case it is the log of the quark layer size plus the gkr layer depth. + fn num_layers(&self) -> usize { - todo!() - // self.quark_poly[0].len().log_2() + unimplemented!("Unused"); } /// The claimed outputs of the grand products. @@ -151,7 +151,7 @@ where fn layers( &'_ mut self, ) -> impl Iterator> { - panic!("We don't use the default prover and so we don't need the generic iterator"); + unimplemented!("We don't use the default prover and so we don't need the generic iterator"); std::iter::empty() } @@ -172,10 +172,10 @@ where let r_outputs: Vec = transcript.challenge_vector(output_mle.get_num_vars()); let claim = output_mle.evaluate(&r_outputs); - // For proofs of polynomials of size less than 16 we support these with no quark proof - let (quark_option, mut random, mut claim) = if !self.quark_poly.is_empty() { + // For polynomials of size less than 16 we just use the GKR grand product + let (quark_proof, mut random, mut claim) = if !self.quark_poly.is_empty() { // When doing the quark hybrid proof, we first prove the grand product of a layer of a polynomial which is 4 layers deep in the tree - // of a standard layered sumcheck grand product, then we use the sumcheck layers to prove via gkr layers that the random point opened + // of a 
standard layered sumcheck grand product, then we use the sumcheck layers to prove via GKR layers that the random point opened // by the quark proof is in fact the folded result of the base layer. let (quark, random, quark_claim) = QuarkGrandProductProof::::prove( @@ -197,8 +197,8 @@ where ( BatchedGrandProductProof { - layers: proof_layers, - quark_proof: quark_option, + gkr_layers: proof_layers, + quark_proof, }, random, ) @@ -213,6 +213,8 @@ where transcript: &mut ProofTranscript, _setup: Option<&PCS::Setup>, ) -> (F, Vec) { + // Evaluate the MLE of the output layer at a random point to reduce the outputs to + // a single claim. transcript.append_scalars(claimed_outputs); let r_outputs: Vec = transcript.challenge_vector(claimed_outputs.len().next_power_of_two().log_2()); @@ -245,7 +247,7 @@ where PCS, ProofTranscript, >>::verify_layers( - &proof.layers, claim, transcript, rand + &proof.gkr_layers, claim, transcript, rand ); (grand_product_claim, grand_product_r) @@ -287,12 +289,13 @@ where let v_variables = v_length.log_2(); let v_polynomial = DensePolynomial::::new(v.to_vec()); + // Compute f(1, x), f(x, 0), and f(x, 1) from v(x) let (f_1x, f_x0, f_x1) = v_into_f::(&v_polynomial); let g_polynomial = f_1x.clone(); let mut sumcheck_polys = vec![f_1x, f_x0, f_x1]; - // We commit to f(1, x) + // We commit to g(x) = f(1, x) let g_commitment = PCS::commit(&g_polynomial, setup); g_commitment.append_to_transcript(transcript); @@ -302,7 +305,33 @@ where // We add eq_tau as the second to last polynomial in the sumcheck sumcheck_polys.push(eq_tau); - // Next we calculate EQ(11...10 || r_outputs, x) + // This is where things start to deviate from the protocol described in + // Quarks Section 5. + // + // We batch our grand products by laying out the circuits side-by-side, and + // proving them together as one big circuit with k outputs, where k is the batch size. 
+ // In `prove_grand_product`, we evaluate the MLE of these outputs at a random point, + // claim := \tilde{outputs}(r_outputs) + // + // Quarks Section 5 assumes there's only one output, P = f(1, ..., 1, 0). + // But claim != f(1, ..., 1, 0), so we have to use a different sumcheck expression. + // + // If you closely examine `v_into_f` and work it out, you'll find that our k grand product + // outputs are contained in f(1, x) at x = (1, ..., 1, 0, b), where b \in {0, 1}^{log2(k)}. + // So we have: + // claim = \tilde{outputs}(r_outputs) + // = \sum_b EQ(r_outputs, b) * outputs(b) + // = \sum_x EQ(1, ..., 1, 0, r_outputs, x) * f(1, x) where r_outputs ∈ 𝔽^{log2(k)}, x ∈ {0, 1}^{log2(kn)} + // + // Modifying the sumcheck instance described in Section 5 of the Quarks paper, we will + // be proving: + // claim = \sum_x (EQ(\tau, x) * (f(1, x) - f(x, 0) * f(x, 1)) + EQ(1, ..., 1, 0, r_outputs, x) * f(1, x)) + // + // Note that the first half of the summand EQ(\tau, x) * (f(1, x) - f(x, 0) * f(x, 1)) + // should equal 0 for all x ∈ {0, 1}^{log2(kn)}, ensuring that every output value f(1, x) is equal to the + // product of its input values f(x, 0) and f(x, 1). + + // First we compute EQ(1, ..., 1, 0, r_outputs, x) let mut one_padded_r_outputs = vec![PCS::Field::one(); v_variables]; let slice_index = one_padded_r_outputs.len() - r_outputs.len(); one_padded_r_outputs[slice_index..].copy_from_slice(r_outputs.as_slice()); @@ -324,6 +353,8 @@ where // We add eq_output as the last polynomial in the sumcheck sumcheck_polys.push(eq_output); + // This is the sumcheck polynomial + // EQ(\tau, x) * (f(1, x) - f(x, 0) * f(x, 1)) + EQ(1, ..., 1, 0, r_outputs, x) * f(1, x) let output_check_fn = |vals: &[PCS::Field]| -> PCS::Field { assert_eq!(vals.len(), 5); let f_1x = vals[0]; @@ -494,7 +525,8 @@ where } } -// Computes slices of f for the sumcheck +/// Computes the polynomials f(1, x), f(x, 0), and f(x, 1) from the v polynomial, +/// as described in Lemma 5.1 of the Quarks paper. 
#[allow(clippy::type_complexity)] fn v_into_f( v: &DensePolynomial, diff --git a/jolt-core/src/subprotocols/sparse_grand_product.rs b/jolt-core/src/subprotocols/sparse_grand_product.rs index 2e7b1740d..9122e9e43 100644 --- a/jolt-core/src/subprotocols/sparse_grand_product.rs +++ b/jolt-core/src/subprotocols/sparse_grand_product.rs @@ -26,18 +26,25 @@ use rayon::prelude::*; /// 🏴 o 🏳️ o 🏳️ o 🏴 o toggle layer ↓ #[derive(Debug)] struct BatchedGrandProductToggleLayer { - /// The list of non-zero flag indices for each layer in the batch. + /// The list of non-zero flag indices for each circuit in the batch. flag_indices: Vec>, - /// The list of non-zero flag values for each layer in the batch. + /// The list of non-zero flag values for each circuit in the batch. /// Before the first binding iteration of sumcheck, this will be empty /// (we know that all non-zero, unbound flag values are 1). flag_values: Vec>, + /// The Reed-Solomon fingerprints for each circuit in the batch. fingerprints: Vec>, - + /// Once the sparse flag/fingerprint vectors cannot be bound further + /// (i.e. binding would require processing values in different vectors), + /// we switch to using `coalesced_flags` to represent the flag values. coalesced_flags: Option>, + /// Once the sparse flag/fingerprint vectors cannot be bound further + /// (i.e. binding would require processing values in different vectors), + /// we switch to using `coalesced_fingerprints` to represent the fingerprint values. coalesced_fingerprints: Option>, - + /// The length of a layer in one of the circuits in the batch.
layer_len: usize, + batched_layer_len: usize, } @@ -60,6 +67,7 @@ impl BatchedGrandProductToggleLayer { F::one(); } } + // Fingerprints are padded with 0s, flags are padded with 1s flags.resize(flags.len().next_power_of_two(), F::one()); ( @@ -86,6 +94,7 @@ impl BatchedGrandProductToggleLayer { *flag_value; } } + // Fingerprints are padded with 0s, flags are padded with 1s flags.resize(flags.len().next_power_of_two(), F::one()); ( @@ -112,6 +121,12 @@ impl BatchedGrandProductToggleLayer { } } + /// Computes the grand product layer output by this one. + /// Since this is a toggle layer, most of the output values are 1s, so + /// the return type is a SparseInterleavedPolynomial + /// o o o o <- output layer + /// / \ / \ / \ / \ + /// 🏴 o 🏳️ o 🏳️ o 🏴 o <- toggle layer #[tracing::instrument(skip_all, name = "BatchedGrandProductToggleLayer::layer_output")] fn layer_output(&self) -> SparseInterleavedPolynomial { let values: Vec<_> = self @@ -132,6 +147,9 @@ impl BatchedGrandProductToggleLayer { SparseInterleavedPolynomial::new(values, self.batched_layer_len / 2) } + /// Coalesces flags and fingerprints into one (dense) vector each. + /// After a certain number of bindings, we can no longer process the k + /// circuits in the batch independently, at which point we coalesce. #[tracing::instrument(skip_all, name = "BatchedGrandProductToggleLayer::coalesce")] fn coalesce(&mut self) { let mut coalesced_fingerprints: Vec = @@ -153,6 +171,7 @@ impl BatchedGrandProductToggleLayer { coalesced }) .collect(); + // Fingerprints are padded with 0s, flags are padded with 1s coalesced_flags.resize(coalesced_flags.len().next_power_of_two(), F::one()); self.coalesced_fingerprints = Some(coalesced_fingerprints); @@ -161,8 +180,8 @@ impl BatchedGrandProductToggleLayer { } impl Bindable for BatchedGrandProductToggleLayer { - /// Incrementally binds a variable of this batched layer's polynomials.
- /// Similar to `BatchedSparseGrandProductLayer::bind`, in that fingerprints use + /// Incrementally binds a variable of the flag and fingerprint polynomials. + /// Similar to `SparseInterleavedPolynomial::bind`, in that flags use /// a sparse representation, but different in a couple of key ways: /// - flags use two separate vectors (for indices and values) rather than /// a single vector of (index, value) pairs @@ -179,6 +198,7 @@ impl Bindable for BatchedGrandProductToggleLayer { let (mut flags_before_binding, mut fingerprints_before_binding) = self.to_dense(); if let Some(coalesced_flags) = &mut self.coalesced_flags { + // Polynomials have already been coalesced, so bind the coalesced vectors. let mut bound_flags = vec![F::one(); coalesced_flags.len() / 2]; for i in 0..bound_flags.len() { bound_flags[i] = coalesced_flags[2 * i] @@ -215,6 +235,7 @@ impl Bindable for BatchedGrandProductToggleLayer { debug_assert!(self.layer_len % 4 == 0); + // Bind the fingerprints self.fingerprints .par_iter_mut() .for_each(|layer: &mut Vec| { @@ -229,6 +250,7 @@ impl Bindable for BatchedGrandProductToggleLayer { self.flag_values = vec![vec![]; self.flag_indices.len()]; } + // Bind the flags self.flag_indices .par_iter_mut() .zip(self.flag_values.par_iter_mut()) @@ -322,6 +344,7 @@ impl Bindable for BatchedGrandProductToggleLayer { } if self.layer_len == 2 { + // Time to coalesce assert!(self.coalesced_fingerprints.is_none()); assert!(self.coalesced_flags.is_none()); self.coalesce(); @@ -359,16 +382,24 @@ impl BatchedCubicSumcheck, previous_round_claim: F) -> UniPoly { if let Some(coalesced_flags) = &self.coalesced_flags { let coalesced_fingerpints = self.coalesced_fingerprints.as_ref().unwrap(); let cubic_evals = if eq_poly.E1_len == 1 { + // 1. 
Flags/fingerprints are coalesced, and E1 is fully bound + // This is similar to the if case of `DenseInterleavedPolynomial::compute_cubic` coalesced_flags .par_chunks(2) .zip(coalesced_fingerpints.par_chunks(2)) @@ -403,6 +434,8 @@ impl BatchedCubicSumcheck = eq_poly.E1[..eq_poly.E1_len] .par_chunks(2) .map(|E1_chunk| { @@ -467,8 +500,9 @@ impl BatchedCubicSumcheck = eq_poly.E2[..eq_poly.E2_len] .par_chunks(2) .take(self.batched_layer_len / 4) @@ -588,6 +622,8 @@ impl BatchedCubicSumcheck = eq_poly.E1[..eq_poly.E1_len] .par_chunks(2) .map(|E1_chunk| { @@ -716,21 +752,47 @@ impl BatchedCubicSumcheck Into for BatchedGrandProductProof, ProofTranscript> { fn into(self) -> GrandProductProof { - let layers: Vec = self.layers.into_iter().map(|i| i.into()).collect(); + let layers: Vec = self.gkr_layers.into_iter().map(|i| i.into()).collect(); assert!(self.quark_proof.is_none(), "Quarks are unsupported"); GrandProductProof { layers } }