From 08fe3e4b2f981ec8ce24459bcaf613f33c3c78b4 Mon Sep 17 00:00:00 2001 From: CPerezz Date: Wed, 7 Feb 2024 22:20:44 +0100 Subject: [PATCH] change: Apply memory optimisation technique from Scroll This incorporates the work done in https://github.com/scroll-tech/halo2/pull/28 in order to lower the memory consumption significantly trading off for some performance. A much more deep analysis can be found here: https://github.com/axiom-crypto/halo2/pull/17 --- halo2_backend/src/plonk.rs | 17 +- halo2_backend/src/plonk/evaluation.rs | 1054 +++++++++++------ halo2_backend/src/plonk/keygen.rs | 15 +- halo2_backend/src/plonk/permutation.rs | 18 +- halo2_backend/src/plonk/permutation/keygen.rs | 14 +- halo2_backend/src/plonk/permutation/prover.rs | 9 +- halo2_backend/src/plonk/prover.rs | 1 - halo2_backend/src/poly/domain.rs | 458 ++++++- halo2_common/src/plonk/circuit.rs | 67 ++ 9 files changed, 1181 insertions(+), 472 deletions(-) diff --git a/halo2_backend/src/plonk.rs b/halo2_backend/src/plonk.rs index 27f2d2f9ed..c7f76c0b17 100644 --- a/halo2_backend/src/plonk.rs +++ b/halo2_backend/src/plonk.rs @@ -6,11 +6,7 @@ use crate::helpers::{ self, polynomial_slice_byte_length, read_polynomial_vec, write_polynomial_slice, SerdeCurveAffine, SerdePrimeField, }; -use crate::poly::{ - Coeff, EvaluationDomain, ExtendedLagrangeCoeff, LagrangeCoeff, PinnedEvaluationDomain, - Polynomial, -}; -use crate::transcript::{ChallengeScalar, EncodedChallenge, Transcript}; +use crate::poly::{Coeff, EvaluationDomain, LagrangeCoeff, PinnedEvaluationDomain, Polynomial}; use evaluation::Evaluator; use halo2_common::plonk::{Circuit, ConstraintSystem, PinnedConstraintSystem}; use halo2_common::SerdeFormat; @@ -323,12 +319,11 @@ pub struct PinnedVerificationKey<'a, C: CurveAffine> { #[derive(Clone, Debug)] pub struct ProvingKey { vk: VerifyingKey, - l0: Polynomial, - l_last: Polynomial, - l_active_row: Polynomial, + l0: Polynomial, + l_last: Polynomial, + l_active_row: Polynomial, fixed_values: Vec>, 
fixed_polys: Vec>, - fixed_cosets: Vec>, permutation: permutation::ProvingKey, ev: Evaluator, } @@ -353,7 +348,6 @@ where + scalar_len * (self.l0.len() + self.l_last.len() + self.l_active_row.len()) + polynomial_slice_byte_length(&self.fixed_values) + polynomial_slice_byte_length(&self.fixed_polys) - + polynomial_slice_byte_length(&self.fixed_cosets) + self.permutation.bytes_length() } } @@ -379,7 +373,6 @@ where self.l_active_row.write(writer, format)?; write_polynomial_slice(&self.fixed_values, writer, format)?; write_polynomial_slice(&self.fixed_polys, writer, format)?; - write_polynomial_slice(&self.fixed_cosets, writer, format)?; self.permutation.write(writer, format)?; Ok(()) } @@ -411,7 +404,6 @@ where let l_active_row = Polynomial::read(reader, format)?; let fixed_values = read_polynomial_vec(reader, format)?; let fixed_polys = read_polynomial_vec(reader, format)?; - let fixed_cosets = read_polynomial_vec(reader, format)?; let permutation = permutation::ProvingKey::read(reader, format)?; let ev = Evaluator::new(vk.cs()); Ok(Self { @@ -421,7 +413,6 @@ where l_active_row, fixed_values, fixed_polys, - fixed_cosets, permutation, ev, }) diff --git a/halo2_backend/src/plonk/evaluation.rs b/halo2_backend/src/plonk/evaluation.rs index 74c1fb1933..afe0e5eccf 100644 --- a/halo2_backend/src/plonk/evaluation.rs +++ b/halo2_backend/src/plonk/evaluation.rs @@ -7,8 +7,10 @@ use crate::{ }; use group::ff::{Field, PrimeField, WithSmallOrderMulGroup}; use halo2_common::plonk::{ConstraintSystem, Expression}; +use halo2_common::poly::LagrangeCoeff; use halo2_middleware::circuit::Any; use halo2_middleware::poly::Rotation; +use itertools::Itertools; use super::shuffle; @@ -39,9 +41,7 @@ pub enum ValueSource { /// theta Theta(), /// y - Y(), - /// Previous value - PreviousValue(), + Y(usize), } impl Default for ValueSource { @@ -58,34 +58,36 @@ impl ValueSource { rotations: &[usize], constants: &[F], intermediates: &[F], - fixed_values: &[Polynomial], - advice_values: 
&[Polynomial], - instance_values: &[Polynomial], + // Why Option?? + fixed_values: &[Option>], + advice_values: &[Option>], + instance_values: &[Option>], challenges: &[F], beta: &F, gamma: &F, theta: &F, - y: &F, - previous_value: &F, + y_powers: &[F], ) -> F { match self { ValueSource::Constant(idx) => constants[*idx], ValueSource::Intermediate(idx) => intermediates[*idx], ValueSource::Fixed(column_index, rotation) => { - fixed_values[*column_index][rotations[*rotation]] + assert!(fixed_values[*column_index].is_some()); + fixed_values[*column_index].as_ref().unwrap()[rotations[*rotation]] } ValueSource::Advice(column_index, rotation) => { - advice_values[*column_index][rotations[*rotation]] + assert!(advice_values[*column_index].is_some()); + advice_values[*column_index].as_ref().unwrap()[rotations[*rotation]] } ValueSource::Instance(column_index, rotation) => { - instance_values[*column_index][rotations[*rotation]] + assert!(advice_values[*column_index].is_some()); + instance_values[*column_index].as_ref().unwrap()[rotations[*rotation]] } ValueSource::Challenge(index) => challenges[*index], ValueSource::Beta() => *beta, ValueSource::Gamma() => *gamma, ValueSource::Theta() => *theta, - ValueSource::Y() => *y, - ValueSource::PreviousValue() => *previous_value, + ValueSource::Y(idx) => y_powers[*idx], } } } @@ -119,15 +121,14 @@ impl Calculation { rotations: &[usize], constants: &[F], intermediates: &[F], - fixed_values: &[Polynomial], - advice_values: &[Polynomial], - instance_values: &[Polynomial], + fixed_values: &[Option>], + advice_values: &[Option>], + instance_values: &[Option>], challenges: &[F], beta: &F, gamma: &F, theta: &F, - y: &F, - previous_value: &F, + y_powers: &[F], ) -> F { let get_value = |value: &ValueSource| { value.get( @@ -141,8 +142,7 @@ impl Calculation { beta, gamma, theta, - y, - previous_value, + y_powers, ) }; match self { @@ -165,15 +165,39 @@ impl Calculation { } } +#[derive(Clone, Default, Debug)] +struct ConstraintCluster { + /// 
Used fixed columns in each cluster + used_fixed_columns: Vec, + /// Used instance columns in each cluster + used_instance_columns: Vec, + /// Used advice columns in each cluster + used_advice_columns: Vec, + /// Custom gates evalution + evaluator: GraphEvaluator, + /// The first index of constraints are being evaluated at in each cluster + first_constraint_idx: usize, + /// The last index of constraints are being evaluated at in each cluster + last_constraint_idx: usize, + /// The last value source + last_value_source: Option, +} + /// Evaluator #[derive(Clone, Default, Debug)] pub struct Evaluator { - /// Custom gates evalution - pub custom_gates: GraphEvaluator, - /// Lookups evalution - pub lookups: Vec>, - /// Shuffle evalution - pub shuffles: Vec>, + /// list of constraint clusters + custom_gate_clusters: Vec>, + /// Number of custom gate constraints + num_custom_gate_constraints: usize, + /// Lookups evalution, degree, used instance and advice columns + lookups: Vec<( + GraphEvaluator, + usize, + (Vec, Vec, Vec), + )>, + /// Powers of y + num_y_powers: usize, } /// GraphEvaluator @@ -207,46 +231,123 @@ pub struct CalculationInfo { pub target: usize, } +// TODO: Move to utils or something similar? 
+fn merge_unique(a: Vec, b: Vec) -> Vec { + let mut result = a; + result.extend(b); + result.into_iter().unique().collect() +} + impl Evaluator { /// Creates a new evaluation structure pub fn new(cs: &ConstraintSystem) -> Self { let mut ev = Evaluator::default(); + let mut constraint_idx = 0; + + // Compute the max cluster index + let quotient_poly_degree = (cs.degree() - 1) as u64; + let mut max_cluster_idx = 0; + while (1 << max_cluster_idx) < quotient_poly_degree { + max_cluster_idx += 1; + } + + ev.custom_gate_clusters + .resize(max_cluster_idx + 1, ConstraintCluster::default()); // Custom gates - let mut parts = Vec::new(); for gate in cs.gates.iter() { - parts.extend( - gate.polynomials() - .iter() - .map(|poly| ev.custom_gates.add_expression(poly)), - ); + for poly in gate.polynomials() { + constraint_idx += 1; + let cluster_idx = Self::compute_cluster_idx(poly.degree(), max_cluster_idx); + let custom_gate_cluster = &mut ev.custom_gate_clusters[cluster_idx]; + custom_gate_cluster.used_fixed_columns = merge_unique( + custom_gate_cluster.used_fixed_columns.clone(), + poly.extract_fixed(), + ); + custom_gate_cluster.used_instance_columns = merge_unique( + custom_gate_cluster.used_instance_columns.clone(), + poly.extract_instances(), + ); + custom_gate_cluster.used_advice_columns = merge_unique( + custom_gate_cluster.used_advice_columns.clone(), + poly.extract_advices(), + ); + let curr = custom_gate_cluster.evaluator.add_expression(poly); + if let Some(last) = custom_gate_cluster.last_value_source { + custom_gate_cluster.last_value_source = Some( + custom_gate_cluster + .evaluator + .add_calculation(Calculation::Horner( + last, + vec![curr], + ValueSource::Y( + constraint_idx - custom_gate_cluster.last_constraint_idx, + ), + )), + ); + } else { + assert_eq!(custom_gate_cluster.last_constraint_idx, 0); + custom_gate_cluster.last_value_source = Some(curr); + custom_gate_cluster.first_constraint_idx = constraint_idx; + } + custom_gate_cluster.last_constraint_idx = 
constraint_idx; + } } - ev.custom_gates.add_calculation(Calculation::Horner( - ValueSource::PreviousValue(), - parts, - ValueSource::Y(), - )); + + ev.num_custom_gate_constraints = constraint_idx; // Lookups for lookup in cs.lookups.iter() { + constraint_idx += 5; let mut graph = GraphEvaluator::default(); let mut evaluate_lc = |expressions: &Vec>| { + let mut max_degree = 0; + let mut used_fixed_columns = vec![]; + let mut used_instance_columns = vec![]; + let mut used_advice_columns = vec![]; let parts = expressions .iter() - .map(|expr| graph.add_expression(expr)) + .map(|expr| { + max_degree = max_degree.max(expr.degree()); + used_fixed_columns = + merge_unique(used_fixed_columns.clone(), expr.extract_fixed()); + used_instance_columns = + merge_unique(used_instance_columns.clone(), expr.extract_instances()); + used_advice_columns = + merge_unique(used_advice_columns.clone(), expr.extract_advices()); + graph.add_expression(expr) + }) .collect(); - graph.add_calculation(Calculation::Horner( - ValueSource::Constant(0), - parts, - ValueSource::Theta(), - )) + ( + graph.add_calculation(Calculation::Horner( + ValueSource::Constant(0), + parts, + ValueSource::Theta(), + )), + max_degree, + used_fixed_columns, + used_instance_columns, + used_advice_columns, + ) }; // Input coset - let compressed_input_coset = evaluate_lc(&lookup.input_expressions); + let ( + compressed_input_coset, + max_input_degree, + input_used_fixed, + input_used_instances, + input_used_advices, + ) = evaluate_lc(&lookup.input_expressions); // table coset - let compressed_table_coset = evaluate_lc(&lookup.table_expressions); + let ( + compressed_table_coset, + max_table_degree, + table_used_fixed, + table_used_instances, + table_used_advices, + ) = evaluate_lc(&lookup.table_expressions); // z(\omega X) (a'(X) + \beta) (s'(X) + \gamma) let right_gamma = graph.add_calculation(Calculation::Add( compressed_table_coset, @@ -257,48 +358,25 @@ impl Evaluator { ValueSource::Beta(), )); 
graph.add_calculation(Calculation::Mul(lc, right_gamma)); - - ev.lookups.push(graph); - } - - // Shuffles - for shuffle in cs.shuffles.iter() { - let evaluate_lc = |expressions: &Vec>, graph: &mut GraphEvaluator| { - let parts = expressions - .iter() - .map(|expr| graph.add_expression(expr)) - .collect(); - graph.add_calculation(Calculation::Horner( - ValueSource::Constant(0), - parts, - ValueSource::Theta(), - )) - }; - - let mut graph_input = GraphEvaluator::default(); - let compressed_input_coset = evaluate_lc(&shuffle.input_expressions, &mut graph_input); - let _ = graph_input.add_calculation(Calculation::Add( - compressed_input_coset, - ValueSource::Gamma(), - )); - - let mut graph_shuffle = GraphEvaluator::default(); - let compressed_shuffle_coset = - evaluate_lc(&shuffle.shuffle_expressions, &mut graph_shuffle); - let _ = graph_shuffle.add_calculation(Calculation::Add( - compressed_shuffle_coset, - ValueSource::Gamma(), + ev.lookups.push(( + graph, + max_input_degree + max_table_degree, + ( + merge_unique(input_used_fixed, table_used_fixed), + merge_unique(input_used_instances, table_used_instances), + merge_unique(input_used_advices, table_used_advices), + ), )); - - ev.shuffles.push(graph_input); - ev.shuffles.push(graph_shuffle); } + // Count the constraints in permutation + let num_sets = (cs.permutation.get_columns().len() + (cs.degree() - 3)) / (cs.degree() - 2); + constraint_idx += 1 + num_sets * 2; + ev.num_y_powers = constraint_idx + 10; ev } /// Evaluate h poly - #[allow(clippy::too_many_arguments)] pub(in crate::plonk) fn evaluate_h( &self, pk: &ProvingKey, @@ -310,301 +388,574 @@ impl Evaluator { gamma: C::ScalarExt, theta: C::ScalarExt, lookups: &[Vec>], - shuffles: &[Vec>], permutations: &[permutation::prover::Committed], ) -> Polynomial { let domain = &pk.vk.domain; - let size = domain.extended_len(); - let rot_scale = 1 << (domain.extended_k() - domain.k()); - let fixed = &pk.fixed_cosets[..]; + let size = 1 << domain.k() as usize; + let 
rot_scale = 1; let extended_omega = domain.get_extended_omega(); + let omega = domain.get_omega(); let isize = size as i32; let one = C::ScalarExt::ONE; - let l0 = &pk.l0; - let l_last = &pk.l_last; - let l_active_row = &pk.l_active_row; let p = &pk.vk.cs.permutation; + let num_parts = domain.extended_len() >> domain.k(); + let num_clusters = (domain.extended_k() - domain.k() + 1) as usize; - // Calculate the advice and instance cosets - let advice: Vec>> = advice_polys - .iter() - .map(|advice_polys| { - advice_polys - .iter() - .map(|poly| domain.coeff_to_extended(poly.clone())) - .collect() - }) - .collect(); - let instance: Vec>> = instance_polys - .iter() - .map(|instance_polys| { - instance_polys - .iter() - .map(|poly| domain.coeff_to_extended(poly.clone())) - .collect() - }) - .collect(); + assert!(self.custom_gate_clusters.len() <= num_clusters); + + // Initialize the the powers of y and constraint counter + let mut y_powers = vec![C::ScalarExt::ONE; self.num_y_powers * instance_polys.len()]; + for i in 1..self.num_y_powers { + y_powers[i] = y_powers[i - 1] * y; + } - let mut values = domain.empty_extended(); + let need_to_compute = |part_idx, cluster_idx| part_idx % (num_parts >> cluster_idx) == 0; + let compute_part_idx_in_cluster = + |part_idx, cluster_idx| part_idx >> (num_clusters - cluster_idx - 1); - // Core expression evaluations - let num_threads = multicore::current_num_threads(); - for ((((advice, instance), lookups), shuffles), permutation) in advice - .iter() - .zip(instance.iter()) - .zip(lookups.iter()) - .zip(shuffles.iter()) - .zip(permutations.iter()) - { - // Custom gates - multicore::scope(|scope| { - let chunk_size = (size + num_threads - 1) / num_threads; - for (thread_idx, values) in values.chunks_mut(chunk_size).enumerate() { - let start = thread_idx * chunk_size; - scope.spawn(move |_| { - let mut eval_data = self.custom_gates.instance(); - for (i, value) in values.iter_mut().enumerate() { - let idx = start + i; - *value = 
self.custom_gates.evaluate( - &mut eval_data, - fixed, - advice, - instance, - challenges, - &beta, - &gamma, - &theta, - &y, - value, - idx, - rot_scale, - isize, - ); + let mut value_part_clusters = Vec::new(); + value_part_clusters.resize(num_clusters, Vec::new()); + for cluster_idx in 0..num_clusters { + value_part_clusters[cluster_idx].resize(1 << cluster_idx, domain.empty_lagrange()); + } + + // Calculate the quotient polynomial for each part + let mut current_extended_omega = one; + for part_idx in 0..num_parts { + let mut fixed: Vec>> = + vec![None; pk.fixed_polys.len()]; + let l0 = domain.coeff_to_extended_part(pk.l0.clone(), current_extended_omega); + let l_last = domain.coeff_to_extended_part(pk.l_last.clone(), current_extended_omega); + let l_active_row = + domain.coeff_to_extended_part(pk.l_active_row.clone(), current_extended_omega); + + let mut constraint_idx = 0; + let mut cluster_last_constraint_idx = vec![0; num_clusters]; + + // Core expression evaluations + let num_threads = multicore::current_num_threads(); + for (((advice_polys, instance_polys), lookups), permutation) in advice_polys + .iter() + .zip(instance_polys.iter()) + .zip(lookups.iter()) + .zip(permutations.iter()) + { + // Calculate the advice and instance cosets + let mut advice: Vec>> = + vec![None; advice_polys.len()]; + let mut instance: Vec>> = + vec![None; instance_polys.len()]; + + // Custom gates + for (cluster_idx, custom_gates) in self.custom_gate_clusters.iter().enumerate() { + if !need_to_compute(part_idx, cluster_idx) + || custom_gates.last_value_source.is_none() + { + continue; + } + let values = &mut value_part_clusters[cluster_idx] + [compute_part_idx_in_cluster(part_idx, cluster_idx)]; + for fixed_idx in custom_gates.used_fixed_columns.iter() { + if fixed[*fixed_idx].is_none() { + fixed[*fixed_idx] = Some(domain.coeff_to_extended_part( + pk.fixed_polys[*fixed_idx].clone(), + current_extended_omega, + )); + } + } + for instance_idx in 
custom_gates.used_instance_columns.iter() { + if instance[*instance_idx].is_none() { + instance[*instance_idx] = Some(domain.coeff_to_extended_part( + instance_polys[*instance_idx].clone(), + current_extended_omega, + )); + } + } + for advice_idx in custom_gates.used_advice_columns.iter() { + if advice[*advice_idx].is_none() { + advice[*advice_idx] = Some(domain.coeff_to_extended_part( + advice_polys[*advice_idx].clone(), + current_extended_omega, + )); + } + } + let fixed_slice = &fixed[..]; + let advice_slice = &advice[..]; + let instance_slice = &instance[..]; + let y_power_slice = &y_powers[..]; + let y_power = y_powers[constraint_idx + custom_gates.first_constraint_idx + - cluster_last_constraint_idx[cluster_idx]]; + multicore::scope(|scope| { + let chunk_size = (size + num_threads - 1) / num_threads; + for (thread_idx, values) in values.chunks_mut(chunk_size).enumerate() { + let start = thread_idx * chunk_size; + scope.spawn(move |_| { + let mut eval_data = custom_gates.evaluator.instance(); + for (i, value) in values.iter_mut().enumerate() { + let idx = start + i; + *value = *value * y_power + + custom_gates.evaluator.evaluate( + &mut eval_data, + fixed_slice, + advice_slice, + instance_slice, + challenges, + y_power_slice, + &beta, + &gamma, + &theta, + idx, + rot_scale, + isize, + ); + } + }); } }); + + // Update the constraint index + cluster_last_constraint_idx[cluster_idx] = + constraint_idx + custom_gates.last_constraint_idx; } - }); - - // Permutations - let sets = &permutation.sets; - if !sets.is_empty() { - let blinding_factors = pk.vk.cs.blinding_factors(); - let last_rotation = Rotation(-((blinding_factors + 1) as i32)); - let chunk_len = pk.vk.cs.degree() - 2; - let delta_start = beta * C::Scalar::ZETA; - - let first_set = sets.first().unwrap(); - let last_set = sets.last().unwrap(); - - // Permutation constraints - parallelize(&mut values, |values, start| { - let mut beta_term = extended_omega.pow_vartime([start as u64, 0, 0, 0]); - for (i, 
value) in values.iter_mut().enumerate() { - let idx = start + i; - let r_next = get_rotation_idx(idx, 1, rot_scale, isize); - let r_last = get_rotation_idx(idx, last_rotation.0, rot_scale, isize); - - // Enforce only for the first set. - // l_0(X) * (1 - z_0(X)) = 0 - *value = *value * y - + ((one - first_set.permutation_product_coset[idx]) * l0[idx]); - // Enforce only for the last set. - // l_last(X) * (z_l(X)^2 - z_l(X)) = 0 - *value = *value * y - + ((last_set.permutation_product_coset[idx] - * last_set.permutation_product_coset[idx] - - last_set.permutation_product_coset[idx]) - * l_last[idx]); - // Except for the first set, enforce. - // l_0(X) * (z_i(X) - z_{i-1}(\omega^(last) X)) = 0 - for (set_idx, set) in sets.iter().enumerate() { - if set_idx != 0 { - *value = *value * y - + ((set.permutation_product_coset[idx] - - permutation.sets[set_idx - 1].permutation_product_coset - [r_last]) - * l0[idx]); + constraint_idx += self.num_custom_gate_constraints; + + // Permutations + let sets = &permutation.sets; + if !sets.is_empty() { + let blinding_factors = pk.vk.cs.blinding_factors(); + let last_rotation = Rotation(-((blinding_factors + 1) as i32)); + let chunk_len = pk.vk.cs.degree() - 2; + let delta_start = beta * &C::Scalar::ZETA; + + let permutation_product_cosets: Vec> = + sets.iter() + .map(|set| { + domain.coeff_to_extended_part( + set.permutation_product_poly.clone(), + current_extended_omega, + ) + }) + .collect(); + + let first_set_permutation_product_coset = + permutation_product_cosets.first().unwrap(); + let last_set_permutation_product_coset = + permutation_product_cosets.last().unwrap(); + + // Permutation constraints + constraint_idx += 1; + if need_to_compute(part_idx, 1) { + let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[1]]; + parallelize( + &mut value_part_clusters[1][compute_part_idx_in_cluster(part_idx, 1)], + |values, start| { + for (i, value) in values.iter_mut().enumerate() { + let idx = start + i; + // Enforce only 
for the first set. + // l_0(X) * (1 - z_0(X)) = 0, degree = 2 + *value = *value * y_power + + ((one - first_set_permutation_product_coset[idx]) + * l0[idx]); + } + }, + ); + cluster_last_constraint_idx[1] = constraint_idx; + } + + constraint_idx += 1; + if need_to_compute(part_idx, 2) { + let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[2]]; + parallelize( + &mut value_part_clusters[2][compute_part_idx_in_cluster(part_idx, 2)], + |values, start| { + for (i, value) in values.iter_mut().enumerate() { + let idx = start + i; + // Enforce only for the last set. + // l_last(X) * (z_l(X)^2 - z_l(X)) = 0, degree = 3 + *value = *value * y_power + + ((last_set_permutation_product_coset[idx] + * last_set_permutation_product_coset[idx] + - last_set_permutation_product_coset[idx]) + * l_last[idx]); + } + }, + ); + cluster_last_constraint_idx[2] = constraint_idx; + } + + constraint_idx += sets.len() - 1; + if need_to_compute(part_idx, 1) { + let y_skip = y_powers + [constraint_idx + 1 - sets.len() - cluster_last_constraint_idx[1]]; + parallelize( + &mut value_part_clusters[1][compute_part_idx_in_cluster(part_idx, 1)], + |values, start| { + for (i, value) in values.iter_mut().enumerate() { + let idx = start + i; + // Except for the first set, enforce. 
+ // l_0(X) * (z_i(X) - z_{i-1}(\omega^(last) X)) = 0, degree = 2 + let r_last = + get_rotation_idx(idx, last_rotation.0, rot_scale, isize); + + *value = *value * y_skip; + + for (set_idx, permutation_product_coset) in + permutation_product_cosets.iter().enumerate() + { + if set_idx != 0 { + *value = *value * y + + ((permutation_product_coset[idx] + - permutation_product_cosets[set_idx - 1] + [r_last]) + * l0[idx]); + } + } + } + }, + ); + cluster_last_constraint_idx[1] = constraint_idx; + } + + constraint_idx += sets.len(); + let running_prod_cluster = + Self::compute_cluster_idx(2 + chunk_len, num_clusters - 1); + if need_to_compute(part_idx, running_prod_cluster) { + for column in p.columns.iter() { + match column.column_type() { + Any::Advice(_) => { + let advice = &mut advice[column.index()]; + if (*advice).is_none() { + *advice = Some(domain.coeff_to_extended_part( + advice_polys[column.index()].clone(), + current_extended_omega, + )); + } + } + Any::Instance => { + let instance = &mut instance[column.index()]; + if instance.is_none() { + *instance = Some(domain.coeff_to_extended_part( + instance_polys[column.index()].clone(), + current_extended_omega, + )); + } + } + Any::Fixed => { + let fixed = &mut fixed[column.index()]; + if fixed.is_none() { + *fixed = Some(domain.coeff_to_extended_part( + pk.fixed_polys[column.index()].clone(), + current_extended_omega, + )); + } + } } } - // And for all the sets we enforce: - // (1 - (l_last(X) + l_blind(X))) * ( - // z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma) - // - z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma) - // ) - let mut current_delta = delta_start * beta_term; - for ((set, columns), cosets) in sets + + let permutation_cosets: Vec> = pk + .permutation + .polys .iter() - .zip(p.columns.chunks(chunk_len)) - .zip(pk.permutation.cosets.chunks(chunk_len)) - { - let mut left = set.permutation_product_coset[r_next]; - for (values, permutation) in columns - .iter() - .map(|&column| match 
column.column_type() { - Any::Advice(_) => &advice[column.index()], - Any::Fixed => &fixed[column.index()], - Any::Instance => &instance[column.index()], - }) - .zip(cosets.iter()) - { - left *= values[idx] + beta * permutation[idx] + gamma; - } + .map(|p| { + domain.coeff_to_extended_part(p.clone(), current_extended_omega) + }) + .collect(); - let mut right = set.permutation_product_coset[idx]; - for values in columns.iter().map(|&column| match column.column_type() { - Any::Advice(_) => &advice[column.index()], - Any::Fixed => &fixed[column.index()], - Any::Instance => &instance[column.index()], - }) { - right *= values[idx] + current_delta + gamma; - current_delta *= &C::Scalar::DELTA; - } + let y_skip = y_powers[constraint_idx + - sets.len() + - cluster_last_constraint_idx[running_prod_cluster]]; - *value = *value * y + ((left - right) * l_active_row[idx]); - } - beta_term *= &extended_omega; + parallelize( + &mut value_part_clusters[running_prod_cluster] + [compute_part_idx_in_cluster(part_idx, running_prod_cluster)], + |values, start| { + let mut beta_term = current_extended_omega + * omega.pow_vartime(&[start as u64, 0, 0, 0]); + for (i, value) in values.iter_mut().enumerate() { + let idx = start + i; + let r_next = get_rotation_idx(idx, 1, rot_scale, isize); + + *value = *value * y_skip; + + // And for all the sets we enforce: + // (1 - (l_last(X) + l_blind(X))) * ( + // z_i(\omega X) \prod_j (p(X) + \beta s_j(X) + \gamma) + // - z_i(X) \prod_j (p(X) + \delta^j \beta X + \gamma) + // ), degree = 2 + chunk_len + let mut current_delta = delta_start * beta_term; + for ( + (columns, permutation_product_coset), + permutation_coset_chunk, + ) in p + .columns + .chunks(chunk_len) + .zip(permutation_product_cosets.iter()) + .zip(permutation_cosets.chunks(chunk_len)) + { + let mut left = permutation_product_coset[r_next]; + for (values, permutation) in columns + .iter() + .map(|&column| match column.column_type() { + Any::Advice(_) => { + 
advice[column.index()].as_ref().unwrap() + } + Any::Fixed => { + fixed[column.index()].as_ref().unwrap() + } + Any::Instance => { + instance[column.index()].as_ref().unwrap() + } + }) + .zip(permutation_coset_chunk.iter()) + { + left *= values[idx] + beta * permutation[idx] + gamma; + } + + let mut right = permutation_product_coset[idx]; + for values in columns.iter().map(|&column| { + match column.column_type() { + Any::Advice(_) => { + advice[column.index()].as_ref().unwrap() + } + Any::Fixed => { + fixed[column.index()].as_ref().unwrap() + } + Any::Instance => { + instance[column.index()].as_ref().unwrap() + } + } + }) { + right *= values[idx] + current_delta + gamma; + current_delta *= &C::Scalar::DELTA; + } + + *value = *value * y + ((left - right) * l_active_row[idx]); + } + beta_term *= ω + } + }, + ); + cluster_last_constraint_idx[running_prod_cluster] = constraint_idx; } - }); - } + } + + // Lookups + for (n, lookup) in lookups.iter().enumerate() { + let (lookup_evaluator, max_degree, used_columns) = &self.lookups[n]; + let running_prod_cluster = + Self::compute_cluster_idx(max_degree + 2, num_clusters - 1); + if !need_to_compute(part_idx, 1) + && !need_to_compute(part_idx, 2) + && !need_to_compute(part_idx, running_prod_cluster) + { + constraint_idx += 5; + continue; + } + + // Polynomials required for this lookup. + // Calculated here so these only have to be kept in memory for the short time + // they are actually needed. + let product_coset = pk.vk.domain.coeff_to_extended_part( + lookup.product_poly.clone(), + current_extended_omega, + ); + let permuted_input_coset = pk.vk.domain.coeff_to_extended_part( + lookup.permuted_input_poly.clone(), + current_extended_omega, + ); + let permuted_table_coset = pk.vk.domain.coeff_to_extended_part( + lookup.permuted_table_poly.clone(), + current_extended_omega, + ); - // Lookups - for (n, lookup) in lookups.iter().enumerate() { - // Polynomials required for this lookup. 
- // Calculated here so these only have to be kept in memory for the short time - // they are actually needed. - let product_coset = pk.vk.domain.coeff_to_extended(lookup.product_poly.clone()); - let permuted_input_coset = pk - .vk - .domain - .coeff_to_extended(lookup.permuted_input_poly.clone()); - let permuted_table_coset = pk - .vk - .domain - .coeff_to_extended(lookup.permuted_table_poly.clone()); - - // Lookup constraints - parallelize(&mut values, |values, start| { - let lookup_evaluator = &self.lookups[n]; - let mut eval_data = lookup_evaluator.instance(); - for (i, value) in values.iter_mut().enumerate() { - let idx = start + i; - - let table_value = lookup_evaluator.evaluate( - &mut eval_data, - fixed, - advice, - instance, - challenges, - &beta, - &gamma, - &theta, - &y, - &C::ScalarExt::ZERO, - idx, - rot_scale, - isize, + // Lookup constraints + constraint_idx += 1; + if need_to_compute(part_idx, 1) { + let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[1]]; + + parallelize( + &mut value_part_clusters[1][compute_part_idx_in_cluster(part_idx, 1)], + |values, start| { + for (i, value) in values.iter_mut().enumerate() { + let idx = start + i; + // l_0(X) * (1 - z(X)) = 0, degree = 2 + *value = + *value * y_power + ((one - product_coset[idx]) * l0[idx]); + } + }, ); + cluster_last_constraint_idx[1] = constraint_idx; + } - let r_next = get_rotation_idx(idx, 1, rot_scale, isize); - let r_prev = get_rotation_idx(idx, -1, rot_scale, isize); - - let a_minus_s = permuted_input_coset[idx] - permuted_table_coset[idx]; - // l_0(X) * (1 - z(X)) = 0 - *value = *value * y + ((one - product_coset[idx]) * l0[idx]); - // l_last(X) * (z(X)^2 - z(X)) = 0 - *value = *value * y - + ((product_coset[idx] * product_coset[idx] - product_coset[idx]) - * l_last[idx]); - // (1 - (l_last(X) + l_blind(X))) * ( - // z(\omega X) (a'(X) + \beta) (s'(X) + \gamma) - // - z(X) (\theta^{m-1} a_0(X) + ... + a_{m-1}(X) + \beta) - // (\theta^{m-1} s_0(X) + ... 
+ s_{m-1}(X) + \gamma) - // ) = 0 - *value = *value * y - + ((product_coset[r_next] - * (permuted_input_coset[idx] + beta) - * (permuted_table_coset[idx] + gamma) - - product_coset[idx] * table_value) - * l_active_row[idx]); - // Check that the first values in the permuted input expression and permuted - // fixed expression are the same. - // l_0(X) * (a'(X) - s'(X)) = 0 - *value = *value * y + (a_minus_s * l0[idx]); - // Check that each value in the permuted lookup input expression is either - // equal to the value above it, or the value at the same index in the - // permuted table expression. - // (1 - (l_last + l_blind)) * (a′(X) − s′(X))⋅(a′(X) − a′(\omega^{-1} X)) = 0 - *value = *value * y - + (a_minus_s - * (permuted_input_coset[idx] - permuted_input_coset[r_prev]) - * l_active_row[idx]); + constraint_idx += 1; + if need_to_compute(part_idx, 2) { + let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[2]]; + parallelize( + &mut value_part_clusters[2][compute_part_idx_in_cluster(part_idx, 2)], + |values, start| { + for (i, value) in values.iter_mut().enumerate() { + let idx = start + i; + // l_last(X) * (z(X)^2 - z(X)) = 0, degree = 3 + *value = *value * y_power + + ((product_coset[idx] * product_coset[idx] + - product_coset[idx]) + * l_last[idx]); + } + }, + ); + cluster_last_constraint_idx[2] = constraint_idx; } - }); - } + constraint_idx += 1; + if need_to_compute(part_idx, running_prod_cluster) { + for fixed_column in used_columns.0.iter() { + let fixed = &mut fixed[*fixed_column]; + if fixed.is_none() { + *fixed = Some(domain.coeff_to_extended_part( + pk.fixed_polys[*fixed_column].clone(), + current_extended_omega, + )); + } + } + for instance_column in used_columns.1.iter() { + let instance = &mut instance[*instance_column]; + if instance.is_none() { + *instance = Some(domain.coeff_to_extended_part( + instance_polys[*instance_column].clone(), + current_extended_omega, + )); + } + } + + for advice_column in used_columns.2.iter() { + let 
advice = &mut advice[*advice_column]; + if (*advice).is_none() { + *advice = Some(domain.coeff_to_extended_part( + advice_polys[*advice_column].clone(), + current_extended_omega, + )); + } + } + + let y_power = y_powers + [constraint_idx - cluster_last_constraint_idx[running_prod_cluster]]; + let fixed_slice = &fixed[..]; + let advice_slice = &advice[..]; + let instance_slice = &instance[..]; + let y_power_slice = &y_powers[..]; + parallelize( + &mut value_part_clusters[running_prod_cluster] + [compute_part_idx_in_cluster(part_idx, running_prod_cluster)], + |values, start| { + let mut eval_data = lookup_evaluator.instance(); + for (i, value) in values.iter_mut().enumerate() { + let idx = start + i; + let table_value = lookup_evaluator.evaluate( + &mut eval_data, + fixed_slice, + advice_slice, + instance_slice, + challenges, + y_power_slice, + &beta, + &gamma, + &theta, + idx, + rot_scale, + isize, + ); + + let r_next = get_rotation_idx(idx, 1, rot_scale, isize); - // Shuffle constraints - for (n, shuffle) in shuffles.iter().enumerate() { - let product_coset = pk.vk.domain.coeff_to_extended(shuffle.product_poly.clone()); - - // Shuffle constraints - parallelize(&mut values, |values, start| { - let input_evaluator = &self.shuffles[2 * n]; - let shuffle_evaluator = &self.shuffles[2 * n + 1]; - let mut eval_data_input = shuffle_evaluator.instance(); - let mut eval_data_shuffle = shuffle_evaluator.instance(); - for (i, value) in values.iter_mut().enumerate() { - let idx = start + i; - - let input_value = input_evaluator.evaluate( - &mut eval_data_input, - fixed, - advice, - instance, - challenges, - &beta, - &gamma, - &theta, - &y, - &C::ScalarExt::ZERO, - idx, - rot_scale, - isize, + // (1 - (l_last(X) + l_blind(X))) * ( + // z(\omega X) (a'(X) + \beta) (s'(X) + \gamma) + // - z(X) (\theta^{m-1} a_0(X) + ... + a_{m-1}(X) + \beta) + // (\theta^{m-1} s_0(X) + ... 
+ s_{m-1}(X) + \gamma) + // ) = 0, degree = 2 + max(deg(a)) + max(deg(s)) + *value = *value * y_power + + ((product_coset[r_next] + * (permuted_input_coset[idx] + beta) + * (permuted_table_coset[idx] + gamma) + - product_coset[idx] * table_value) + * l_active_row[idx]); + } + }, ); + cluster_last_constraint_idx[running_prod_cluster] = constraint_idx; + } - let shuffle_value = shuffle_evaluator.evaluate( - &mut eval_data_shuffle, - fixed, - advice, - instance, - challenges, - &beta, - &gamma, - &theta, - &y, - &C::ScalarExt::ZERO, - idx, - rot_scale, - isize, + constraint_idx += 1; + if need_to_compute(part_idx, 1) { + let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[1]]; + parallelize( + &mut value_part_clusters[1][compute_part_idx_in_cluster(part_idx, 1)], + |values, start| { + for (i, value) in values.iter_mut().enumerate() { + let idx = start + i; + let a_minus_s = + permuted_input_coset[idx] - permuted_table_coset[idx]; + // Check that the first values in the permuted input expression and permuted + // fixed expression are the same. 
+ // l_0(X) * (a'(X) - s'(X)) = 0, degree = 2 + *value = *value * y_power + (a_minus_s * l0[idx]); + } + }, ); + cluster_last_constraint_idx[1] = constraint_idx; + } - let r_next = get_rotation_idx(idx, 1, rot_scale, isize); - - // l_0(X) * (1 - z(X)) = 0 - *value = *value * y + ((one - product_coset[idx]) * l0[idx]); - // l_last(X) * (z(X)^2 - z(X)) = 0 - *value = *value * y - + ((product_coset[idx] * product_coset[idx] - product_coset[idx]) - * l_last[idx]); - // (1 - (l_last(X) + l_blind(X))) * (z(\omega X) (s(X) + \gamma) - z(X) (a(X) + \gamma)) = 0 - *value = *value * y - + l_active_row[idx] - * (product_coset[r_next] * shuffle_value - - product_coset[idx] * input_value) + constraint_idx += 1; + if need_to_compute(part_idx, 2) { + let y_power = y_powers[constraint_idx - cluster_last_constraint_idx[2]]; + parallelize( + &mut value_part_clusters[2][compute_part_idx_in_cluster(part_idx, 2)], + |values, start| { + for (i, value) in values.iter_mut().enumerate() { + let idx = start + i; + let r_prev = get_rotation_idx(idx, -1, rot_scale, isize); + + // Check that each value in the permuted lookup input expression is either + // equal to the value above it, or the value at the same index in the + // permuted table expression. + // (1 - (l_last + l_blind)) * (a′(X) − s′(X))⋅(a′(X) − a′(\omega^{-1} X)) = 0, degree = 3 + let a_minus_s = + permuted_input_coset[idx] - permuted_table_coset[idx]; + *value = *value * y_power + + (a_minus_s + * (permuted_input_coset[idx] + - permuted_input_coset[r_prev]) + * l_active_row[idx]); + } + }, + ); + cluster_last_constraint_idx[2] = constraint_idx; } - }); + } } + // Align the constraints by different powers of y. 
/// Maps a constraint degree to the index of the cluster that accumulates it.
///
/// The index is `ceil(log2(degree))`, capped at `max_cluster_idx`. Degrees 0
/// and 1 both map to cluster 0.
fn compute_cluster_idx(degree: usize, max_cluster_idx: usize) -> usize {
    // `next_power_of_two` rounds up (and maps 0 to 1), so its trailing-zero
    // count is exactly ceil(log2(degree)). Working in `usize` avoids both the
    // `31 - 32` underflow for degree 0 and the truncation of a `u32` cast.
    // A degree too large to round up simply saturates at the cap below.
    let idx = degree
        .checked_next_power_of_two()
        .map_or(usize::BITS as usize, |pow| pow.trailing_zeros() as usize);
    idx.min(max_cluster_idx)
}
where let mut l0 = vk.domain.empty_lagrange(); l0[0] = C::Scalar::ONE; let l0 = vk.domain.lagrange_to_coeff(l0); - let l0 = vk.domain.coeff_to_extended(l0); // Compute l_blind(X) which evaluates to 1 for each blinding factor row // and 0 otherwise over the domain. @@ -141,19 +135,15 @@ where for evaluation in l_blind[..].iter_mut().rev().take(vk.cs.blinding_factors()) { *evaluation = C::Scalar::ONE; } - let l_blind = vk.domain.lagrange_to_coeff(l_blind); - let l_blind = vk.domain.coeff_to_extended(l_blind); // Compute l_last(X) which evaluates to 1 on the first inactive row (just // before the blinding factors) and 0 otherwise over the domain - let mut l_last = vk.domain.empty_lagrange(); + let mut l_last = vk.domain.empty_coeff(); l_last[params.n() as usize - vk.cs.blinding_factors() - 1] = C::Scalar::ONE; - let l_last = vk.domain.lagrange_to_coeff(l_last); - let l_last = vk.domain.coeff_to_extended(l_last); // Compute l_active_row(X) let one = C::Scalar::ONE; - let mut l_active_row = vk.domain.empty_extended(); + let mut l_active_row = vk.domain.empty_coeff(); parallelize(&mut l_active_row, |values, start| { for (i, value) in values.iter_mut().enumerate() { let idx = i + start; @@ -177,7 +167,6 @@ where .map(Polynomial::new_lagrange_from_vec) .collect(), fixed_polys, - fixed_cosets, permutation: permutation_pk, ev, }) diff --git a/halo2_backend/src/plonk/permutation.rs b/halo2_backend/src/plonk/permutation.rs index ac2d6fc302..2d67695fcb 100644 --- a/halo2_backend/src/plonk/permutation.rs +++ b/halo2_backend/src/plonk/permutation.rs @@ -2,11 +2,13 @@ use crate::{ arithmetic::CurveAffine, - helpers::{polynomial_slice_byte_length, read_polynomial_vec, write_polynomial_slice}, - poly::{Coeff, ExtendedLagrangeCoeff, LagrangeCoeff, Polynomial}, + helpers::{ + polynomial_slice_byte_length, read_polynomial_vec, write_polynomial_slice, + SerdeCurveAffine, SerdePrimeField, + }, + poly::{Coeff, LagrangeCoeff, Polynomial}, SerdeFormat, }; -use 
halo2_common::helpers::{SerdeCurveAffine, SerdePrimeField}; pub use halo2_common::plonk::permutation::Argument; use std::io; @@ -63,8 +65,7 @@ impl VerifyingKey { #[derive(Clone, Debug)] pub(crate) struct ProvingKey { permutations: Vec>, - polys: Vec>, - pub(super) cosets: Vec>, + pub(crate) polys: Vec>, } impl ProvingKey @@ -75,11 +76,9 @@ where pub(super) fn read(reader: &mut R, format: SerdeFormat) -> io::Result { let permutations = read_polynomial_vec(reader, format)?; let polys = read_polynomial_vec(reader, format)?; - let cosets = read_polynomial_vec(reader, format)?; Ok(ProvingKey { permutations, polys, - cosets, }) } @@ -91,7 +90,6 @@ where ) -> io::Result<()> { write_polynomial_slice(&self.permutations, writer, format)?; write_polynomial_slice(&self.polys, writer, format)?; - write_polynomial_slice(&self.cosets, writer, format)?; Ok(()) } } @@ -99,8 +97,6 @@ where impl ProvingKey { /// Gets the total number of bytes in the serialization of `self` pub(super) fn bytes_length(&self) -> usize { - polynomial_slice_byte_length(&self.permutations) - + polynomial_slice_byte_length(&self.polys) - + polynomial_slice_byte_length(&self.cosets) + polynomial_slice_byte_length(&self.permutations) + polynomial_slice_byte_length(&self.polys) } } diff --git a/halo2_backend/src/plonk/permutation/keygen.rs b/halo2_backend/src/plonk/permutation/keygen.rs index a4a6ee6cdc..b2eaeb64ea 100644 --- a/halo2_backend/src/plonk/permutation/keygen.rs +++ b/halo2_backend/src/plonk/permutation/keygen.rs @@ -380,7 +380,7 @@ pub(crate) fn build_pk<'params, C: CurveAffine, P: Params<'params, C>>( }); } - // Compute permutation polynomials, convert to coset form. + // Compute permutation polynomials. 
let mut permutations = vec![domain.empty_lagrange(); p.columns.len()]; { parallelize(&mut permutations, |o, start| { @@ -405,21 +405,9 @@ pub(crate) fn build_pk<'params, C: CurveAffine, P: Params<'params, C>>( }); } - let mut cosets = vec![domain.empty_extended(); p.columns.len()]; - { - parallelize(&mut cosets, |o, start| { - for (x, coset) in o.iter_mut().enumerate() { - let i = start + x; - let poly = polys[i].clone(); - *coset = domain.coeff_to_extended(poly); - } - }); - } - ProvingKey { permutations, polys, - cosets, } } diff --git a/halo2_backend/src/plonk/permutation/prover.rs b/halo2_backend/src/plonk/permutation/prover.rs index fda5a21156..4f96cfd7f5 100644 --- a/halo2_backend/src/plonk/permutation/prover.rs +++ b/halo2_backend/src/plonk/permutation/prover.rs @@ -12,7 +12,7 @@ use crate::{ plonk::{self, permutation::ProvingKey, ChallengeBeta, ChallengeGamma, ChallengeX}, poly::{ commitment::{Blind, Params}, - Coeff, ExtendedLagrangeCoeff, LagrangeCoeff, Polynomial, ProverQuery, + Coeff, LagrangeCoeff, Polynomial, ProverQuery, }, transcript::{EncodedChallenge, TranscriptWrite}, }; @@ -25,7 +25,6 @@ use halo2_middleware::poly::Rotation; pub(crate) struct CommittedSet { pub(crate) permutation_product_poly: Polynomial, - pub(crate) permutation_product_coset: Polynomial, permutation_product_blind: Blind, } @@ -175,10 +174,7 @@ pub(in crate::plonk) fn permutation_commit< let permutation_product_commitment_projective = params.commit_lagrange(&z, blind); let permutation_product_blind = blind; let z = domain.lagrange_to_coeff(z); - let permutation_product_poly = z.clone(); - - let permutation_product_coset = domain.coeff_to_extended(z.clone()); - + let permutation_product_poly = z; let permutation_product_commitment = permutation_product_commitment_projective.to_affine(); // Hash the permutation product commitment @@ -186,7 +182,6 @@ pub(in crate::plonk) fn permutation_commit< sets.push(CommittedSet { permutation_product_poly, - permutation_product_coset, 
permutation_product_blind, }); } diff --git a/halo2_backend/src/plonk/prover.rs b/halo2_backend/src/plonk/prover.rs index 8d5c848b27..c113b8b4ae 100644 --- a/halo2_backend/src/plonk/prover.rs +++ b/halo2_backend/src/plonk/prover.rs @@ -581,7 +581,6 @@ impl< *gamma, *theta, &lookups, - &shuffles, &permutations, ); diff --git a/halo2_backend/src/poly/domain.rs b/halo2_backend/src/poly/domain.rs index dabc797da4..feb74f0bf6 100644 --- a/halo2_backend/src/poly/domain.rs +++ b/halo2_backend/src/poly/domain.rs @@ -167,6 +167,32 @@ impl> EvaluationDomain { } } + /// Obtains a polynomial in ExtendedLagrange form when given a vector of + /// Lagrange polynomials with total size `extended_n`; panics if the + /// provided vector is the wrong length. + pub fn lagrange_vec_to_extended( + &self, + values: Vec>, + ) -> Polynomial { + assert_eq!(values.len(), (self.extended_len() >> self.k) as usize); + assert_eq!(values[0].len(), self.n as usize); + + // transpose the values in parallel + let mut transposed = vec![vec![F::ZERO; values.len()]; self.n as usize]; + values.into_iter().enumerate().for_each(|(i, p)| { + parallelize(&mut transposed, |transposed, start| { + for (transposed, p) in transposed.iter_mut().zip(p.values[start..].iter()) { + transposed[i] = *p; + } + }); + }); + + Polynomial { + values: transposed.into_iter().flatten().collect(), + _marker: PhantomData, + } + } + /// Returns an empty (zero) polynomial in the coefficient basis pub fn empty_coeff(&self) -> Polynomial { Polynomial { @@ -175,6 +201,81 @@ impl> EvaluationDomain { } } + /// This takes us from an n-length coefficient vector into parts of the + /// extended evaluation domain. For example, for a polynomial with size n, + /// and an extended domain of size mn, we can compute all parts + /// independently, which are + /// `FFT(f(zeta * X), n)` + /// `FFT(f(zeta * extended_omega * X), n)` + /// ... 
+ /// `FFT(f(zeta * extended_omega^{m-1} * X), n)` + pub fn coeff_to_extended_parts( + &self, + a: &Polynomial, + ) -> Vec> { + assert_eq!(a.values.len(), 1 << self.k); + + let num_parts = self.extended_len() >> self.k; + let mut extended_omega_factor = F::ONE; + (0..num_parts) + .map(|_| { + let part = self.coeff_to_extended_part(a.clone(), extended_omega_factor); + extended_omega_factor *= self.extended_omega; + part + }) + .collect() + } + + /// This takes us from several n-length coefficient vectors each into parts + /// of the extended evaluation domain. For example, for a polynomial with + /// size n, and an extended domain of size mn, we can compute all parts + /// independently, which are + /// `FFT(f(zeta * X), n)` + /// `FFT(f(zeta * extended_omega * X), n)` + /// ... + /// `FFT(f(zeta * extended_omega^{m-1} * X), n)` + pub fn batched_coeff_to_extended_parts( + &self, + a: &[Polynomial], + ) -> Vec>> { + assert_eq!(a[0].values.len(), 1 << self.k); + + let mut extended_omega_factor = F::ONE; + let num_parts = self.extended_len() >> self.k; + (0..num_parts) + .map(|_| { + let a_lagrange = a + .iter() + .map(|poly| self.coeff_to_extended_part(poly.clone(), extended_omega_factor)) + .collect(); + extended_omega_factor *= self.extended_omega; + a_lagrange + }) + .collect() + } + + /// This takes us from an n-length coefficient vector into a part of the + /// extended evaluation domain. For example, for a polynomial with size n, + /// and an extended domain of size mn, we can compute one of the m parts + /// separately, which is + /// `FFT(f(zeta * extended_omega_factor * X), n)` + /// where `extended_omega_factor` is `extended_omega^i` with `i` in `[0, m)`. 
+ pub fn coeff_to_extended_part( + &self, + mut a: Polynomial, + extended_omega_factor: F, + ) -> Polynomial { + assert_eq!(a.values.len(), 1 << self.k); + + self.distribute_powers(&mut a.values, self.g_coset * extended_omega_factor); + best_fft(&mut a.values, self.omega, self.k); + + Polynomial { + values: a.values, + _marker: PhantomData, + } + } + /// Returns an empty (zero) polynomial in the Lagrange coefficient basis pub fn empty_lagrange(&self) -> Polynomial { Polynomial { @@ -243,6 +344,65 @@ impl> EvaluationDomain { } } + /// This takes us from the a list of lagrange-based polynomials with + /// different degrees and gets their extended lagrange-based summation. + pub fn lagrange_vecs_to_extended( + &self, + mut a: Vec>>, + ) -> Polynomial { + let mut result_poly = if a[a.len() - 1].len() == 1 << (self.extended_k - self.k) { + self.lagrange_vec_to_extended(a.pop().unwrap()) + } else { + self.empty_extended() + }; + + // Transform from each cluster of lagrange representations to coeff representations. 
+ let mut ifft_divisor = self.extended_ifft_divisor; + let mut omega_inv = self.extended_omega_inv; + { + let mut i = a.last().unwrap().len() << self.k; + while i < (1 << self.extended_k) { + ifft_divisor = ifft_divisor + ifft_divisor; + omega_inv = omega_inv * omega_inv; + i = i << 1; + } + } + + let mut result = vec![F::ZERO; 1 << self.extended_k as usize]; + for (i, a_parts) in a.into_iter().enumerate().rev() { + // transpose the values in parallel + assert_eq!(1 << i, a_parts.len()); + let mut a_poly: Vec = { + let mut transposed = vec![vec![F::ZERO; a_parts.len()]; self.n as usize]; + a_parts.into_iter().enumerate().for_each(|(j, p)| { + parallelize(&mut transposed, |transposed, start| { + for (transposed, p) in transposed.iter_mut().zip(p.values[start..].iter()) { + transposed[j] = *p; + } + }); + }); + transposed.into_iter().flatten().collect() + }; + + Self::ifft(&mut a_poly, omega_inv, self.k + i as u32, ifft_divisor); + ifft_divisor = ifft_divisor + ifft_divisor; + omega_inv = omega_inv * omega_inv; + + parallelize(&mut result[0..(self.n << i) as usize], |result, start| { + for (other, current) in result.iter_mut().zip(a_poly[start..].iter()) { + other.add(current); + } + }); + } + best_fft(&mut result, self.extended_omega, self.extended_k); + parallelize(&mut result_poly.values, |values, start| { + for (value, other) in values.iter_mut().zip(result[start..].into_iter()) { + value.add(other); + } + }); + result_poly + } + /// Rotate the extended domain polynomial over the original domain. 
pub fn rotate_extended( &self, @@ -340,6 +500,19 @@ impl> EvaluationDomain { }); } + /// Given a slice of group elements `[a_0, a_1, a_2, ...]`, this returns + /// `[a_0, [c]a_1, [c^2]a_2, [c^3]a_3, [c^4]a_4, ...]`, + /// + fn distribute_powers(&self, a: &mut [F], c: F) { + parallelize(a, |a, index| { + let mut c_power = c.pow_vartime(&[index as u64, 0, 0, 0]); + for a in a { + *a *= c_power; + c_power = c_power * c; + } + }); + } + fn ifft(a: &mut [F], omega_inv: F, log_n: u32, divisor: F) { best_fft(a, omega_inv, log_n); parallelize(a, |a, _| { @@ -475,73 +648,236 @@ pub struct PinnedEvaluationDomain<'a, F: Field> { omega: &'a F, } -#[test] -fn test_rotate() { - use rand_core::OsRng; - - use crate::arithmetic::eval_polynomial; - use halo2curves::pasta::pallas::Scalar; - - let domain = EvaluationDomain::::new(1, 3); - let rng = OsRng; - - let mut poly = domain.empty_lagrange(); - assert_eq!(poly.len(), 8); - for value in poly.iter_mut() { - *value = Scalar::random(rng); - } - - let poly_rotated_cur = poly.rotate(Rotation::cur()); - let poly_rotated_next = poly.rotate(Rotation::next()); - let poly_rotated_prev = poly.rotate(Rotation::prev()); - - let poly = domain.lagrange_to_coeff(poly); - let poly_rotated_cur = domain.lagrange_to_coeff(poly_rotated_cur); - let poly_rotated_next = domain.lagrange_to_coeff(poly_rotated_next); - let poly_rotated_prev = domain.lagrange_to_coeff(poly_rotated_prev); - - let x = Scalar::random(rng); - - assert_eq!( - eval_polynomial(&poly[..], x), - eval_polynomial(&poly_rotated_cur[..], x) - ); - assert_eq!( - eval_polynomial(&poly[..], x * domain.omega), - eval_polynomial(&poly_rotated_next[..], x) - ); - assert_eq!( - eval_polynomial(&poly[..], x * domain.omega_inv), - eval_polynomial(&poly_rotated_prev[..], x) - ); -} +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_rotate() { + use rand_core::OsRng; + + use crate::arithmetic::eval_polynomial; + use halo2curves::pasta::pallas::Scalar; + + let domain = 
EvaluationDomain::::new(1, 3); + let rng = OsRng; + + let mut poly = domain.empty_lagrange(); + assert_eq!(poly.len(), 8); + for value in poly.iter_mut() { + *value = Scalar::random(rng); + } + + let poly_rotated_cur = poly.rotate(Rotation::cur()); + let poly_rotated_next = poly.rotate(Rotation::next()); + let poly_rotated_prev = poly.rotate(Rotation::prev()); -#[test] -fn test_l_i() { - use rand_core::OsRng; + let poly = domain.lagrange_to_coeff(poly); + let poly_rotated_cur = domain.lagrange_to_coeff(poly_rotated_cur); + let poly_rotated_next = domain.lagrange_to_coeff(poly_rotated_next); + let poly_rotated_prev = domain.lagrange_to_coeff(poly_rotated_prev); - use crate::arithmetic::{eval_polynomial, lagrange_interpolate}; - use halo2curves::pasta::pallas::Scalar; - let domain = EvaluationDomain::::new(1, 3); + let x = Scalar::random(rng); - let mut l = vec![]; - let mut points = vec![]; - for i in 0..8 { - points.push(domain.omega.pow([i])); + assert_eq!( + eval_polynomial(&poly[..], x), + eval_polynomial(&poly_rotated_cur[..], x) + ); + assert_eq!( + eval_polynomial(&poly[..], x * domain.omega), + eval_polynomial(&poly_rotated_next[..], x) + ); + assert_eq!( + eval_polynomial(&poly[..], x * domain.omega_inv), + eval_polynomial(&poly_rotated_prev[..], x) + ); } - for i in 0..8 { - let mut l_i = vec![Scalar::zero(); 8]; - l_i[i] = Scalar::ONE; - let l_i = lagrange_interpolate(&points[..], &l_i[..]); - l.push(l_i); + + #[test] + fn test_l_i() { + use rand_core::OsRng; + + use crate::arithmetic::{eval_polynomial, lagrange_interpolate}; + use halo2curves::pasta::pallas::Scalar; + let domain = EvaluationDomain::::new(1, 3); + + let mut l = vec![]; + let mut points = vec![]; + for i in 0..8 { + points.push(domain.omega.pow([i])); + } + for i in 0..8 { + let mut l_i = vec![Scalar::zero(); 8]; + l_i[i] = Scalar::ONE; + let l_i = lagrange_interpolate(&points[..], &l_i[..]); + l.push(l_i); + } + + let x = Scalar::random(OsRng); + let xn = x.pow([8]); + + let evaluations = 
domain.l_i_range(x, xn, -7..=7); + for i in 0..8 { + assert_eq!(eval_polynomial(&l[i][..], x), evaluations[7 + i]); + assert_eq!(eval_polynomial(&l[(8 - i) % 8][..], x), evaluations[7 - i]); + } } - let x = Scalar::random(OsRng); - let xn = x.pow([8]); + #[test] + fn test_coeff_to_extended_part() { + use halo2curves::pasta::pallas::Scalar; + use rand_core::OsRng; + + let domain = EvaluationDomain::::new(1, 3); + let rng = OsRng; + let mut poly = domain.empty_coeff(); + assert_eq!(poly.len(), 8); + for value in poly.iter_mut() { + *value = Scalar::random(rng); + } + + let want = domain.coeff_to_extended(poly.clone()); + let got = { + let parts = domain.coeff_to_extended_parts(&poly); + domain.lagrange_vec_to_extended(parts) + }; + assert_eq!(want.values, got.values); + } + + #[test] + fn bench_coeff_to_extended_parts() { + use halo2curves::pasta::pallas::Scalar; + use rand_core::OsRng; + use std::time::Instant; + + let k = 20; + let domain = EvaluationDomain::::new(3, k); + let rng = OsRng; + let mut poly1 = domain.empty_coeff(); + assert_eq!(poly1.len(), 1 << k); + + for value in poly1.iter_mut() { + *value = Scalar::random(rng); + } + + let poly2 = poly1.clone(); + + let coeff_to_extended_timer = Instant::now(); + let _ = domain.coeff_to_extended(poly1); + println!( + "domain.coeff_to_extended time: {}s", + coeff_to_extended_timer.elapsed().as_secs_f64() + ); + + let coeff_to_extended_parts_timer = Instant::now(); + let _ = domain.coeff_to_extended_parts(&poly2); + println!( + "domain.coeff_to_extended_parts time: {}s", + coeff_to_extended_parts_timer.elapsed().as_secs_f64() + ); + } + + #[test] + fn test_lagrange_vecs_to_extended() { + use halo2curves::pasta::pallas::Scalar; + use rand_core::OsRng; + + let rng = OsRng; + let domain = EvaluationDomain::::new(8, 3); + let mut poly_vec = vec![]; + let mut poly_lagrange_vecs = vec![]; + let mut want = domain.empty_extended(); + let mut omega = domain.extended_omega; + for i in (0..(domain.extended_k - domain.k + 
1)).rev() { + let mut poly = vec![Scalar::zero(); (1 << i) * domain.n as usize]; + for value in poly.iter_mut() { + *value = Scalar::random(rng); + } + // poly under coeff representation. + poly_vec.push(poly.clone()); + // poly under lagrange vector representation. + let mut poly2 = poly.clone(); + best_fft(&mut poly2, omega, i + domain.k); + let transposed_poly: Vec> = (0..(1 << i)) + .map(|j| { + let mut p = domain.empty_lagrange(); + for k in 0..domain.n { + p[k as usize] = poly2[j + (k as usize) * (1 << i)]; + } + p + }) + .collect(); + poly_lagrange_vecs.push(transposed_poly); + // poly under extended representation. + poly.resize(domain.extended_len() as usize, Scalar::zero()); + best_fft(&mut poly, domain.extended_omega, domain.extended_k); + let poly = { + let mut p = domain.empty_extended(); + p.values = poly; + p + }; + want = want + &poly; + omega = omega * omega; + } + + poly_lagrange_vecs.reverse(); + let got = domain.lagrange_vecs_to_extended(poly_lagrange_vecs); + assert_eq!(want.values, got.values); + } + + #[test] + fn bench_lagrange_vecs_to_extended() { + use halo2curves::pasta::pallas::Scalar; + use rand_core::OsRng; + use std::time::Instant; + + let rng = OsRng; + let domain = EvaluationDomain::::new(8, 10); + let mut poly_vec = vec![]; + let mut poly_lagrange_vecs = vec![]; + let mut poly_extended_vecs = vec![]; + let mut omega = domain.extended_omega; + + for i in (0..(domain.extended_k - domain.k + 1)).rev() { + let mut poly = vec![Scalar::zero(); (1 << i) * domain.n as usize]; + for value in poly.iter_mut() { + *value = Scalar::random(rng); + } + // poly under coeff representation. + poly_vec.push(poly.clone()); + // poly under lagrange vector representation. 
+ let mut poly2 = poly.clone(); + best_fft(&mut poly2, omega, i + domain.k); + let transposed_poly: Vec> = (0..(1 << i)) + .map(|j| { + let mut p = domain.empty_lagrange(); + for k in 0..domain.n { + p[k as usize] = poly2[j + (k as usize) * (1 << i)]; + } + p + }) + .collect(); + poly_lagrange_vecs.push(transposed_poly); + // poly under extended representation. + poly.resize(domain.extended_len() as usize, Scalar::zero()); + best_fft(&mut poly, domain.extended_omega, domain.extended_k); + let poly = { + let mut p = domain.empty_extended(); + p.values = poly; + p + }; + poly_extended_vecs.push(poly); + omega = omega * omega; + } - let evaluations = domain.l_i_range(x, xn, -7..=7); - for i in 0..8 { - assert_eq!(eval_polynomial(&l[i][..], x), evaluations[7 + i]); - assert_eq!(eval_polynomial(&l[(8 - i) % 8][..], x), evaluations[7 - i]); + let want_timer = Instant::now(); + let _ = poly_extended_vecs + .iter() + .fold(domain.empty_extended(), |acc, p| acc + p); + println!("want time: {}s", want_timer.elapsed().as_secs_f64()); + poly_lagrange_vecs.reverse(); + let got_timer = Instant::now(); + let _ = domain.lagrange_vecs_to_extended(poly_lagrange_vecs); + println!("got time: {}s", got_timer.elapsed().as_secs_f64()); } } diff --git a/halo2_common/src/plonk/circuit.rs b/halo2_common/src/plonk/circuit.rs index a0d0304126..8faedbc86d 100644 --- a/halo2_common/src/plonk/circuit.rs +++ b/halo2_common/src/plonk/circuit.rs @@ -11,6 +11,7 @@ use halo2_middleware::circuit::{ use halo2_middleware::ff::Field; use halo2_middleware::metadata; use halo2_middleware::poly::Rotation; +use itertools::Itertools; use sealed::SealedPhase; use std::collections::HashMap; use std::fmt::Debug; @@ -1179,6 +1180,72 @@ impl Expression { &|a, _| a, ) } + + /// Extracts all used instance columns in this expression + pub fn extract_instances(&self) -> Vec { + self.evaluate( + &|_| vec![], + &|_| vec![], + &|_| vec![], + &|_| vec![], + &|query| vec![query.column_index], + &|_| vec![], + &|a| a, + 
&|mut a, b| { + a.extend(b); + a.into_iter().unique().collect() + }, + &|mut a, b| { + a.extend(b); + a.into_iter().unique().collect() + }, + &|a, _| a, + ) + } + + /// Extracts all used advice columns in this expression + pub fn extract_advices(&self) -> Vec { + self.evaluate( + &|_| vec![], + &|_| vec![], + &|_| vec![], + &|query| vec![query.column_index], + &|_| vec![], + &|_| vec![], + &|a| a, + &|mut a, b| { + a.extend(b); + a.into_iter().unique().collect() + }, + &|mut a, b| { + a.extend(b); + a.into_iter().unique().collect() + }, + &|a, _| a, + ) + } + + /// Extracts all used fixed columns in this expression + pub fn extract_fixed(&self) -> Vec { + self.evaluate( + &|_| vec![], + &|_| vec![], + &|query| vec![query.column_index], + &|_| vec![], + &|_| vec![], + &|_| vec![], + &|a| a, + &|mut a, b| { + a.extend(b); + a.into_iter().unique().collect() + }, + &|mut a, b| { + a.extend(b); + a.into_iter().unique().collect() + }, + &|a, _| a, + ) + } } impl std::fmt::Debug for Expression {