From df2a1bbd9406ce5150143de696aa90e44975e5f2 Mon Sep 17 00:00:00 2001 From: Eduard S Date: Fri, 9 Feb 2024 16:28:00 +0000 Subject: [PATCH] feat: use peak memory estimation from Han --- zkevm-circuits/src/bin/stats/halo2_stats.rs | 373 ++++++++++++++++++++ zkevm-circuits/src/bin/stats/main.rs | 193 ++-------- 2 files changed, 394 insertions(+), 172 deletions(-) create mode 100644 zkevm-circuits/src/bin/stats/halo2_stats.rs diff --git a/zkevm-circuits/src/bin/stats/halo2_stats.rs b/zkevm-circuits/src/bin/stats/halo2_stats.rs new file mode 100644 index 00000000000..66844af05b1 --- /dev/null +++ b/zkevm-circuits/src/bin/stats/halo2_stats.rs @@ -0,0 +1,373 @@ +//! Utility functions and types to get circuit stats from any halo2 circuit + +use eth_types::Field; +use halo2_proofs::plonk::ConstraintSystem; +use std::collections::BTreeSet; + +// From Scroll https://github.com/scroll-tech/zkevm-circuits/blob/7d9bc181953cfc6e7baf82ff0ce651281fd70a8a/zkevm-circuits/src/util.rs#L275 +#[allow(dead_code)] +#[derive(Debug, Default)] +pub(crate) struct CircuitStats { + pub num_constraints: usize, + pub num_fixed_columns: usize, + pub num_lookups: usize, + pub num_shuffles: usize, + pub num_advice_columns: usize, + pub num_instance_columns: usize, + pub num_selectors: usize, + // num_simple_selectors: usize, + pub num_permutation_columns: usize, + pub degree: usize, + pub blinding_factors: usize, + pub num_challenges: usize, + pub max_phase: u8, + pub num_rotation: usize, + pub min_rotation: i32, + pub max_rotation: i32, + pub num_verification_ecmul: usize, + // Aux data to diff between records + num_advice_queries: usize, + num_gates: usize, +} + +impl CircuitStats { + // Peak memory analysis by Han: + // + // fn create_proof(params: &KZGParams, pk: &ProvingKey, circuit: &Circuit, instances: &[&[F]]) { + // Let: + // + // - k: log 2 of number of rows + // - n: `1 << k` + // - d: Degree of circuit + // - e: Extension magnitude, equal to `(d - 1).next_power_of_two()` + // - c_f: number of fixed columns + // - c_a: number of advice columns + // - c_i: number of instance columns + // - c_p: number of columns enabled with copy constraint + // - c_pg: number of grand product in permutation argument, equal to `div_ceil(c_p, d - 2)` + // - c_l: number of lookup argument + // + // The memory usage M.C and M.S stands for: + // + // - M.C: number of "n elliptic curve points" (with symbol ◯) + // - M.S: number of "n field elements" (with symbol △) + // - M.E: number of "e * n field elements" (with symbol ⬡) + // + // So the actual memory usage in terms of bytes will be: + // + // M = 32 * n * (2 * M.C + M.S + e * M.E) + // + // We'll ignore other values with sublinear amount to n. + // + // + // 0. In the beginning: + // + // `params` has: + // ◯ powers_of_tau + // ◯ ifft(powers_of_tau) + // + // M.C = 2 (+= 2) + // M.S = 0 + // M.E = 0 + // + // `pk` has: + // ⬡ l0 + // ⬡ l_last + // ⬡ l_active_row + // △ fixed_lagranges (c_f) + // △ fixed_monomials (c_f) + // ⬡ fixed_extended_lagranges (c_f) + // △ permutation_lagranges (c_p) + // △ permutation_monomials (c_p) + // ⬡ permutation_extended_lagranges (c_p) + // + // M.C = 2 + // M.S = 2 * c_f + 2 * c_p (+= 2 * c_f + 2 * c_p) + // M.E = 3 + c_f + c_p (+= 3 + c_f + c_p) + // + // And let's ignore `circuit` + // + // + // ### 1. Pad instances as lagrange form and compute its monomial form. + // + // M.C = 2 + // M.S = 2 * c_f + 2 * c_p + 2 * c_i (+= 2 * c_i) + // M.E = 3 + c_f + c_p + // ``` + // let instance_lagranges = instances.to_lagranges(); + // let instance_monomials = instance_lagranges.to_monomials(); + // ``` + // + // + // ### 2. Synthesize circuit and collect advice column values. + // + // M.C = 2 + // M.S = 2 * c_f + 2 * c_p + 2 * c_i + c_a (+= c_a) + // M.E = 3 + c_f + c_p + // ``` + // let advice_lagranges = circuit.synthesize_all_phases(); + // ``` + // + // + // ### 3. Generate permuted input and table of lookup argument. + // For each lookup argument, we have: + // + // △ compressed_input_lagranges - cached for later computation + // △ permuted_input_lagranges + // △ permuted_input_monomials + // △ compressed_table_lagranges - cached for later computation + // △ permuted_table_lagranges + // △ permuted_table_monomials + // + // M.C = 2 + // M.S = 2 * c_f + 2 * c_p + 2 * c_i + c_a + 6 * c_l (+= 6 * c_l) + // M.E = 3 + c_f + c_p + // ``` + // let ( + // compressed_input_lagranges, + // permuted_input_lagranges, + // permuted_input_monomials, + // compressed_table_lagranges, + // permuted_table_lagranges, + // permuted_table_monomials, + // ) = lookup_permuted() + // ``` + // + // + // ### 4. Generate grand products of permutation argument. + // + // M.C = 2 + // M.S = 2 * c_f + 2 * c_p + 2 * c_i + c_a + 6 * c_l + c_pg (+= c_pg) + // M.E = 3 + c_f + c_p + c_pg (+= c_pg) + // ``` + // let ( + // perm_grand_product_monomials, + // perm_grand_product_extended_lagranges, + // ) = permutation_grand_products(); + // ``` + // + // + // ### 5. Generate grand products of lookup argument. + // And then drops unnecessary lagranges values. + // + // M.C = 2 + // M.S = 2 * c_f + 2 * c_p + 2 * c_i + c_a + 3 * c_l + c_pg (-= 3 * c_l) + // M.E = 3 + c_f + c_p + c_pg + // > let lookup_product_monomials = lookup_grand_products(); + // > drop(compressed_input_lagranges); + // > drop(permuted_input_lagranges); + // > drop(compressed_table_lagranges); + // > drop(permuted_table_lagranges); + // + // + // ### 6. Generate random polynomial. + // + // M.C = 2 + // M.S = 1 + 2 * c_f + 2 * c_p + 2 * c_i + c_a + 3 * c_l + c_pg (+= 1) + // M.E = 3 + c_f + c_p + c_pg + // ``` + // let random_monomial = random(); + // ``` + // + // + // ### 7. Turn advice_lagranges into advice_monomials. + // ``` + // let advice_monomials = advice_lagranges.to_monomials(); + // drop(advice_lagranges); + // ``` + // + // + // ### 8. Generate necessary extended lagranges. + // + // M.C = 2 + // M.S = 1 + 2 * c_f + 2 * c_p + 2 * c_i + c_a + 3 * c_l + c_pg + // M.E = 3 + c_f + c_p + c_pg + c_i + c_a (+= c_i + c_a) + // ``` + // let instances_extended_lagrnages = instances_monomials.to_extended_lagranges(); + // let advice_extended_lagrnages = advice_monomials.to_extended_lagranges(); + // ``` + // + // + // ### 9. While computing the quotient, these extended lagranges: + // + // ⬡ permuted_input_extended_lagranges + // ⬡ permuted_table_extended_lagranges + // ⬡ lookup_product_extended_lagranges + // + // of each lookup argument are generated on the fly and drop before next. + // + // And 1 extra quotient_extended_lagrange is created. So the peak memory: + // + // M.C = 2 + // M.S = 1 + 2 * c_f + 2 * c_p + 2 * c_i + c_a + 3 * c_l + c_pg + // M.E = 4 + c_f + c_p + c_pg + c_i + c_a + 3 * (c_l > 0) (+= 3 * (c_l > 0) + 1) + // ``` + // let quotient_extended_lagrange = quotient_extended_lagrange(); + // ``` + // + // + // ### 10. After quotient is comuputed, drop all the other extended lagranges. + // + // M.C = 2 + // M.S = 1 + 2 * c_f + 2 * c_p + 2 * c_i + c_a + 3 * c_l + c_pg + // M.E = 4 + c_f + c_p (-= c_pg + c_i + c_a + 3 * (c_l > 0)) + // drop(instances_extended_lagrnages) + // drop(advice_extended_lagrnages) + // drop(perm_grand_product_extended_lagranges) + // + // + // ### 11. Turn quotient_extended_lagrange into monomial form. + // And then cut int `d - 1` pieces. + // + // M.C = 2 + // M.S = 2 * c_f + 2 * c_p + 2 * c_i + c_a + 3 * c_l + c_pg + d (+= d - 1) + // M.E = 3 + c_f + c_p (-= 1) + // ``` + // let quotient_monomials = quotient_monomials() + // drop(quotient_extended_lagrange) + // ``` + // + // + // ### 12. Evaluate and open all polynomial except instance ones. + // } + pub(crate) fn estimate_peak_mem(&self, k: u32) -> usize { + let field_bytes = 32; + let c_f = self.num_fixed_columns; + let c_a = self.num_advice_columns; + let c_i = self.num_instance_columns; + let c_p = self.num_permutation_columns; + let c_l = self.num_lookups; + let c_pg = (c_p + self.degree - 3) / (self.degree - 2); + let e = (self.degree - 1).next_power_of_two(); + // number of "n elliptic curve points" + let m_c = 2; + // number of "n field elements" + let m_s = 1 + 2 * c_f + 2 * c_p + 2 * c_i + c_a + 3 * c_l + c_pg; + // number of "e * n field elements" + let m_e = 4 + c_f + c_p + c_pg + c_i + c_a + 3 * (c_l > 0) as usize; + let unit = m_c + m_s + e * m_e; + unit * 2usize.pow(k) * field_bytes + } +} + +// Return the stats in `meta`, accounting only for the circuit delta from the last aggregated stats +// in `agg`. +// Adaptaed from Scroll https://github.com/scroll-tech/zkevm-circuits/blob/7d9bc181953cfc6e7baf82ff0ce651281fd70a8a/zkevm-circuits/src/util.rs#L294 +pub(crate) fn circuit_stats( + agg: &CircuitStats, + meta: &ConstraintSystem, +) -> CircuitStats { + let max_phase = meta + .advice_column_phase() + .iter() + .skip(agg.num_advice_columns) + .max() + .copied() + .unwrap_or_default(); + + let rotations = meta + .advice_queries() + .iter() + .skip(agg.num_advice_queries) + .map(|(_, q)| q.0) + .collect::>(); + + let num_fixed_columns = meta.num_fixed_columns() - agg.num_fixed_columns; + let num_lookups = meta.lookups().len() - agg.num_lookups; + let num_shuffles = meta.shuffles().len() - agg.num_shuffles; + let num_advice_columns = meta.num_advice_columns() - agg.num_advice_columns; + let num_instance_columns = meta.num_instance_columns() - agg.num_instance_columns; + let num_selectors = meta.num_selectors() - agg.num_selectors; + let num_permutation_columns = + meta.permutation().get_columns().len() - agg.num_permutation_columns; + + CircuitStats { + num_constraints: meta + .gates() + .iter() + .skip(agg.num_gates) + .map(|g| g.polynomials().len()) + .sum::(), + num_fixed_columns, + num_lookups, + num_shuffles, + num_advice_columns, + num_instance_columns, + num_selectors, + // num_simple_selectors: meta.num_simple_selectors(), + num_permutation_columns, + degree: meta.degree(), + blinding_factors: meta.blinding_factors(), + num_challenges: meta.num_challenges() - agg.num_challenges, + max_phase, + num_rotation: rotations.len(), + min_rotation: rotations.first().cloned().unwrap_or_default(), + max_rotation: rotations.last().cloned().unwrap_or_default(), + num_verification_ecmul: num_advice_columns + + num_instance_columns + + num_permutation_columns + + num_shuffles + + num_selectors + + num_fixed_columns + + 3 * num_lookups + + rotations.len(), + num_advice_queries: meta.advice_queries().len() - agg.num_advice_queries, + num_gates: meta.gates().len() - agg.num_gates, + } +} + +pub(crate) struct StatsCollection { + aggregate: bool, + shared_cs: ConstraintSystem, + pub(crate) agg: CircuitStats, + pub(crate) list: Vec<(String, CircuitStats)>, +} + +impl StatsCollection { + // With aggregate=true, all records are overwritten each time, leading to a single + // aggregate stats that represents the final circuit. + // With aggregate=false, each record is stored in a different entry with a name, and the + // ConstraintSystem is reset so that each entry is independent. + pub(crate) fn new(aggregate: bool) -> Self { + Self { + aggregate, + shared_cs: ConstraintSystem::default(), + agg: CircuitStats::default(), + list: Vec::new(), + } + } + + // Record a shared table + pub(crate) fn record_shared(&mut self, name: &str, meta: &mut ConstraintSystem) { + // Shared tables should only add columns, and nothing more + assert_eq!(meta.lookups().len(), 0); + assert_eq!(meta.shuffles().len(), 0); + assert_eq!(meta.permutation().get_columns().len(), 0); + assert_eq!(meta.degree(), 3); // 3 comes from the permutation argument + assert_eq!(meta.blinding_factors(), 5); // 5 is the minimum blinding factor + assert_eq!(meta.advice_queries().len(), 0); + assert_eq!(meta.gates().len(), 0); + + if self.aggregate { + self.agg = circuit_stats(&CircuitStats::default(), meta); + } else { + let stats = circuit_stats(&self.agg, meta); + self.agg = circuit_stats(&CircuitStats::default(), meta); + self.list.push((name.to_string(), stats)); + // Keep the ConstraintSystem with all the tables + self.shared_cs = meta.clone(); + } + } + + // Record a subcircuit + pub(crate) fn record(&mut self, name: &str, meta: &mut ConstraintSystem) { + if self.aggregate { + self.agg = circuit_stats(&CircuitStats::default(), meta); + } else { + let stats = circuit_stats(&self.agg, meta); + self.list.push((name.to_string(), stats)); + // Revert meta to the ConstraintSystem just with the tables + *meta = self.shared_cs.clone(); + } + } +} diff --git a/zkevm-circuits/src/bin/stats/main.rs b/zkevm-circuits/src/bin/stats/main.rs index 504024cfea3..688493073ee 100644 --- a/zkevm-circuits/src/bin/stats/main.rs +++ b/zkevm-circuits/src/bin/stats/main.rs @@ -1,20 +1,20 @@ //! Build with `--features="stats"` +mod halo2_stats; +mod helpers; + use bus_mapping::circuit_input_builder::FeatureConfig; use cli_table::{print_stdout, Cell, Style, Table}; use eth_types::{bytecode, evm_types::OpcodeId, ToWord}; use halo2_proofs::{ halo2curves::bn256::Fr, - plonk::{Circuit, ConstraintSystem}, + plonk::{Circuit, ConstraintSystem, Expression}, }; -use std::{array, collections::BTreeSet, iter}; -mod helpers; -use eth_types::Field; -use halo2_proofs::plonk::Expression; +use halo2_stats::StatsCollection; use helpers::{bytecode_prefix_op_big_rws, print_circuit_stats_by_states}; use itertools::Itertools; use mock::MOCK_ACCOUNTS; -use std::env; +use std::{array, env, iter}; use zkevm_circuits::{ bytecode_circuit::{BytecodeCircuitConfig, BytecodeCircuitConfigArgs}, copy_circuit::{CopyCircuitConfig, CopyCircuitConfigArgs}, @@ -226,155 +226,6 @@ fn get_exec_steps_occupancy() { ); } -// From Scroll https://github.com/scroll-tech/zkevm-circuits/blob/7d9bc181953cfc6e7baf82ff0ce651281fd70a8a/zkevm-circuits/src/util.rs#L275 -#[derive(Debug, Default)] -pub(crate) struct CircuitStats { - num_constraints: usize, - num_fixed_columns: usize, - num_lookups: usize, - num_shuffles: usize, - num_advice_columns: usize, - num_instance_columns: usize, - num_selectors: usize, - // num_simple_selectors: usize, - num_permutation_columns: usize, - degree: usize, - blinding_factors: usize, - num_challenges: usize, - max_phase: u8, - num_rotation: usize, - min_rotation: i32, - max_rotation: i32, - num_verification_ecmul: usize, - // Aux data to diff between records - num_advice_queries: usize, - num_gates: usize, -} - -// Return the stats in `meta`, accounting only for the circuit delta from the last aggregated stats -// in `agg`. -// Adaptaed from Scroll https://github.com/scroll-tech/zkevm-circuits/blob/7d9bc181953cfc6e7baf82ff0ce651281fd70a8a/zkevm-circuits/src/util.rs#L294 -pub(crate) fn circuit_stats( - agg: &CircuitStats, - meta: &ConstraintSystem, -) -> CircuitStats { - let max_phase = meta - .advice_column_phase() - .iter() - .skip(agg.num_advice_columns) - .max() - .copied() - .unwrap_or_default(); - - let rotations = meta - .advice_queries() - .iter() - .skip(agg.num_advice_queries) - .map(|(_, q)| q.0) - .collect::>(); - - let num_fixed_columns = meta.num_fixed_columns() - agg.num_fixed_columns; - let num_lookups = meta.lookups().len() - agg.num_lookups; - let num_shuffles = meta.shuffles().len() - agg.num_shuffles; - let num_advice_columns = meta.num_advice_columns() - agg.num_advice_columns; - let num_instance_columns = meta.num_instance_columns() - agg.num_instance_columns; - let num_selectors = meta.num_selectors() - agg.num_selectors; - let num_permutation_columns = - meta.permutation().get_columns().len() - agg.num_permutation_columns; - - let max_queries_per_advice = 0; // TODO - CircuitStats { - num_constraints: meta - .gates() - .iter() - .skip(agg.num_gates) - .map(|g| g.polynomials().len()) - .sum::(), - num_fixed_columns, - num_lookups, - num_shuffles, - num_advice_columns, - num_instance_columns, - num_selectors, - // num_simple_selectors: meta.num_simple_selectors(), - num_permutation_columns, - degree: meta.degree(), - blinding_factors: meta.blinding_factors(), - num_challenges: meta.num_challenges() - agg.num_challenges, - max_phase, - num_rotation: rotations.len(), - min_rotation: rotations.first().cloned().unwrap_or_default(), - max_rotation: rotations.last().cloned().unwrap_or_default(), - num_verification_ecmul: num_advice_columns - + num_instance_columns - + num_permutation_columns - + num_shuffles - + num_selectors - + num_fixed_columns - + 3 * num_lookups - + rotations.len(), - num_advice_queries: meta.advice_queries().len() - agg.num_advice_queries, - num_gates: meta.gates().len() - agg.num_gates, - } -} - -struct StatsCollection { - aggregate: bool, - shared_cs: ConstraintSystem, - agg: CircuitStats, - list: Vec<(String, CircuitStats)>, -} - -impl StatsCollection { - // With aggregate=true, all records are overwritten each time, leading to a single - // aggregate stats that represents the final circuit. - // With aggregate=false, each record is stored in a different entry with a name, and the - // ConstraintSystem is reset so that each entry is independent. - fn new(aggregate: bool) -> Self { - Self { - aggregate, - shared_cs: ConstraintSystem::default(), - agg: CircuitStats::default(), - list: Vec::new(), - } - } - - // Record a shared table - fn record_shared(&mut self, name: &str, meta: &mut ConstraintSystem) { - // Shared tables should only add columns, and nothing more - assert_eq!(meta.lookups().len(), 0); - assert_eq!(meta.shuffles().len(), 0); - assert_eq!(meta.permutation().get_columns().len(), 0); - // assert_eq!(meta.degree(), 3); // 3 comes from the permutation argument - assert_eq!(meta.blinding_factors(), 5); // 5 is the minimum blinding factor - - // assert_eq!(meta.advice_queries().len(), 0); - // assert_eq!(meta.gates().len(), 0); - - if self.aggregate { - self.agg = circuit_stats(&CircuitStats::default(), meta); - } else { - let stats = circuit_stats(&self.agg, meta); - self.agg = circuit_stats(&CircuitStats::default(), meta); - self.list.push((name.to_string(), stats)); - // Keep the ConstraintSystem with all the tables - self.shared_cs = meta.clone(); - } - } - - // Record a subcircuit - fn record(&mut self, name: &str, meta: &mut ConstraintSystem) { - if self.aggregate { - self.agg = circuit_stats(&CircuitStats::default(), meta); - } else { - let stats = circuit_stats(&self.agg, meta); - self.list.push((name.to_string(), stats)); - // Revert meta to the ConstraintSystem just with the tables - *meta = self.shared_cs.clone(); - } - } -} - #[allow(unused_variables)] fn record_stats( stats: &mut StatsCollection, @@ -497,7 +348,7 @@ fn record_stats( tx_table, rw_table, bytecode_table, - block_table: block_table.clone(), + block_table, copy_table, keccak_table, exp_table, @@ -523,28 +374,22 @@ fn general_subcircuit_stats() { .iter() .chain(iter::once(&("super".to_string(), stats_agg.agg))) { - let commitments = stats.num_advice_columns - + stats.num_permutation_columns // TODO: Adjust this, I think the permutation argument batches several permutation columns into one. - + stats.num_shuffles // Z_p // TODO: Are shuffles batched? - + 3 * stats.num_lookups; // Z, A', S' - - // Assume extended evaluation domain 2^k = 8 (k = 3), assuming max expression degree = 7. - let ext_k = 3; - // Assume k = 25. - let k = 26; // At 0.0139 gas/row this gives us 2^26 * 0.0139 = ~900k gas. For 30M gas we would need 33 // chunks. - let f_size_bytes = 32; // 4 limbs * 64 bits - let lagrange_mem_gb = - commitments * 2usize.pow(k) * 2usize.pow(ext_k) * f_size_bytes / 1024 / 1024 / 1024; + let k = 26; + let peak_mem_gb = stats.estimate_peak_mem(k) / 1024 / 1024 / 1024; table.push(vec![ name.cell(), + stats.num_constraints.cell(), + stats.num_rotation.cell(), + format!("{}/{}", stats.min_rotation, stats.max_rotation).cell(), + stats.num_fixed_columns.cell(), + stats.num_selectors.cell(), stats.num_advice_columns.cell(), stats.num_permutation_columns.cell(), stats.num_lookups.cell(), stats.degree.cell(), - commitments.cell(), - lagrange_mem_gb.cell(), + peak_mem_gb.cell(), ]); } let table = table @@ -552,12 +397,16 @@ fn general_subcircuit_stats() { .title( [ "circuit", + "constraints", + "rots", + "min/max(rots)", + "fix_cols", + "selectors", "adv_cols", "perm_cols", "lookups", "degree", - "commitments", - "mem(gb)", + "mem_gb", ] .iter() .map(|s| s.cell().bold(true)),