From 7dccc5755a9a41d292131ad9455520bcd53c2567 Mon Sep 17 00:00:00 2001
From: Danny Willems <be.danny.willems@gmail.com>
Date: Thu, 20 Jun 2024 11:26:32 +0200
Subject: [PATCH] IVC/Columns: simply move circuit layout to top-level

This is useful to generate the doc, and verifying the code matches the
specification while reviewing/implementing.
---
 ivc/src/ivc/columns.rs | 422 +++++++++++++++++++++--------------------
 1 file changed, 212 insertions(+), 210 deletions(-)

diff --git a/ivc/src/ivc/columns.rs b/ivc/src/ivc/columns.rs
index a986afa72c..27b65e8bcc 100644
--- a/ivc/src/ivc/columns.rs
+++ b/ivc/src/ivc/columns.rs
@@ -1,4 +1,216 @@
+//! IVC circuit layout - Top level documentation outdated
+//!
+//! The IVC circuit is tiled vertically. We assume we have as many
+//! rows as we need: if we don't, we wrap around and continue.
+//!
+//! The biggest blocks are hashes and ECAdds, so other blocks may be wider.
+//!
+//! `N := N_IVC + N_APP` is the total number of columns in the circuit.
+//!
+//! Vertically stacked blocks are as follows:
+//!
+//!```text
+//!
+//!         Inputs:
+//!      Each point is 2 base field coordinates in 17 15-bit limbs
+//!       recomposed as 8 75-bit limbs
+//!       recomposed as 4 150-bit limbs.
+//!
+//!              34            8      4
+//!            Input1         R75   R150
+//!  1   |-----------------|-------|----|
+//!      |      C_L1       |       |    |
+//!      |      C_L2       |       |    |
+//!      |                 |       |    |
+//!      |      ...        |       |    |
+//!   N  |-----------------|       |    |
+//!      |      C_R1       |       |    |
+//!      |                 |       |    |
+//!      |      ...        |       |    |
+//!  2N  |-----------------|       |    |
+//!      |      C_O1       |       |    |
+//!      |                 |       |    |
+//!      |      ...        |       |    |
+//!  3N  |-----------------|-------|----|
+//!      0       ...     34*2    76    80
+//!
+//!
+//!                      Hashes (temporarily DISABLED)
+//!     (one hash at a row, passing data to the next one)
+//!     (for i∈N, the input row #i containing 4 150-bit elements
+//!      is processed by hash rows 2*i and 2*i+1)
+//!
+//!  1   |------------------------------------------|
+//!      |                                          |
+//!      |                                         .| . here is h_l
+//!  2N  |------------------------------------------|   must be equal to public input!
+//!      |                                          |                    (H_i in nova)
+//!      |                                         .| . here is h_r
+//!  4N  |------------------------------------------|
+//!      |                                          |
+//!      |                                         .| . here is h_o, equal to H_{i+1} in Nova
+//!  6N  |------------------------------------------|
+//!      |                                         .| r = h_lr = h(h_l,h_r)
+//!      |                                         .| ϕ = h_lro = h(r,h_o)
+//! 6N+2 |------------------------------------------|
+//!
+//!       TODO: we also need to squeeze challenges for
+//!       the right (strict) instance: β, γ, j (joint_combiner)
+//!
+//!       TODO: we can hash (x0,x1+b*2^150) instead of (x0,x1,y0,y1).
+//!
+//! Scalars block.
+//!
+//! Most of the r^2 and r^3 cells are /unused/, and this design can be
+//! much more optimal if r^2|r^3 elements come in a separate block.
+//! But the overhead is not big and it's a very easy layout, so
+//! keeping it for now.
+//!
+//!     constϕ
+//!         constr
+//!                ϕ^i             r^3·ϕ^i    ϕ^i        r*ϕ^i        r^2·ϕ^i_k    r^3·ϕ^i_k
+//!                      r*ϕ^i             in 17 limbs  in 17 limbs      ...         ...
+//!                           r^2·ϕ^i         each        each
+//!   1  |---|---|-----|-------|----|----|------------|------------|------------|------------|
+//!      | ϕ   r    ϕ     rϕ    r^2ϕ r^3ϕ|            |            |            |            |
+//!      | ϕ   r   ϕ^2   rϕ^2            |            |            |            |            |
+//!      | ϕ   r   ϕ^3   rϕ^3            |            |            |            |            |
+//!      |                               |            |            |            |            |
+//!      |                               |            |            |            |            |
+//!      |                               |            |            |            |            |
+//!  i   |                               |            |            |            |            |
+//!      |                               |            |            |            |            |
+//!      |                               |            |            |            |            |
+//!      |                               |            |            |            |            |
+//!      |       ϕ^{N+1}                 |            |            |            |            |
+//!  N+1 |-------------------------------|------------|------------|------------|------------|
+//!       1    2   3   4      5      6   ...        6+17          6+2*17                    6+4*17
+//!
+//!
+//! We compute the following equations, where equations in "quotes" are
+//! what we /want/ to prove, and non-quoted is an equavilant version
+//! that we actually prove instead:
+//! - "C_{O,i} = C_{L,i} + r·C_{R,i}":
+//!   - C_{O,i} = C_{L,i} + C_{R',i}
+//!   - "C_{R',i} = r·C_{R,i}"
+//!     - bucket[(ϕ^i)_k] -= C_{R',i}
+//!     - bucket[(r·ϕ^i)_k] += C_{R,i}
+//! - "E_O = E_L + r·T_0 + r^2·T_1 + r^3·E_R":
+//!   - E_O = E_L + E_R'
+//!   - "E_R' = r·T_0 + r^2·T_1 + r^3·E_R"
+//!     - bucket[(ϕ^{n+1})_k] += E_R'
+//!     - bucket[(r·ϕ^{n+1})_k] += T_0
+//!     - bucket[(r^2·ϕ^{n+1})_k] += T_1
+//!     - bucket[(r^3·ϕ^{n+1})_k] += E_R
+//!
+//! Runtime access time is represented by ? because it's not known in advance.
+//!
+//! Output and input RAM invocations in the same row use the same coeff/memory index.
+//!
+//! TODO FIXME we need to write into /different/ buckets.
+//!
+//! FEC Additions, one per row, each one is ~230 columns:
+//!
+//!                  .           input #2 (looked up)                  .                  Output
+//!                  .                  Access        input#2          .     FEC ADD      access    output
+//!          input#1 .  Coeff/mem ix     Time         Value            .   computation     time     value
+//!   1   |-------------------------------------------------------------------------------|-----|------------|
+//!       | C_{R',1} | ϕ^1_0           |  ?  | bucket[ϕ^1_0]           |                  |  ?  | newbucket  |
+//!       | C_{R',2} | ϕ^2_0           |  ?  | bucket[ϕ^2_0]           |                  |  ?  | newbucket  |
+//!       |          |      ...        | ... |                         |                  | ... |            |
+//!       | C_{R',N} | ϕ^N_0           |  ?  | bucket[ϕ^N_0]           |                  |  ?  | newbucket  |
+//!       | C_{R',1} | ϕ^1_1           |  ?  | bucket[ϕ^0_1]           |                  |  ?  | newbucket  |
+//!       |          |      ...        | ... |                         |                  | ... |            |
+//!       | C_{R',i} | ϕ^i_k           |  ?  | bucket[ϕ^i_k]           |                  |  ?  | newbucket  |
+//!       |          |                 |     |                         |                  |     |            |
+//!       |          |      ...        | ... |                         |                  | ... |            |
+//!  17*N |--------------------------------------------------------------------------------------------------|
+//!       | C_{R,i}  | r·(ϕ^i_k)       |  ?  | bucket[r·(ϕ^i_k)]       |                  |  ?  | newbucket  |
+//!       |          |                 |     |                         |                  |     |            |
+//!       |          |      ...        | ... |                         |                  | ... |            |
+//!  34*N |--------------------------------------------------------------------------------------------------|
+//!       |-C_{R',i} |      -          |  -  | C_{L,i}                 |                  |  -  | C_{O,i}    |
+//!       |          |     ...         | ... |                         |                  | ... |            |
+//!  35*N |--------------------------------------------------------------------------------------------------|
+//!       | -E_R'    | ϕ^{n+1}_k       |  ?  | bucket[ϕ^{n+1}_k]       |                  |     | newbucket  |
+//!       |          |     ...         | ... |                         |                  | ... |            |
+//!       |  T_0     | r·(ϕ^{n+1})     |  ?  | bucket[r·(ϕ^{n+1})]     |                  |     | newbucket  |
+//!       |          |     ...         | ... |                         |                  | ... |            |
+//!       |  T_1     | r^2·(ϕ^{n+1}_k) |  ?  | bucket[r^2·(ϕ^{n+1}_k)] |                  |     | newbucket  |
+//!       |          |     ...         | ... |                         |                  | ... |            |
+//!       |  E_R     | r^3·(ϕ^{n+1}_k) |  ?  | bucket[r^3·(ϕ^{n+1}_k)] |                  |     | newbucket  |
+//!       |          |     ...         | ... |                         |                  | ... |            |
+//! 35*N+ |  E_L     |      -          |  -  |      E_R'               |                  |  -  |    E_O     |
+//! 4*17+ |--------------------------------------------------------------------------------------------------|
+//! 1
+//!
+//! TODO: add different challenges: β, γ, joint_combiner
+//!
+//! Challenges block.
+//!
+//!            relaxed
+//!                       strict
+//!                    (relaxed in-place)
+//!        r   α_{L,i}    α_{R}^i     α_{O,i}
+//!  1    |--|--------|-----------|-----------------------|
+//!       |  |        | α_R = h_R |                       |
+//!       |  |        |           |                       |
+//!       |  |        |           |                       |
+//!       |  |        | α_R^i     | α_{L,i} + r·α_{R,i}^i |
+//!       |  |        |           |                       |
+//!       |  |        |           |                       |
+//!       |  |        |           |                       |
+//!       |  |        |           |                       |
+//!       |  |        |           |                       |
+//!       |  |        |           |                       |
+//! #chal |--|--------|-----------|-----------------------|
+//!
+//! #chal is the number of constraints. Our optimistic expectation is
+//! that it is around const*N for const < 3.
+//!
+//!
+//! "u" block. In the general form we want to prove
+//! u_O = u_L + r·u_R, but u_R = 0, so we prove
+//! u_O = u_L + r.
+//!
+//!     r    u_L       u_O = u_L + r
+//!    |--|--------|--------------------|
+//!    |--|--------|--------------------|
+//!
+//!
+//! 2^15 |---- --------------------------------------|
+//!```
+//!
+//!
+//! Assume that IVC circuit takess CELL cells, e.g. CELL = 10000*N.
+//! Then we can calculate N_IVC as dependency of N_APP in this way:
+//! ```text
+//!    N = N_APP + (CELL/2^15)*N
+//!    (1 - CELL/2^15)*N = N_APP
+//!    N = (1/(1 - CELL/2^15)) * N_APP = (2^15 / (2^15 - CELL)) * N_APP
+//!    N_IVC = (1/(1 - CELL/2^15) - 1) * N_APP = (2^15 / (2^15 - CELL) - 1) * N_APP
+//! ```
+//!
+//! --- (slightly) OUTDATED BELOW ---
+//!
+//! Counting cells:
+//! - Inputs:              2 * 17 * 3N = 102N
+//! - Inputs repacked 75:  2 * 4 * 3N = 24N
+//! - Inputs repacked 150: 2 * 2 * 3N = 12N
+//! - Hashes:              2 * 165 * 3N = 990N (max 4 * 165 * 3N if we add 165 constants to every call)
+//! - scalars:             4 N + 17 * 3 * N = 55 N
+//! - ECADDs:              230 * 35 * N = 8050N
+//! Total (CELL):         ~9233*N
+//!
+//!     ...which is less than 32k*N
+//!
+//! In our particular case, CELL = 9233, so
+//!    N_IVC = 0.39 N_APP
+//!
+//! Which means for e.g. 50 columns we'll need extra 20 of IVC.
+
 use super::{N_ADDITIONAL_WIT_COL_QUAD, N_LIMBS_XLARGE};
+
 use crate::poseidon_8_56_5_3_2::{
     bn254::{
         Column as IVCPoseidonColumn, NB_FULL_ROUND as IVC_POSEIDON_NB_FULL_ROUND,
@@ -61,216 +273,6 @@ pub fn block_height<const N_COL_TOTAL: usize, const N_CHALS: usize>(block_num: u
 
 pub const IVC_NB_TOTAL_FIXED_SELECTORS: usize = IVC_POSEIDON_NB_ROUND_CONSTANTS + N_BLOCKS;
 
-/// The IVC circuit is tiled vertically. We assume we have as many
-/// rows as we need: if we don't, we wrap around and continue.
-///
-/// The biggest blocks are hashes and ECAdds, so other blocks may be wider.
-///
-/// N := N_IVC + N_APP is the total number of columns in the circuit.
-///
-/// Vertically stacked blocks are as follows:
-///
-///```text
-///
-///         Inputs:
-///      Each point is 2 base field coordinates in 17 15-bit limbs
-///       recomposed as 8 75-bit limbs
-///       recomposed as 4 150-bit limbs.
-///
-///              34            8      4
-///            Input1         R75   R150
-///  1   |-----------------|-------|----|
-///      |      C_L1       |       |    |
-///      |      C_L2       |       |    |
-///      |                 |       |    |
-///      |      ...        |       |    |
-///   N  |-----------------|       |    |
-///      |      C_R1       |       |    |
-///      |                 |       |    |
-///      |      ...        |       |    |
-///  2N  |-----------------|       |    |
-///      |      C_O1       |       |    |
-///      |                 |       |    |
-///      |      ...        |       |    |
-///  3N  |-----------------|-------|----|
-///      0       ...     34*2    76    80
-///
-///
-///                      Hashes (temporarily DISABLED)
-///     (one hash at a row, passing data to the next one)
-///     (for i∈N, the input row #i containing 4 150-bit elements
-///      is processed by hash rows 2*i and 2*i+1)
-///
-///  1   |------------------------------------------|
-///      |                                          |
-///      |                                         .| . here is h_l
-///  2N  |------------------------------------------|   must be equal to public input!
-///      |                                          |                    (H_i in nova)
-///      |                                         .| . here is h_r
-///  4N  |------------------------------------------|
-///      |                                          |
-///      |                                         .| . here is h_o, equal to H_{i+1} in Nova
-///  6N  |------------------------------------------|
-///      |                                         .| r = h_lr = h(h_l,h_r)
-///      |                                         .| ϕ = h_lro = h(r,h_o)
-/// 6N+2 |------------------------------------------|
-///
-///       TODO: we also need to squeeze challenges for
-///       the right (strict) instance: β, γ, j (joint_combiner)
-///
-///       TODO: we can hash (x0,x1+b*2^150) instead of (x0,x1,y0,y1).
-///
-/// Scalars block.
-///
-/// Most of the r^2 and r^3 cells are /unused/, and this design can be
-/// much more optimal if r^2|r^3 elements come in a separate block.
-/// But the overhead is not big and it's a very easy layout, so
-/// keeping it for now.
-///
-///     constϕ
-///         constr
-///                ϕ^i             r^3·ϕ^i    ϕ^i        r*ϕ^i        r^2·ϕ^i_k    r^3·ϕ^i_k
-///                      r*ϕ^i             in 17 limbs  in 17 limbs      ...         ...
-///                           r^2·ϕ^i         each        each
-///   1  |---|---|-----|-------|----|----|------------|------------|------------|------------|
-///      | ϕ   r    ϕ     rϕ    r^2ϕ r^3ϕ|            |            |            |            |
-///      | ϕ   r   ϕ^2   rϕ^2            |            |            |            |            |
-///      | ϕ   r   ϕ^3   rϕ^3            |            |            |            |            |
-///      |                               |            |            |            |            |
-///      |                               |            |            |            |            |
-///      |                               |            |            |            |            |
-///  i   |                               |            |            |            |            |
-///      |                               |            |            |            |            |
-///      |                               |            |            |            |            |
-///      |                               |            |            |            |            |
-///      |       ϕ^{N+1}                 |            |            |            |            |
-///  N+1 |-------------------------------|------------|------------|------------|------------|
-///       1    2   3   4      5      6   ...        6+17          6+2*17                    6+4*17
-///
-///
-
-///
-/// We compute the following equations, where equations in "quotes" are
-/// what we /want/ to prove, and non-quoted is an equavilant version
-/// that we actually prove instead:
-/// - "C_{O,i} = C_{L,i} + r·C_{R,i}":
-///   - C_{O,i} = C_{L,i} + C_{R',i}
-///   - "C_{R',i} = r·C_{R,i}"
-///     - bucket[(ϕ^i)_k] -= C_{R',i}
-///     - bucket[(r·ϕ^i)_k] += C_{R,i}
-/// - "E_O = E_L + r·T_0 + r^2·T_1 + r^3·E_R":
-///   - E_O = E_L + E_R'
-///   - "E_R' = r·T_0 + r^2·T_1 + r^3·E_R"
-///     - bucket[(ϕ^{n+1})_k] += E_R'
-///     - bucket[(r·ϕ^{n+1})_k] += T_0
-///     - bucket[(r^2·ϕ^{n+1})_k] += T_1
-///     - bucket[(r^3·ϕ^{n+1})_k] += E_R
-///
-/// Runtime access time is represented by ? because it's not known in advance.
-///
-/// Output and input RAM invocations in the same row use the same coeff/memory index.
-///
-/// TODO FIXME we need to write into /different/ buckets.
-///
-/// FEC Additions, one per row, each one is ~230 columns:
-///
-///                  .           input #2 (looked up)                  .                  Output
-///                  .                  Access        input#2          .     FEC ADD      access    output
-///          input#1 .  Coeff/mem ix     Time         Value            .   computation     time     value
-///   1   |-------------------------------------------------------------------------------|-----|------------|
-///       | C_{R',1} | ϕ^1_0           |  ?  | bucket[ϕ^1_0]           |                  |  ?  | newbucket  |
-///       | C_{R',2} | ϕ^2_0           |  ?  | bucket[ϕ^2_0]           |                  |  ?  | newbucket  |
-///       |          |      ...        | ... |                         |                  | ... |            |
-///       | C_{R',N} | ϕ^N_0           |  ?  | bucket[ϕ^N_0]           |                  |  ?  | newbucket  |
-///       | C_{R',1} | ϕ^1_1           |  ?  | bucket[ϕ^0_1]           |                  |  ?  | newbucket  |
-///       |          |      ...        | ... |                         |                  | ... |            |
-///       | C_{R',i} | ϕ^i_k           |  ?  | bucket[ϕ^i_k]           |                  |  ?  | newbucket  |
-///       |          |                 |     |                         |                  |     |            |
-///       |          |      ...        | ... |                         |                  | ... |            |
-///  17*N |--------------------------------------------------------------------------------------------------|
-///       | C_{R,i}  | r·(ϕ^i_k)       |  ?  | bucket[r·(ϕ^i_k)]       |                  |  ?  | newbucket  |
-///       |          |                 |     |                         |                  |     |            |
-///       |          |      ...        | ... |                         |                  | ... |            |
-///  34*N |--------------------------------------------------------------------------------------------------|
-///       |-C_{R',i} |      -          |  -  | C_{L,i}                 |                  |  -  | C_{O,i}    |
-///       |          |     ...         | ... |                         |                  | ... |            |
-///  35*N |--------------------------------------------------------------------------------------------------|
-///       | -E_R'    | ϕ^{n+1}_k       |  ?  | bucket[ϕ^{n+1}_k]       |                  |     | newbucket  |
-///       |          |     ...         | ... |                         |                  | ... |            |
-///       |  T_0     | r·(ϕ^{n+1})     |  ?  | bucket[r·(ϕ^{n+1})]     |                  |     | newbucket  |
-///       |          |     ...         | ... |                         |                  | ... |            |
-///       |  T_1     | r^2·(ϕ^{n+1}_k) |  ?  | bucket[r^2·(ϕ^{n+1}_k)] |                  |     | newbucket  |
-///       |          |     ...         | ... |                         |                  | ... |            |
-///       |  E_R     | r^3·(ϕ^{n+1}_k) |  ?  | bucket[r^3·(ϕ^{n+1}_k)] |                  |     | newbucket  |
-///       |          |     ...         | ... |                         |                  | ... |            |
-/// 35*N+ |  E_L     |      -          |  -  |      E_R'               |                  |  -  |    E_O     |
-/// 4*17+ |--------------------------------------------------------------------------------------------------|
-/// 1
-///
-/// TODO: add different challenges: β, γ, joint_combiner
-///
-/// Challenges block.
-///
-///            relaxed
-///                       strict
-///                    (relaxed in-place)
-///        r   α_{L,i}    α_{R}^i     α_{O,i}
-///  1    |--|--------|-----------|-----------------------|
-///       |  |        | α_R = h_R |                       |
-///       |  |        |           |                       |
-///       |  |        |           |                       |
-///       |  |        | α_R^i     | α_{L,i} + r·α_{R,i}^i |
-///       |  |        |           |                       |
-///       |  |        |           |                       |
-///       |  |        |           |                       |
-///       |  |        |           |                       |
-///       |  |        |           |                       |
-///       |  |        |           |                       |
-/// #chal |--|--------|-----------|-----------------------|
-///
-/// #chal is the number of constraints. Our optimistic expectation is
-/// that it is around const*N for const < 3.
-///
-///
-/// "u" block. In the general form we want to prove
-/// u_O = u_L + r·u_R, but u_R = 0, so we prove
-/// u_O = u_L + r.
-///
-///     r    u_L       u_O = u_L + r
-///    |--|--------|--------------------|
-///    |--|--------|--------------------|
-///
-///
-/// 2^15 |---- --------------------------------------|
-///```
-///
-///
-/// Assume that IVC circuit takess CELL cells, e.g. CELL = 10000*N.
-/// Then we can calculate N_IVC as dependency of N_APP in this way:
-///    N = N_APP + (CELL/2^15)*N
-///    (1 - CELL/2^15)*N = N_APP
-///    N = (1/(1 - CELL/2^15)) * N_APP = (2^15 / (2^15 - CELL)) * N_APP
-///    N_IVC = (1/(1 - CELL/2^15) - 1) * N_APP = (2^15 / (2^15 - CELL) - 1) * N_APP
-///
-///
-/// --- (slightly) OUTDATED BELOW ---
-///
-/// Counting cells:
-/// - Inputs:              2 * 17 * 3N = 102N
-/// - Inputs repacked 75:  2 * 4 * 3N = 24N
-/// - Inputs repacked 150: 2 * 2 * 3N = 12N
-/// - Hashes:              2 * 165 * 3N = 990N (max 4 * 165 * 3N if we add 165 constants to every call)
-/// - scalars:             4 N + 17 * 3 * N = 55 N
-/// - ECADDs:              230 * 35 * N = 8050N
-/// Total (CELL):         ~9233*N
-///
-///     ...which is less than 32k*N
-///
-/// In our particular case, CELL = 9233, so
-///    N_IVC = 0.39 N_APP
-///
-/// Which means for e.g. 50 columns we'll need extra 20 of IVC.
-
 // NB: We can reuse hash constants.
 // TODO: Can we pass just one coordinate and sign (x, sign) instead of (x,y) for hashing?
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]