chore: Optimise grand product computation round based on active ranges #10460

Merged (8 commits) on Dec 9, 2024
@@ -85,7 +85,7 @@ template <class Curve> class CommitmentKey {
*/
Commitment commit(PolynomialSpan<const Fr> polynomial)
{
PROFILE_THIS();
PROFILE_THIS_NAME("commit");
        // We must have a power-of-2 number of SRS points *after* subtracting by start_index.
size_t dyadic_poly_size = numeric::round_up_power_2(polynomial.size());
// Because pippenger prefers a power-of-2 size, we must choose a starting index for the points so that we don't
@@ -133,7 +133,7 @@ template <class Curve> class CommitmentKey {
*/
Commitment commit_sparse(PolynomialSpan<const Fr> polynomial)
{
PROFILE_THIS();
PROFILE_THIS_NAME("commit_sparse");
const size_t poly_size = polynomial.size();
ASSERT(polynomial.end_index() <= srs->get_monomial_size());

@@ -204,21 +204,24 @@ template <class Curve> class CommitmentKey {
* @return Commitment
*/
Commitment commit_structured(PolynomialSpan<const Fr> polynomial,
const std::vector<std::pair<size_t, size_t>>& active_ranges)
const std::vector<std::pair<size_t, size_t>>& active_ranges,
size_t final_active_wire_idx = 0)
{
BB_OP_COUNT_TIME();
PROFILE_THIS_NAME("commit_structured");
ASSERT(polynomial.end_index() <= srs->get_monomial_size());

// Percentage of nonzero coefficients beyond which we resort to the conventional commit method
constexpr size_t NONZERO_THRESHOLD = 75;

// Compute the number of non-zero coefficients in the polynomial
size_t total_num_scalars = 0;
for (const auto& range : active_ranges) {
total_num_scalars += range.second - range.first;
for (const auto& [first, second] : active_ranges) {
total_num_scalars += second - first;
}

// Compute "active" percentage of polynomial; resort to standard commit if appropriate
size_t percentage_nonzero = total_num_scalars * 100 / polynomial.size();
size_t polynomial_size = final_active_wire_idx != 0 ? final_active_wire_idx : polynomial.size();
size_t percentage_nonzero = total_num_scalars * 100 / polynomial_size;
if (percentage_nonzero > NONZERO_THRESHOLD) {
return commit(polynomial);
}
@@ -259,9 +262,10 @@ template <class Curve> class CommitmentKey {
* @return Commitment
*/
Commitment commit_structured_with_nonzero_complement(PolynomialSpan<const Fr> polynomial,
const std::vector<std::pair<size_t, size_t>>& active_ranges)
const std::vector<std::pair<size_t, size_t>>& active_ranges,
size_t final_active_wire_idx = 0)
{
BB_OP_COUNT_TIME();
PROFILE_THIS_NAME("commit_structured_with_nonzero_complement");
ASSERT(polynomial.end_index() <= srs->get_monomial_size());

using BatchedAddition = BatchedAffineAddition<Curve>;
@@ -273,20 +277,21 @@ template <class Curve> class CommitmentKey {
// Note: the range from the end of the last active range to the end of the polynomial is excluded from the
// complement since the polynomial is assumed to be zero there.
std::vector<std::pair<size_t, size_t>> active_ranges_complement;
// Also compute total number of scalars in the constant regions
size_t total_num_complement_scalars = 0;
for (size_t i = 0; i < active_ranges.size() - 1; ++i) {
const size_t start = active_ranges[i].second;
const size_t end = active_ranges[i + 1].first;
active_ranges_complement.emplace_back(start, end);
}

// Compute the total number of scalars in the constant regions
size_t total_num_complement_scalars = 0;
for (const auto& range : active_ranges_complement) {
total_num_complement_scalars += range.second - range.first;
if (end > start) {
active_ranges_complement.emplace_back(start, end);
total_num_complement_scalars += end - start;
}
}

size_t polynomial_size = final_active_wire_idx != 0 ? final_active_wire_idx : polynomial.size();
// Compute percentage of polynomial comprised of constant blocks; resort to standard commit if appropriate
size_t percentage_constant = total_num_complement_scalars * 100 / polynomial.size();
size_t percentage_constant = total_num_complement_scalars * 100 / polynomial_size;

if (percentage_constant < CONSTANT_THRESHOLD) {
return commit(polynomial);
}
@@ -299,12 +304,11 @@ template <class Curve> class CommitmentKey {
        // TODO(https://github.com/AztecProtocol/barretenberg/issues/1131): Peak memory usage could be improved by
// performing this copy and the subsequent summation as a precomputation prior to constructing the point table.
std::vector<G1> points;
points.reserve(2 * total_num_complement_scalars);
for (const auto& range : active_ranges_complement) {
const size_t start = 2 * range.first;
const size_t end = 2 * range.second;
for (size_t i = start; i < end; i += 2) {
points.emplace_back(point_table[i]);

points.reserve(total_num_complement_scalars);
for (const auto& [start, end] : active_ranges_complement) {
for (size_t i = start; i < end; i++) {
points.emplace_back(point_table[2 * i]);
}
}

@@ -313,17 +317,16 @@ template <class Curve> class CommitmentKey {
std::vector<Fr> unique_scalars;
std::vector<size_t> sequence_counts;
for (const auto& range : active_ranges_complement) {
if (range.second - range.first > 0) { // only ranges with nonzero length
unique_scalars.emplace_back(polynomial.span[range.first]);
sequence_counts.emplace_back(range.second - range.first);
}
unique_scalars.emplace_back(polynomial.span[range.first]);
sequence_counts.emplace_back(range.second - range.first);
}

// Reduce each sequence to a single point
auto reduced_points = BatchedAddition::add_in_place(points, sequence_counts);

// Compute the full commitment as the sum of the "active" region commitment and the constant region contribution
Commitment result = commit_structured(polynomial, active_ranges);
Commitment result = commit_structured(polynomial, active_ranges, final_active_wire_idx);

for (auto [scalar, point] : zip_view(unique_scalars, reduced_points)) {
result = result + point * scalar;
}
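
A small self-contained sketch to make the restructured complement computation above concrete (an illustration under assumptions, not code from this PR; ComplementInfo, compute_complement and effective_size are hypothetical names). It derives the constant regions between active ranges and their sequence counts, skipping empty gaps exactly as the updated commit_structured_with_nonzero_complement now does, and computes the effective polynomial size from an optional final_active_wire_idx:

    // Hypothetical helpers, for illustration only.
    #include <cstddef>
    #include <utility>
    #include <vector>

    struct ComplementInfo {
        std::vector<std::pair<size_t, size_t>> ranges; // constant regions between active ranges
        std::vector<size_t> sequence_counts;           // run length of each constant region
        size_t total_scalars = 0;                      // total number of constant scalars
    };

    inline ComplementInfo compute_complement(const std::vector<std::pair<size_t, size_t>>& active_ranges)
    {
        ComplementInfo info;
        for (size_t i = 0; i + 1 < active_ranges.size(); ++i) {
            const size_t start = active_ranges[i].second;  // end of one active range
            const size_t end = active_ranges[i + 1].first; // start of the next
            if (end > start) {                             // skip empty gaps between adjacent ranges
                info.ranges.emplace_back(start, end);
                info.sequence_counts.emplace_back(end - start);
                info.total_scalars += end - start;
            }
        }
        return info;
    }

    // The "percentage constant/nonzero" checks are now measured against the final active
    // wire index (when provided) rather than the full padded polynomial size.
    inline size_t effective_size(size_t polynomial_size, size_t final_active_wire_idx)
    {
        return final_active_wire_idx != 0 ? final_active_wire_idx : polynomial_size;
    }

For example, with active_ranges = {{0, 4}, {8, 12}, {12, 16}} the only complement region is [4, 8); the empty gap between the adjacent ranges [8, 12) and [12, 16) is now skipped up front, whereas the previous code pushed it into active_ranges_complement and only filtered it out later when collecting the unique scalars.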
@@ -10,6 +10,7 @@ template <typename Curve>
std::vector<typename BatchedAffineAddition<Curve>::G1> BatchedAffineAddition<Curve>::add_in_place(
const std::span<G1>& points, const std::vector<size_t>& sequence_counts)
{
PROFILE_THIS_NAME("BatchedAffineAddition::add_in_place");
// Instantiate scratch space for point addition denominators and their calculation
std::vector<Fq> scratch_space_vector(points.size());
std::span<Fq> scratch_space(scratch_space_vector);
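
For orientation, a naive reference sketch of what add_in_place computes (an assumption about its observable behaviour, not the library's implementation; add_runs_naive is a hypothetical name): each entry of sequence_counts is the length of a consecutive run in points, and the output holds one point per run, equal to that run's sum. The batched version amortizes the modular inversions required by affine point addition across the whole batch (the "point addition denominators" scratch space above) rather than paying one inversion per addition.

    // Naive reference only; assumes every count is positive and the counts sum to points.size().
    #include <cstddef>
    #include <vector>

    template <typename G1>
    std::vector<G1> add_runs_naive(const std::vector<G1>& points, const std::vector<size_t>& sequence_counts)
    {
        std::vector<G1> sums;
        sums.reserve(sequence_counts.size());
        size_t offset = 0;
        for (const size_t count : sequence_counts) {
            G1 acc = points[offset];
            for (size_t i = 1; i < count; ++i) {
                acc = acc + points[offset + i]; // the real code batches these affine additions
            }
            sums.push_back(acc);
            offset += count;
        }
        return sums;
    }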
3 changes: 2 additions & 1 deletion barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp
@@ -404,9 +404,10 @@ template <class T> void field<T>::batch_invert(field* coeffs, const size_t n) noexcept
batch_invert(std::span{ coeffs, n });
}

// TODO(https://github.com/AztecProtocol/barretenberg/issues/1166)
template <class T> void field<T>::batch_invert(std::span<field> coeffs) noexcept
{
BB_OP_COUNT_TRACK_NAME("fr::batch_invert");
PROFILE_THIS_NAME("fr::batch_invert");
const size_t n = coeffs.size();

auto temporaries_ptr = std::static_pointer_cast<field[]>(get_mem_slab(n * sizeof(field)));
2 changes: 1 addition & 1 deletion barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
@@ -123,7 +123,7 @@ template <typename FF, typename CommitmentKey_> class ProvingKey_ {
// folded element by element.
std::vector<FF> public_inputs;

// Ranges of the form [start, end) over which the execution trace is "active"
// Ranges of the form [start, end) where witnesses have non-zero values (hence the execution trace is "active")
std::vector<std::pair<size_t, size_t>> active_block_ranges;

ProvingKey_() = default;
@@ -22,6 +22,7 @@ struct ExecutionTraceUsageTracker {
MegaTraceFixedBlockSizes fixed_sizes; // fixed size of each block prescribed by structuring
// Store active ranges based on the most current accumulator and those based on all but the most recently
// accumulated circuit. The former is needed for the combiner calculation and the latter for the perturbator.
// The ranges cover all areas in the trace where relations have nontrivial values.
std::vector<Range> active_ranges;
std::vector<Range> previous_active_ranges;

@@ -56,8 +56,11 @@ namespace bb {
template <typename Flavor, typename GrandProdRelation>
void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
bb::RelationParameters<typename Flavor::FF>& relation_parameters,
size_t size_override = 0)
size_t size_override = 0,
std::vector<std::pair<size_t, size_t>> active_block_ranges = {})
{
PROFILE_THIS_NAME("compute_grand_product");

using FF = typename Flavor::FF;
using Polynomial = typename Flavor::Polynomial;
using Accumulator = std::tuple_element_t<0, typename GrandProdRelation::SumcheckArrayOfValuesOverSubrelations>;
@@ -84,22 +87,34 @@ void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
Polynomial numerator{ domain_size, domain_size };
Polynomial denominator{ domain_size, domain_size };

auto check_is_active = [&](size_t idx) {
if (active_block_ranges.empty()) {
return true;
}
return std::any_of(active_block_ranges.begin(), active_block_ranges.end(), [idx](const auto& range) {
return idx >= range.first && idx < range.second;
});
};

// Step (1)
// Populate `numerator` and `denominator` with the algebra described by Relation
FF gamma_fourth = relation_parameters.gamma.pow(4);
parallel_for(num_threads, [&](size_t thread_idx) {
typename Flavor::AllValues evaluations;
            // TODO(https://github.com/AztecProtocol/barretenberg/issues/940): construction of evaluations is equivalent to
// calling get_row which creates full copies. avoid?
typename Flavor::AllValues row;
const size_t start = idx_bounds[thread_idx].first;
const size_t end = idx_bounds[thread_idx].second;
for (size_t i = start; i < end; ++i) {
for (auto [eval, full_poly] : zip_view(evaluations.get_all(), full_polynomials.get_all())) {
eval = full_poly.size() > i ? full_poly[i] : 0;
if (check_is_active(i)) {
Review comment (Contributor):
I suppose by avoiding unnecessary calls to get_row you've cut down most of the additional cost, but there's more we could potentially do, right? IIUC, with this update we still go on to compute z_perm everywhere, even in the "constant" ranges, which could be avoided. Maybe that's a negligible cost compared to what you've handled here. One thought: maybe it's not this easy, but could we simply update the parallel_for loop that appears multiple times in this method to operate only over the active ranges? We shouldn't really even need to compute any intermediate values in the inactive regions; we would just populate the constant regions with the appropriate constant value at the end. Anyway, I'll leave it to you to decide what to do, if anything.

Reply (@maramihali, Contributor, Author, Dec 9, 2024):
Yes, thanks for the comment; will have a go at it in a subsequent PR.

                // TODO(https://github.com/AztecProtocol/barretenberg/issues/940): consider avoiding get_row if possible.
row = full_polynomials.get_row(i);
numerator.at(i) =
GrandProdRelation::template compute_grand_product_numerator<Accumulator>(row, relation_parameters);
denominator.at(i) = GrandProdRelation::template compute_grand_product_denominator<Accumulator>(
row, relation_parameters);
} else {
numerator.at(i) = gamma_fourth;
denominator.at(i) = gamma_fourth;
}
numerator.at(i) = GrandProdRelation::template compute_grand_product_numerator<Accumulator>(
evaluations, relation_parameters);
denominator.at(i) = GrandProdRelation::template compute_grand_product_denominator<Accumulator>(
evaluations, relation_parameters);
}
});
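
As a rough illustration of the reviewer's suggestion in the inline thread above (not something this PR implements; populate_by_active_ranges and compute_row are hypothetical names, and parallelisation plus the real relation evaluation are elided), the numerator and denominator could be written only over the active ranges, with the constant regions filled in a single pass:

    // Hypothetical sketch of operating only over active ranges.
    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <utility>
    #include <vector>

    template <typename FF>
    void populate_by_active_ranges(std::vector<FF>& numerator,
                                   std::vector<FF>& denominator,
                                   const std::vector<std::pair<size_t, size_t>>& active_block_ranges,
                                   const FF& constant_value,
                                   const std::function<std::pair<FF, FF>(size_t)>& compute_row)
    {
        // Start from the constant value everywhere; only the inactive regions keep it.
        std::fill(numerator.begin(), numerator.end(), constant_value);
        std::fill(denominator.begin(), denominator.end(), constant_value);

        // Evaluate the grand-product relation only inside the active ranges.
        for (const auto& [start, end] : active_block_ranges) {
            for (size_t i = start; i < end && i < numerator.size(); ++i) {
                const auto [num, den] = compute_row(i);
                numerator[i] = num;
                denominator[i] = den;
            }
        }
    }

Because the numerator and denominator are assigned the same constant on inactive rows, each such row contributes a factor of one to the running grand product; the particular choice of constant (gamma.pow(4) in the diff above) does not matter as long as both sides agree and the value is non-zero.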

@@ -163,6 +178,7 @@ void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
auto& grand_product_polynomial = GrandProdRelation::get_grand_product_polynomial(full_polynomials);
// We have a 'virtual' 0 at the start (as this is a to-be-shifted polynomial)
ASSERT(grand_product_polynomial.start_index() == 1);

parallel_for(num_threads, [&](size_t thread_idx) {
const size_t start = idx_bounds[thread_idx].first;
const size_t end = idx_bounds[thread_idx].second;
@@ -231,6 +231,7 @@ template <typename FF_> class DatabusLookupRelationImpl {
auto& relation_parameters,
const size_t circuit_size)
{
PROFILE_THIS_NAME("Databus::compute_logderivative_inverse");
auto& inverse_polynomial = BusData<bus_idx, Polynomials>::inverses(polynomials);

size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
@@ -156,6 +156,7 @@ template <typename FF_> class LogDerivLookupRelationImpl {
auto& relation_parameters,
const size_t circuit_size)
{
PROFILE_THIS_NAME("Lookup::compute_logderivative_inverse");
auto& inverse_polynomial = get_inverse_polynomial(polynomials);

size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
@@ -492,6 +492,8 @@ class MegaFlavor {
*/
void compute_logderivative_inverses(const RelationParameters<FF>& relation_parameters)
{
PROFILE_THIS_NAME("compute_logderivative_inverses");

// Compute inverses for conventional lookups
LogDerivLookupRelation<FF>::compute_logderivative_inverse(
this->polynomials, relation_parameters, this->circuit_size);
@@ -525,7 +527,7 @@ class MegaFlavor {

// Compute permutation grand product polynomial
compute_grand_product<MegaFlavor, UltraPermutationRelation<FF>>(
this->polynomials, relation_parameters, size_override);
this->polynomials, relation_parameters, size_override, this->active_block_ranges);
}

uint64_t estimate_memory()
@@ -328,7 +328,7 @@ class UltraFlavor {
[[nodiscard]] size_t get_polynomial_size() const { return q_c.size(); }
[[nodiscard]] AllValues get_row(const size_t row_idx) const
{
PROFILE_THIS();
PROFILE_THIS_NAME("UltraFlavor::get_row");
AllValues result;
for (auto [result_field, polynomial] : zip_view(result.get_all(), get_all())) {
result_field = polynomial[row_idx];
@@ -150,6 +150,7 @@ typename TraceToPolynomials<Flavor>::TraceData TraceToPolynomials<Flavor>::const
// otherwise, the next block starts immediately following the previous one
offset += block.get_fixed_size(is_structured);
}

return trace_data;
}

5 changes: 4 additions & 1 deletion barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp
@@ -235,6 +235,7 @@ template <IsUltraFlavor Flavor> void OinkProver<Flavor>::execute_grand_product_computation_round
{
PROFILE_THIS_NAME("OinkProver::execute_grand_product_computation_round");
// Compute the permutation grand product polynomial

proving_key->proving_key.compute_grand_product_polynomial(proving_key->relation_parameters,
proving_key->final_active_wire_idx + 1);

@@ -243,7 +244,9 @@ template <IsUltraFlavor Flavor> void OinkProver<Flavor>::execute_grand_product_computation_round
if (proving_key->get_is_structured()) {
witness_commitments.z_perm =
proving_key->proving_key.commitment_key->commit_structured_with_nonzero_complement(
proving_key->proving_key.polynomials.z_perm, proving_key->proving_key.active_block_ranges);
proving_key->proving_key.polynomials.z_perm,
proving_key->proving_key.active_block_ranges,
proving_key->final_active_wire_idx + 1);
} else {
witness_commitments.z_perm =
proving_key->proving_key.commitment_key->commit(proving_key->proving_key.polynomials.z_perm);
7 changes: 6 additions & 1 deletion barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.hpp
@@ -19,6 +19,7 @@
// clang-format on
#include <utility>

#include "barretenberg/plonk_honk_shared/execution_trace/execution_trace_usage_tracker.hpp"
#include "barretenberg/ultra_honk/decider_proving_key.hpp"

namespace bb {
@@ -40,16 +41,20 @@ template <IsUltraFlavor Flavor> class OinkProver {
std::shared_ptr<DeciderPK> proving_key;
std::shared_ptr<Transcript> transcript;
std::string domain_separator;
ExecutionTraceUsageTracker trace_usage_tracker;

typename Flavor::WitnessCommitments witness_commitments;
typename Flavor::CommitmentLabels commitment_labels;
using RelationSeparator = typename Flavor::RelationSeparator;

OinkProver(std::shared_ptr<DeciderPK> proving_key,
const std::shared_ptr<typename Flavor::Transcript>& transcript = std::make_shared<Transcript>(),
std::string domain_separator = "")
std::string domain_separator = "",
const ExecutionTraceUsageTracker& trace_usage_tracker = ExecutionTraceUsageTracker{})
: proving_key(proving_key)
, transcript(transcript)
, domain_separator(std::move(domain_separator))
, trace_usage_tracker(trace_usage_tracker)
{}

void prove();