chore: Optimise grand product computation round based on active ranges #10460

Merged (8 commits) on Dec 9, 2024
@@ -85,7 +85,7 @@ template <class Curve> class CommitmentKey {
*/
Commitment commit(PolynomialSpan<const Fr> polynomial)
{
PROFILE_THIS();
PROFILE_THIS_NAME("commit");
        // We must have a power-of-2 number of SRS points *after* subtracting by start_index.
size_t dyadic_poly_size = numeric::round_up_power_2(polynomial.size());
// Because pippenger prefers a power-of-2 size, we must choose a starting index for the points so that we don't
@@ -133,7 +133,7 @@ template <class Curve> class CommitmentKey {
*/
Commitment commit_sparse(PolynomialSpan<const Fr> polynomial)
{
PROFILE_THIS();
PROFILE_THIS_NAME("commit_sparse");
const size_t poly_size = polynomial.size();
ASSERT(polynomial.end_index() <= srs->get_monomial_size());

@@ -204,21 +204,24 @@ template <class Curve> class CommitmentKey {
* @return Commitment
*/
Commitment commit_structured(PolynomialSpan<const Fr> polynomial,
const std::vector<std::pair<size_t, size_t>>& active_ranges)
const std::vector<std::pair<size_t, size_t>>& active_ranges,
size_t final_active_wire_idx = 0)
{
BB_OP_COUNT_TIME();
PROFILE_THIS_NAME("commit_structured");
ASSERT(polynomial.end_index() <= srs->get_monomial_size());

// Percentage of nonzero coefficients beyond which we resort to the conventional commit method
constexpr size_t NONZERO_THRESHOLD = 75;

// Compute the number of non-zero coefficients in the polynomial
size_t total_num_scalars = 0;
for (const auto& range : active_ranges) {
total_num_scalars += range.second - range.first;
for (const auto& [first, second] : active_ranges) {
total_num_scalars += second - first;
}

// Compute "active" percentage of polynomial; resort to standard commit if appropriate
size_t percentage_nonzero = total_num_scalars * 100 / polynomial.size();
size_t polynomial_size = final_active_wire_idx != 0 ? final_active_wire_idx : polynomial.size();
size_t percentage_nonzero = total_num_scalars * 100 / polynomial_size;
if (percentage_nonzero > NONZERO_THRESHOLD) {
return commit(polynomial);
}
@@ -259,9 +262,10 @@ template <class Curve> class CommitmentKey {
* @return Commitment
*/
Commitment commit_structured_with_nonzero_complement(PolynomialSpan<const Fr> polynomial,
const std::vector<std::pair<size_t, size_t>>& active_ranges)
const std::vector<std::pair<size_t, size_t>>& active_ranges,
size_t final_active_wire_idx = 0)
{
BB_OP_COUNT_TIME();
PROFILE_THIS_NAME("commit_structured_with_nonzero_complement");
ASSERT(polynomial.end_index() <= srs->get_monomial_size());

using BatchedAddition = BatchedAffineAddition<Curve>;
@@ -273,20 +277,21 @@ template <class Curve> class CommitmentKey {
// Note: the range from the end of the last active range to the end of the polynomial is excluded from the
// complement since the polynomial is assumed to be zero there.
std::vector<std::pair<size_t, size_t>> active_ranges_complement;
// Also compute total number of scalars in the constant regions
size_t total_num_complement_scalars = 0;
for (size_t i = 0; i < active_ranges.size() - 1; ++i) {
const size_t start = active_ranges[i].second;
const size_t end = active_ranges[i + 1].first;
active_ranges_complement.emplace_back(start, end);
}

// Compute the total number of scalars in the constant regions
size_t total_num_complement_scalars = 0;
for (const auto& range : active_ranges_complement) {
total_num_complement_scalars += range.second - range.first;
if (end > start) {
active_ranges_complement.emplace_back(start, end);
total_num_complement_scalars += end - start;
}
}

size_t polynomial_size = final_active_wire_idx != 0 ? final_active_wire_idx : polynomial.size();
// Compute percentage of polynomial comprised of constant blocks; resort to standard commit if appropriate
size_t percentage_constant = total_num_complement_scalars * 100 / polynomial.size();
size_t percentage_constant = total_num_complement_scalars * 100 / polynomial_size;

if (percentage_constant < CONSTANT_THRESHOLD) {
return commit(polynomial);
}
@@ -299,12 +304,11 @@ template <class Curve> class CommitmentKey {
        // TODO(https://github.com/AztecProtocol/barretenberg/issues/1131): Peak memory usage could be improved by
// performing this copy and the subsequent summation as a precomputation prior to constructing the point table.
std::vector<G1> points;
points.reserve(2 * total_num_complement_scalars);
for (const auto& range : active_ranges_complement) {
const size_t start = 2 * range.first;
const size_t end = 2 * range.second;
for (size_t i = start; i < end; i += 2) {
points.emplace_back(point_table[i]);

points.reserve(total_num_complement_scalars);
for (const auto& [start, end] : active_ranges_complement) {
for (size_t i = start; i < end; i++) {
points.emplace_back(point_table[2 * i]);
}
}

@@ -313,17 +317,16 @@ template <class Curve> class CommitmentKey {
std::vector<Fr> unique_scalars;
std::vector<size_t> sequence_counts;
for (const auto& range : active_ranges_complement) {
if (range.second - range.first > 0) { // only ranges with nonzero length
unique_scalars.emplace_back(polynomial.span[range.first]);
sequence_counts.emplace_back(range.second - range.first);
}
unique_scalars.emplace_back(polynomial.span[range.first]);
sequence_counts.emplace_back(range.second - range.first);
}

// Reduce each sequence to a single point
auto reduced_points = BatchedAddition::add_in_place(points, sequence_counts);

// Compute the full commitment as the sum of the "active" region commitment and the constant region contribution
Commitment result = commit_structured(polynomial, active_ranges);
Commitment result = commit_structured(polynomial, active_ranges, final_active_wire_idx);

for (auto [scalar, point] : zip_view(unique_scalars, reduced_points)) {
result = result + point * scalar;
}
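
A small self-contained sketch to make the restructured complement computation above concrete (an illustration under assumptions, not code from this PR; ComplementInfo, compute_complement and effective_size are hypothetical names). It derives the constant regions between active ranges and their sequence counts, skipping empty gaps exactly as the updated commit_structured_with_nonzero_complement now does, and computes the effective polynomial size from an optional final_active_wire_idx:

    // Hypothetical helpers, for illustration only.
    #include <cstddef>
    #include <utility>
    #include <vector>

    struct ComplementInfo {
        std::vector<std::pair<size_t, size_t>> ranges; // constant regions between active ranges
        std::vector<size_t> sequence_counts;           // run length of each constant region
        size_t total_scalars = 0;                      // total number of constant scalars
    };

    inline ComplementInfo compute_complement(const std::vector<std::pair<size_t, size_t>>& active_ranges)
    {
        ComplementInfo info;
        for (size_t i = 0; i + 1 < active_ranges.size(); ++i) {
            const size_t start = active_ranges[i].second;  // end of one active range
            const size_t end = active_ranges[i + 1].first; // start of the next
            if (end > start) {                             // skip empty gaps between adjacent ranges
                info.ranges.emplace_back(start, end);
                info.sequence_counts.emplace_back(end - start);
                info.total_scalars += end - start;
            }
        }
        return info;
    }

    // The "percentage constant/nonzero" checks are now measured against the final active
    // wire index (when provided) rather than the full padded polynomial size.
    inline size_t effective_size(size_t polynomial_size, size_t final_active_wire_idx)
    {
        return final_active_wire_idx != 0 ? final_active_wire_idx : polynomial_size;
    }

For example, with active_ranges = {{0, 4}, {8, 12}, {12, 16}} the only complement region is [4, 8); the empty gap between the adjacent ranges [8, 12) and [12, 16) is now skipped up front, whereas the previous code pushed it into active_ranges_complement and only filtered it out later when collecting the unique scalars.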
@@ -10,6 +10,7 @@ template <typename Curve>
std::vector<typename BatchedAffineAddition<Curve>::G1> BatchedAffineAddition<Curve>::add_in_place(
const std::span<G1>& points, const std::vector<size_t>& sequence_counts)
{
PROFILE_THIS_NAME("BatchedAffineAddition::add_in_place");
// Instantiate scratch space for point addition denominators and their calculation
std::vector<Fq> scratch_space_vector(points.size());
std::span<Fq> scratch_space(scratch_space_vector);
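
For orientation, a naive reference sketch of what add_in_place computes (an assumption about its observable behaviour, not the library's implementation; add_runs_naive is a hypothetical name): each entry of sequence_counts is the length of a consecutive run in points, and the output holds one point per run, equal to that run's sum. The batched version amortizes the modular inversions required by affine point addition across the whole batch (the "point addition denominators" scratch space above) rather than paying one inversion per addition.

    // Naive reference only; assumes every count is positive and the counts sum to points.size().
    #include <cstddef>
    #include <vector>

    template <typename G1>
    std::vector<G1> add_runs_naive(const std::vector<G1>& points, const std::vector<size_t>& sequence_counts)
    {
        std::vector<G1> sums;
        sums.reserve(sequence_counts.size());
        size_t offset = 0;
        for (const size_t count : sequence_counts) {
            G1 acc = points[offset];
            for (size_t i = 1; i < count; ++i) {
                acc = acc + points[offset + i]; // the real code batches these affine additions
            }
            sums.push_back(acc);
            offset += count;
        }
        return sums;
    }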
3 changes: 2 additions & 1 deletion barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp
@@ -404,9 +404,10 @@ template <class T> void field<T>::batch_invert(field* coeffs, const size_t n) noexcept
batch_invert(std::span{ coeffs, n });
}

// TODO(https://github.com/AztecProtocol/barretenberg/issues/1166)
template <class T> void field<T>::batch_invert(std::span<field> coeffs) noexcept
{
BB_OP_COUNT_TRACK_NAME("fr::batch_invert");
PROFILE_THIS_NAME("fr::batch_invert");
const size_t n = coeffs.size();

auto temporaries_ptr = std::static_pointer_cast<field[]>(get_mem_slab(n * sizeof(field)));
2 changes: 1 addition & 1 deletion barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
@@ -123,7 +123,7 @@ template <typename FF, typename CommitmentKey_> class ProvingKey_ {
// folded element by element.
std::vector<FF> public_inputs;

// Ranges of the form [start, end) over which the execution trace is "active"
// Ranges of the form [start, end) where witnesses have non-zero values (hence the execution trace is "active")
std::vector<std::pair<size_t, size_t>> active_block_ranges;

ProvingKey_() = default;
@@ -22,6 +22,7 @@ struct ExecutionTraceUsageTracker {
MegaTraceFixedBlockSizes fixed_sizes; // fixed size of each block prescribed by structuring
// Store active ranges based on the most current accumulator and those based on all but the most recently
// accumulated circuit. The former is needed for the combiner calculation and the latter for the perturbator.
// The ranges cover all areas in the trace where relations have nontrivial values.
std::vector<Range> active_ranges;
std::vector<Range> previous_active_ranges;

@@ -56,8 +56,11 @@ namespace bb {
template <typename Flavor, typename GrandProdRelation>
void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
bb::RelationParameters<typename Flavor::FF>& relation_parameters,
size_t size_override = 0)
size_t size_override = 0,
std::vector<std::pair<size_t, size_t>> active_block_ranges = {})
{
PROFILE_THIS_NAME("compute_grand_product");

using FF = typename Flavor::FF;
using Polynomial = typename Flavor::Polynomial;
using Accumulator = std::tuple_element_t<0, typename GrandProdRelation::SumcheckArrayOfValuesOverSubrelations>;
@@ -84,22 +87,34 @@ void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
Polynomial numerator{ domain_size, domain_size };
Polynomial denominator{ domain_size, domain_size };

auto check_is_active = [&](size_t idx) {
if (active_block_ranges.empty()) {
return true;
}
return std::any_of(active_block_ranges.begin(), active_block_ranges.end(), [idx](const auto& range) {
return idx >= range.first && idx < range.second;
});
};

// Step (1)
// Populate `numerator` and `denominator` with the algebra described by Relation
FF gamma_fourth = relation_parameters.gamma.pow(4);
parallel_for(num_threads, [&](size_t thread_idx) {
typename Flavor::AllValues evaluations;
            // TODO(https://github.com/AztecProtocol/barretenberg/issues/940): construction of evaluations is equivalent to
// calling get_row which creates full copies. avoid?
typename Flavor::AllValues row;
const size_t start = idx_bounds[thread_idx].first;
const size_t end = idx_bounds[thread_idx].second;
for (size_t i = start; i < end; ++i) {
for (auto [eval, full_poly] : zip_view(evaluations.get_all(), full_polynomials.get_all())) {
eval = full_poly.size() > i ? full_poly[i] : 0;
if (check_is_active(i)) {
Review comment (Contributor):
I suppose by avoiding unnecessary calls to get_row you've cut down most of the additional cost, but there's more we could potentially do, right? IIUC, with this update we still go on to compute z_perm everywhere, even in the "constant" ranges, which could be avoided. Maybe that's a negligible cost compared to what you've handled here. One thought: maybe it's not this easy, but could we simply update the parallel_for loop that appears multiple times in this method to operate only over the active ranges? We shouldn't really even need to compute any intermediate values in the inactive regions; we would just populate the constant regions with the appropriate constant value at the end. Anyway, I'll leave it to you to decide what to do, if anything.

Reply (@maramihali, Contributor, Author, Dec 9, 2024):
Yes, thanks for the comment; will have a go at it in a subsequent PR.

                // TODO(https://github.com/AztecProtocol/barretenberg/issues/940): consider avoiding get_row if possible.
row = full_polynomials.get_row(i);
numerator.at(i) =
GrandProdRelation::template compute_grand_product_numerator<Accumulator>(row, relation_parameters);
denominator.at(i) = GrandProdRelation::template compute_grand_product_denominator<Accumulator>(
row, relation_parameters);
} else {
numerator.at(i) = gamma_fourth;
denominator.at(i) = gamma_fourth;
}
numerator.at(i) = GrandProdRelation::template compute_grand_product_numerator<Accumulator>(
evaluations, relation_parameters);
denominator.at(i) = GrandProdRelation::template compute_grand_product_denominator<Accumulator>(
evaluations, relation_parameters);
}
});
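
As a rough illustration of the reviewer's suggestion in the inline thread above (not something this PR implements; populate_by_active_ranges and compute_row are hypothetical names, and parallelisation plus the real relation evaluation are elided), the numerator and denominator could be written only over the active ranges, with the constant regions filled in a single pass:

    // Hypothetical sketch of operating only over active ranges.
    #include <algorithm>
    #include <cstddef>
    #include <functional>
    #include <utility>
    #include <vector>

    template <typename FF>
    void populate_by_active_ranges(std::vector<FF>& numerator,
                                   std::vector<FF>& denominator,
                                   const std::vector<std::pair<size_t, size_t>>& active_block_ranges,
                                   const FF& constant_value,
                                   const std::function<std::pair<FF, FF>(size_t)>& compute_row)
    {
        // Start from the constant value everywhere; only the inactive regions keep it.
        std::fill(numerator.begin(), numerator.end(), constant_value);
        std::fill(denominator.begin(), denominator.end(), constant_value);

        // Evaluate the grand-product relation only inside the active ranges.
        for (const auto& [start, end] : active_block_ranges) {
            for (size_t i = start; i < end && i < numerator.size(); ++i) {
                const auto [num, den] = compute_row(i);
                numerator[i] = num;
                denominator[i] = den;
            }
        }
    }

Because the numerator and denominator are assigned the same constant on inactive rows, each such row contributes a factor of one to the running grand product; the particular choice of constant (gamma.pow(4) in the diff above) does not matter as long as both sides agree and the value is non-zero.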

@@ -163,6 +178,7 @@ void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
auto& grand_product_polynomial = GrandProdRelation::get_grand_product_polynomial(full_polynomials);
// We have a 'virtual' 0 at the start (as this is a to-be-shifted polynomial)
ASSERT(grand_product_polynomial.start_index() == 1);

parallel_for(num_threads, [&](size_t thread_idx) {
const size_t start = idx_bounds[thread_idx].first;
const size_t end = idx_bounds[thread_idx].second;
@@ -231,6 +231,7 @@ template <typename FF_> class DatabusLookupRelationImpl {
auto& relation_parameters,
const size_t circuit_size)
{
PROFILE_THIS_NAME("Databus::compute_logderivative_inverse");
auto& inverse_polynomial = BusData<bus_idx, Polynomials>::inverses(polynomials);

size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
@@ -156,6 +156,7 @@ template <typename FF_> class LogDerivLookupRelationImpl {
auto& relation_parameters,
const size_t circuit_size)
{
PROFILE_THIS_NAME("Lookup::compute_logderivative_inverse");
auto& inverse_polynomial = get_inverse_polynomial(polynomials);

size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
@@ -492,6 +492,8 @@ class MegaFlavor {
*/
void compute_logderivative_inverses(const RelationParameters<FF>& relation_parameters)
{
PROFILE_THIS_NAME("compute_logderivative_inverses");

// Compute inverses for conventional lookups
LogDerivLookupRelation<FF>::compute_logderivative_inverse(
this->polynomials, relation_parameters, this->circuit_size);
@@ -525,7 +527,7 @@ class MegaFlavor {

// Compute permutation grand product polynomial
compute_grand_product<MegaFlavor, UltraPermutationRelation<FF>>(
this->polynomials, relation_parameters, size_override);
this->polynomials, relation_parameters, size_override, this->active_block_ranges);
}

uint64_t estimate_memory()
@@ -328,7 +328,7 @@ class UltraFlavor {
[[nodiscard]] size_t get_polynomial_size() const { return q_c.size(); }
[[nodiscard]] AllValues get_row(const size_t row_idx) const
{
PROFILE_THIS();
PROFILE_THIS_NAME("UltraFlavor::get_row");
AllValues result;
for (auto [result_field, polynomial] : zip_view(result.get_all(), get_all())) {
result_field = polynomial[row_idx];
@@ -150,6 +150,7 @@ typename TraceToPolynomials<Flavor>::TraceData TraceToPolynomials<Flavor>::const
// otherwise, the next block starts immediately following the previous one
offset += block.get_fixed_size(is_structured);
}

return trace_data;
}

5 changes: 4 additions & 1 deletion barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp
@@ -235,6 +235,7 @@ template <IsUltraFlavor Flavor> void OinkProver<Flavor>::execute_grand_product_computation_round
{
PROFILE_THIS_NAME("OinkProver::execute_grand_product_computation_round");
// Compute the permutation grand product polynomial

proving_key->proving_key.compute_grand_product_polynomial(proving_key->relation_parameters,
proving_key->final_active_wire_idx + 1);

@@ -243,7 +244,9 @@ template <IsUltraFlavor Flavor> void OinkProver<Flavor>::execute_grand_product_computation_round
if (proving_key->get_is_structured()) {
witness_commitments.z_perm =
proving_key->proving_key.commitment_key->commit_structured_with_nonzero_complement(
proving_key->proving_key.polynomials.z_perm, proving_key->proving_key.active_block_ranges);
proving_key->proving_key.polynomials.z_perm,
proving_key->proving_key.active_block_ranges,
proving_key->final_active_wire_idx + 1);
} else {
witness_commitments.z_perm =
proving_key->proving_key.commitment_key->commit(proving_key->proving_key.polynomials.z_perm);
7 changes: 6 additions & 1 deletion barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.hpp
@@ -19,6 +19,7 @@
// clang-format on
#include <utility>

#include "barretenberg/plonk_honk_shared/execution_trace/execution_trace_usage_tracker.hpp"
#include "barretenberg/ultra_honk/decider_proving_key.hpp"

namespace bb {
@@ -40,16 +41,20 @@ template <IsUltraFlavor Flavor> class OinkProver {
std::shared_ptr<DeciderPK> proving_key;
std::shared_ptr<Transcript> transcript;
std::string domain_separator;
ExecutionTraceUsageTracker trace_usage_tracker;

typename Flavor::WitnessCommitments witness_commitments;
typename Flavor::CommitmentLabels commitment_labels;
using RelationSeparator = typename Flavor::RelationSeparator;

OinkProver(std::shared_ptr<DeciderPK> proving_key,
const std::shared_ptr<typename Flavor::Transcript>& transcript = std::make_shared<Transcript>(),
std::string domain_separator = "")
std::string domain_separator = "",
const ExecutionTraceUsageTracker& trace_usage_tracker = ExecutionTraceUsageTracker{})
: proving_key(proving_key)
, transcript(transcript)
, domain_separator(std::move(domain_separator))
, trace_usage_tracker(trace_usage_tracker)
{}

void prove();