Skip to content

Commit

Permalink
Add polynomial benchmark infra, switch poly eval to horners methods (#…
Browse files Browse the repository at this point in the history
…114)

* Add benchmark for dense polynomial evaluate, rename fft bench

* Use Horner's method for polynomial evaluation

* Add parallel Horner's method support

* Refactor common logic
  • Loading branch information
ValarDragon authored Dec 7, 2020
1 parent 14c35fd commit 70ebfa6
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 14 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@
- #100 (ark-ff) Implement `batch_inverse_and_mul`
- #101 (ark-ff) Add `element(i: usize)` on the `Domain` trait.
- #107 (ark-serialize) Add an impl of `CanonicalSerialize/Deserialize` for `BTreeSet`.
- #114 (ark-poly) Significantly speed up and reduce memory usage of `DensePolynomial.evaluate`.
- #115 (ark-poly) Add parallel implementation to operations on `Evaluations`.
- #115 (ark-ff) Add parallel implementation of `batch_inversion`.

### Bug fixes
- #36 (ark-ec) In Short-Weierstrass curves, include an infinity bit in `ToConstraintField`.
- #107 (ark-serialize) Fix handling of `(de)serialize_uncompressed/unchecked` in various impls of `CanonicalSerialize/Deserialize`.
Expand Down
18 changes: 14 additions & 4 deletions poly-benches/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,24 @@ license = "MIT/Apache-2.0"
edition = "2018"
publish = false

[dev-dependencies]
[dependencies]
ark-poly = { path = "../poly" }
ark-ff = { path = "../ff" }
ark-test-curves = { path = "../test-curves", default-features = false, features = [ "bls12_381_scalar_field" ] }
ark-test-curves = { path = "../test-curves", default-features = false, features = [ "bls12_381_scalar_field", "mnt4_753_curve" ] }
criterion = "0.3.1"
rand = "0.7"
rayon = { version = "1", optional = true }

[features]
default = []
parallel = ["ark-ff/parallel", "rayon", "ark-poly/parallel" ]

[[bench]]
name = "fft"
path = "benches/fft.rs"
name = "groth16_fft"
path = "benches/groth16_fft.rs"
harness = false

[[bench]]
name = "dense_polynomial"
path = "benches/dense_polynomial.rs"
harness = false
47 changes: 47 additions & 0 deletions poly-benches/benches/dense_polynomial.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use rand;

extern crate criterion;

use ark_ff::Field;
use ark_poly::{polynomial::univariate::DensePolynomial, Polynomial, UVPolynomial};
use ark_test_curves::bls12_381::Fr as bls12_381_fr;
use criterion::BenchmarkId;
use criterion::Criterion;
use criterion::{criterion_group, criterion_main};

const POLY_LOG_MIN_SIZE: usize = 15;
const POLY_EVALUATE_MAX_DEGREE: usize = 1 << 17;

/// Builds the list of polynomial sizes to benchmark.
///
/// The list starts at `2^POLY_LOG_MIN_SIZE` and keeps doubling until the
/// most recent entry is at least `max_degree`. The starting size is always
/// present, even when `max_degree` is smaller than it.
fn size_range(max_degree: usize) -> Vec<usize> {
    let mut sizes = Vec::new();
    let mut current = 1 << POLY_LOG_MIN_SIZE;
    loop {
        sizes.push(current);
        if current >= max_degree {
            break;
        }
        current *= 2;
    }
    sizes
}

/// Benchmarks `DensePolynomial::evaluate` over the field `F`.
///
/// One benchmark is registered per size returned by
/// `size_range(POLY_EVALUATE_MAX_DEGREE)`; each one samples a random
/// polynomial once and then repeatedly evaluates it at a fresh random point.
///
/// `name` labels the benchmark group. It is formatted with `{:?}`, so the
/// group name contains the label wrapped in double quotes; this is kept
/// as-is so existing benchmark IDs do not change.
fn bench_poly_evaluate<F: Field>(c: &mut Criterion, name: &'static str) {
    let mut group = c.benchmark_group(format!("{:?} - evaluate_polynomial", name));
    for degree in size_range(POLY_EVALUATE_MAX_DEGREE).iter() {
        group.bench_with_input(BenchmarkId::from_parameter(degree), degree, |b, &degree| {
            // Per benchmark setup
            let mut rng = rand::thread_rng();
            let poly = DensePolynomial::<F>::rand(degree, &mut rng);
            b.iter(|| {
                // Per benchmark iteration (sampling the point is part of the
                // timed work, exactly as before).
                let pt = F::rand(&mut rng);
                // Return the value instead of discarding it: Criterion
                // black-boxes whatever the routine returns, which keeps the
                // optimizer from eliminating the evaluation being measured.
                poly.evaluate(&pt)
            });
        });
    }
    group.finish();
}

/// Runs the polynomial-evaluation benchmark over the `bls12_381` field type
/// imported as `bls12_381_fr`, under the group label "bls12_381".
fn bench_bls12_381(c: &mut Criterion) {
    bench_poly_evaluate::<bls12_381_fr>(c, "bls12_381");
}

// Collect the benchmark functions into a group named `benches`, then let
// Criterion generate the `main` entry point for this bench target.
criterion_group!(benches, bench_bls12_381);
criterion_main!(benches);
File renamed without changes.
64 changes: 54 additions & 10 deletions poly/src/polynomial/univariate/dense.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ use ark_std::{
use ark_ff::{FftField, Field, Zero};
use rand::Rng;

#[cfg(feature = "parallel")]
use ark_std::cmp::max;
#[cfg(feature = "parallel")]
use rayon::prelude::*;

Expand Down Expand Up @@ -39,18 +41,60 @@ impl<F: Field> Polynomial<F> for DensePolynomial<F> {
/// Evaluates `self` at `point`.
///
/// Returns zero for the zero polynomial and the constant coefficient when
/// `point` is zero; every other case is delegated to `internal_evaluate`.
fn evaluate(&self, point: &F) -> F {
if self.is_zero() {
return F::zero();
} else if point.is_zero() {
// At zero only the constant term contributes, so return it directly.
return self.coeffs[0];
}
// NOTE(review): the five lines below (power table construction) look like
// the removed implementation shown by the diff view rather than live code
// — confirm against the repository before relying on them.
let mut powers_of_point = vec![F::one()];
let mut cur = *point;
for _ in 0..self.degree() {
powers_of_point.push(cur);
cur *= point;
self.internal_evaluate(point)
}
}

#[cfg(feature = "parallel")]
// Set some minimum number of field elements to be worked on per thread
// to avoid per-thread costs dominating parallel execution time.
// The parallel `internal_evaluate` uses this as the lower bound on the
// chunk length it passes to `par_chunks`.
const MIN_ELEMENTS_PER_THREAD: usize = 16;

impl<F: Field> DensePolynomial<F> {
/// Evaluates the polynomial whose coefficients are `poly_coeffs` at `point`.
///
/// `poly_coeffs[i]` is treated as the coefficient of `point^i`: the slice is
/// walked from its last element down to index 0, multiplying the running
/// result by `point` and adding the next coefficient at each step. An empty
/// slice yields zero.
#[inline]
// Horner's method for polynomial evaluation
fn horner_evaluate(poly_coeffs: &[F], point: &F) -> F {
let mut result = F::zero();
let num_coeffs = poly_coeffs.len();
for i in (0..num_coeffs).rev() {
// result <- result * point + c_i
result *= point;
result += poly_coeffs[i];
}
// NOTE(review): the five lines below (assert + zipped power/coefficient sum)
// look like the removed power-table implementation shown by the diff view
// rather than part of this function — confirm against the repository.
assert_eq!(powers_of_point.len(), self.coeffs.len());
ark_std::cfg_into_iter!(powers_of_point)
.zip(&self.coeffs)
.map(|(power, coeff)| power * coeff)
.sum()
result
}

#[cfg(not(feature = "parallel"))]
/// Serial evaluation: a single Horner pass over the full coefficient list.
fn internal_evaluate(&self, point: &F) -> F {
    let all_coeffs: &[F] = &self.coeffs;
    Self::horner_evaluate(all_coeffs, point)
}

#[cfg(feature = "parallel")]
/// Parallel evaluation built on Horner's method.
///
/// The coefficients are cut into contiguous chunks. Each chunk is evaluated
/// on its own with Horner's method and then scaled by `point` raised to the
/// index of the chunk's first coefficient; the sum of the scaled chunk
/// results is the value of the polynomial at `point`.
fn internal_evaluate(&self, point: &F) -> F {
    // Size the chunks from the number of rayon worker threads, but never
    // hand out fewer than MIN_ELEMENTS_PER_THREAD coefficients per chunk.
    let worker_count = rayon::current_num_threads();
    let chunk_len = max(self.coeffs.len() / worker_count, MIN_ELEMENTS_PER_THREAD);

    self.coeffs
        .par_chunks(chunk_len)
        .enumerate()
        .map(|(chunk_idx, chunk_coeffs)| {
            // Exponent of `point` attached to the first coefficient of this chunk.
            let start_exponent = (chunk_idx * chunk_len) as u64;
            let mut partial = Self::horner_evaluate(chunk_coeffs, point);
            partial *= point.pow(&[start_exponent]);
            partial
        })
        .sum()
}
}

Expand Down

0 comments on commit 70ebfa6

Please sign in to comment.