Skip to content

Commit

Permalink
Merge pull request #463 from kroma-network/perf/use-msm-gpu-when-generating-groth16-proof
Browse files Browse the repository at this point in the history

perf: use msm gpu when generating groth16 proof
  • Loading branch information
chokobole authored Jul 8, 2024
2 parents 5fde7a7 + 34705c8 commit e6e2a65
Show file tree
Hide file tree
Showing 19 changed files with 366 additions and 194 deletions.
6 changes: 6 additions & 0 deletions tachyon/base/openmp_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,18 @@

// OpenMP helper macros. When TACHYON_HAS_OPENMP is defined they expand to the
// matching OpenMP pragma; otherwise they expand to nothing (or to a plain
// `for`), so code annotated with them still compiles as an ordinary loop.
#if defined(TACHYON_HAS_OPENMP)
// Expands to nothing with OpenMP and to `constexpr` without it.
#define CONSTEXPR_IF_NOT_OPENMP
// Bare `omp for` pragma; written on its own line right before a `for` loop.
// NOTE(review): presumably meant to be used inside an OMP_PARALLEL block —
// confirm against callers.
#define OMP_FOR _Pragma("omp for")
// Same as OMP_FOR, with the `nowait` clause added.
#define OMP_FOR_NOWAIT _Pragma("omp for nowait")
// Bare `omp parallel` pragma; written right before a `{ ... }` block.
#define OMP_PARALLEL _Pragma("omp parallel")
// The macros below wrap the loop themselves: |expr| is the whole
// `init; condition; step` header and the loop body follows the macro call.
#define OPENMP_PARALLEL_FOR(expr) _Pragma("omp parallel for") for (expr)
#define OPENMP_PARALLEL_NESTED_FOR(expr) \
_Pragma("omp parallel for collapse(2)") for (expr)
#define OPENMP_FOR(expr) _Pragma("omp for") for (expr)
#else
// Fallbacks used when OpenMP is not available.
#define CONSTEXPR_IF_NOT_OPENMP constexpr
// The pragma-only macros vanish, leaving the loop or block that follows them.
#define OMP_FOR
#define OMP_FOR_NOWAIT
#define OMP_PARALLEL
// The loop-wrapping macros reduce to a plain `for (expr)`.
#define OPENMP_PARALLEL_FOR(expr) for (expr)
#define OPENMP_PARALLEL_NESTED_FOR(expr) for (expr)
#define OPENMP_FOR(expr) for (expr)
Expand Down
1 change: 0 additions & 1 deletion tachyon/math/elliptic_curves/msm/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ tachyon_cuda_unittest(
":variable_base_msm_gpu",
"//tachyon/device/gpu:scoped_mem_pool",
"//tachyon/device/gpu:scoped_stream",
"//tachyon/math/elliptic_curves/bn/bn254:g1",
"//tachyon/math/elliptic_curves/msm/test:variable_base_msm_test_set",
],
)
21 changes: 17 additions & 4 deletions tachyon/math/elliptic_curves/msm/algorithms/icicle/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,18 @@ load("//bazel:tachyon_cc.bzl", "tachyon_cuda_library")
package(default_visibility = ["//visibility:public"])

tachyon_cuda_library(
name = "icicle_msm_bn254",
srcs = if_cuda(["icicle_msm_bn254.cc"]),
hdrs = ["icicle_msm_bn254.h"],
name = "icicle_msm_bn254_g1",
srcs = if_cuda(["icicle_msm_bn254_g1.cc"]),
hdrs = ["icicle_msm_bn254_g1.h"],
deps = ["@icicle//:msm"] + if_cuda([
"@local_config_cuda//cuda:cudart_static",
]),
)

tachyon_cuda_library(
name = "icicle_msm_bn254_g2",
srcs = if_cuda(["icicle_msm_bn254_g2.cc"]),
hdrs = ["icicle_msm_bn254_g2.h"],
deps = ["@icicle//:msm"] + if_cuda([
"@local_config_cuda//cuda:cudart_static",
]),
Expand All @@ -16,9 +25,13 @@ tachyon_cuda_library(
name = "icicle_msm",
hdrs = ["icicle_msm.h"],
deps = [
":icicle_msm_bn254",
":icicle_msm_bn254_g1",
":icicle_msm_bn254_g2",
"//tachyon/base:bit_cast",
"//tachyon/device/gpu:gpu_device_functions",
"//tachyon/device/gpu:gpu_enums",
"//tachyon/math/elliptic_curves:points",
"//tachyon/math/elliptic_curves/bn/bn254:g1",
"//tachyon/math/elliptic_curves/bn/bn254:g2",
],
)
90 changes: 62 additions & 28 deletions tachyon/math/elliptic_curves/msm/algorithms/icicle/icicle_msm.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@

#include "third_party/icicle/include/fields/id.h"

#include "tachyon/base/bit_cast.h"
#include "tachyon/device/gpu/gpu_device_functions.h"
#include "tachyon/device/gpu/gpu_enums.h"
#include "tachyon/math/elliptic_curves/msm/algorithms/icicle/icicle_msm_bn254.h"
#include "tachyon/math/elliptic_curves/bn/bn254/g1.h"
#include "tachyon/math/elliptic_curves/bn/bn254/g2.h"
#include "tachyon/math/elliptic_curves/msm/algorithms/icicle/icicle_msm_bn254_g1.h"
#include "tachyon/math/elliptic_curves/msm/algorithms/icicle/icicle_msm_bn254_g2.h"
#include "tachyon/math/elliptic_curves/projective_point.h"

namespace tachyon::math {
Expand Down Expand Up @@ -44,40 +48,70 @@ class IcicleMSM {
template <typename BaseContainer, typename ScalarContainer>
[[nodiscard]] bool Run(const BaseContainer& cpu_bases,
const ScalarContainer& cpu_scalars,
ProjectivePoint<Curve>* cpu_result) {
#if FIELD_ID != BN254
#error Only Bn254 is supported
#endif

using BaseField = typename Point::BaseField;
using BigInt = typename Point::BaseField::BigIntTy;

size_t bases_size = std::size(cpu_bases);
size_t scalars_size = std::size(cpu_scalars);

if (bases_size != scalars_size) {
LOG(ERROR) << "bases_size and scalars_size don't match";
return false;
}

::bn254::projective_t ret;
gpuError_t error = tachyon_bn254_msm_cuda(
reinterpret_cast<const ::bn254::scalar_t*>(std::data(cpu_scalars)),
reinterpret_cast<const ::bn254::affine_t*>(std::data(cpu_bases)),
bases_size, *config_, &ret);
if (error != gpuSuccess) return false;
*cpu_result = {BaseField(reinterpret_cast<const BigInt&>(ret.x)),
BaseField(reinterpret_cast<const BigInt&>(ret.y)),
BaseField(reinterpret_cast<const BigInt&>(ret.z))};
return true;
}
ProjectivePoint<Curve>* cpu_result);

private:
gpuMemPool_t mem_pool_ = nullptr;
gpuStream_t stream_ = nullptr;
std::unique_ptr<::msm::MSMConfig> config_;
};

// Computes the multi-scalar multiplication of |cpu_bases| and |cpu_scalars|
// for bn254 G1 by calling tachyon_bn254_g1_msm_cuda(), and writes the outcome
// to |cpu_result|.
//
// Returns false, without writing to |cpu_result|, when the two containers
// differ in length or when the call does not report gpuSuccess.
template <>
template <typename BaseContainer, typename ScalarContainer>
bool IcicleMSM<bn254::G1AffinePoint>::Run(const BaseContainer& cpu_bases,
                                          const ScalarContainer& cpu_scalars,
                                          ProjectivePoint<Curve>* cpu_result) {
#if FIELD_ID != BN254
#error Only Bn254 is supported
#endif

  const size_t num_points = std::size(cpu_bases);
  if (num_points != std::size(cpu_scalars)) {
    LOG(ERROR) << "bases_size and scalars_size don't match";
    return false;
  }

  // NOTE(review): the casts assume the container elements share their memory
  // layout with Icicle's scalar_t / affine_t — confirm.
  const auto* icicle_scalars =
      reinterpret_cast<const ::bn254::scalar_t*>(std::data(cpu_scalars));
  const auto* icicle_points =
      reinterpret_cast<const ::bn254::affine_t*>(std::data(cpu_bases));

  ::bn254::projective_t msm_output;
  const gpuError_t status = tachyon_bn254_g1_msm_cuda(
      icicle_scalars, icicle_points, num_points, *config_, &msm_output);
  if (status != gpuSuccess) {
    return false;
  }

  // The raw output goes through to_montgomery() before it is reinterpreted as
  // a tachyon point.
  // NOTE(review): presumably tachyon keeps field elements in Montgomery form
  // while Icicle returns them in canonical form — confirm.
  const ::bn254::projective_t montgomery_output =
      ::bn254::projective_t::to_montgomery(msm_output);
  *cpu_result = base::bit_cast<ProjectivePoint<Curve>>(montgomery_output);
  return true;
}

// Computes the multi-scalar multiplication of |cpu_bases| and |cpu_scalars|
// for bn254 G2 by calling tachyon_bn254_g2_msm_cuda(), and writes the outcome
// to |cpu_result|.
//
// Returns false, without writing to |cpu_result|, when the two containers
// differ in length or when the call does not report gpuSuccess.
template <>
template <typename BaseContainer, typename ScalarContainer>
bool IcicleMSM<bn254::G2AffinePoint>::Run(const BaseContainer& cpu_bases,
                                          const ScalarContainer& cpu_scalars,
                                          ProjectivePoint<Curve>* cpu_result) {
#if FIELD_ID != BN254
#error Only Bn254 is supported
#endif

  const size_t num_points = std::size(cpu_bases);
  if (num_points != std::size(cpu_scalars)) {
    LOG(ERROR) << "bases_size and scalars_size don't match";
    return false;
  }

  // NOTE(review): the casts assume the container elements share their memory
  // layout with Icicle's scalar_t / g2_affine_t — confirm.
  const auto* icicle_scalars =
      reinterpret_cast<const ::bn254::scalar_t*>(std::data(cpu_scalars));
  const auto* icicle_points =
      reinterpret_cast<const ::bn254::g2_affine_t*>(std::data(cpu_bases));

  ::bn254::g2_projective_t msm_output;
  const gpuError_t status = tachyon_bn254_g2_msm_cuda(
      icicle_scalars, icicle_points, num_points, *config_, &msm_output);
  if (status != gpuSuccess) {
    return false;
  }

  // The raw output goes through to_montgomery() before it is reinterpreted as
  // a tachyon point.
  // NOTE(review): presumably tachyon keeps field elements in Montgomery form
  // while Icicle returns them in canonical form — confirm.
  const ::bn254::g2_projective_t montgomery_output =
      ::bn254::g2_projective_t::to_montgomery(msm_output);
  *cpu_result = base::bit_cast<ProjectivePoint<Curve>>(montgomery_output);
  return true;
}

} // namespace tachyon::math

#endif // TACHYON_MATH_ELLIPTIC_CURVES_MSM_ALGORITHMS_ICICLE_ICICLE_MSM_H_

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#include "tachyon/math/elliptic_curves/msm/algorithms/icicle/icicle_msm_bn254_g1.h"

#include "third_party/icicle/src/msm/msm.cu.cc" // NOLINT(build/include)

// Forwards its arguments unchanged to ::msm::msm() for bn254 G1 points
// (affine_t inputs, projective_t output) and returns that call's
// cudaError_t.
cudaError_t tachyon_bn254_g1_msm_cuda(const ::bn254::scalar_t* scalars,
const ::bn254::affine_t* points,
int msm_size, ::msm::MSMConfig& config,
::bn254::projective_t* out) {
return ::msm::msm(scalars, points, msm_size, config, out);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef TACHYON_MATH_ELLIPTIC_CURVES_MSM_ALGORITHMS_ICICLE_ICICLE_MSM_BN254_G1_H_
#define TACHYON_MATH_ELLIPTIC_CURVES_MSM_ALGORITHMS_ICICLE_ICICLE_MSM_BN254_G1_H_

#include "third_party/icicle/include/curves/params/bn254.cu.h"
#include "third_party/icicle/include/msm/msm.cu.h"

// Entry point for the bn254 G1 MSM, declared with C linkage.
// |scalars| and |points| are the inputs, |msm_size| is passed as an int, and
// the result is written through |out|. Returns a cudaError_t.
// NOTE(review): presumably |scalars| and |points| each hold |msm_size|
// elements — confirm against the Icicle MSM documentation.
extern "C" cudaError_t tachyon_bn254_g1_msm_cuda(
const ::bn254::scalar_t* scalars, const ::bn254::affine_t* points,
int msm_size, ::msm::MSMConfig& config, ::bn254::projective_t* out);

#endif // TACHYON_MATH_ELLIPTIC_CURVES_MSM_ALGORITHMS_ICICLE_ICICLE_MSM_BN254_G1_H_
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#include "tachyon/math/elliptic_curves/msm/algorithms/icicle/icicle_msm_bn254_g2.h"

#include "third_party/icicle/src/msm/msm.cu.cc" // NOLINT(build/include)

// Forwards its arguments unchanged to ::msm::msm() for bn254 G2 points
// (g2_affine_t inputs, g2_projective_t output) and returns that call's
// cudaError_t.
cudaError_t tachyon_bn254_g2_msm_cuda(const ::bn254::scalar_t* scalars,
const ::bn254::g2_affine_t* points,
int msm_size, ::msm::MSMConfig& config,
::bn254::g2_projective_t* out) {
return ::msm::msm(scalars, points, msm_size, config, out);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef TACHYON_MATH_ELLIPTIC_CURVES_MSM_ALGORITHMS_ICICLE_ICICLE_MSM_BN254_G2_H_
#define TACHYON_MATH_ELLIPTIC_CURVES_MSM_ALGORITHMS_ICICLE_ICICLE_MSM_BN254_G2_H_

#include "third_party/icicle/include/curves/params/bn254.cu.h"
#include "third_party/icicle/include/msm/msm.cu.h"

// Entry point for the bn254 G2 MSM, declared with C linkage.
// |scalars| and |points| are the inputs, |msm_size| is passed as an int, and
// the result is written through |out|. Returns a cudaError_t.
// NOTE(review): presumably |scalars| and |points| each hold |msm_size|
// elements — confirm against the Icicle MSM documentation.
extern "C" cudaError_t tachyon_bn254_g2_msm_cuda(
const ::bn254::scalar_t* scalars, const ::bn254::g2_affine_t* points,
int msm_size, ::msm::MSMConfig& config, ::bn254::g2_projective_t* out);

#endif // TACHYON_MATH_ELLIPTIC_CURVES_MSM_ALGORITHMS_ICICLE_ICICLE_MSM_BN254_G2_H_
38 changes: 24 additions & 14 deletions tachyon/math/elliptic_curves/msm/variable_base_msm_gpu_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "tachyon/device/gpu/gpu_enums.h"
#include "tachyon/device/gpu/scoped_mem_pool.h"
#include "tachyon/device/gpu/scoped_stream.h"
#include "tachyon/math/elliptic_curves/bn/bn254/g1.h"
#include "tachyon/math/elliptic_curves/msm/test/variable_base_msm_test_set.h"

namespace tachyon::math {
Expand All @@ -18,15 +17,18 @@ constexpr size_t kThreadNum = 32;

using namespace device;

template <typename Point>
class VariableMSMCorrectnessGpuTest : public testing::Test {
public:
using Curve = typename Point::Curve;

constexpr static size_t kLogCount = 10;
constexpr static size_t kCount = 1 << kLogCount;

static void SetUpTestSuite() {
bn254::G1Curve::Init();
Point::Curve::Init();

test_set_ = VariableBaseMSMTestSet<bn254::G1AffinePoint>::Random(
test_set_ = VariableBaseMSMTestSet<Point>::Random(
kCount, VariableBaseMSMMethod::kMSM);

expected_ = test_set_.answer.ToProjective();
Expand All @@ -35,17 +37,25 @@ class VariableMSMCorrectnessGpuTest : public testing::Test {
static void TearDownTestSuite() { GPU_MUST_SUCCESS(gpuDeviceReset(), ""); }

protected:
static VariableBaseMSMTestSet<bn254::G1AffinePoint> test_set_;
static bn254::G1ProjectivePoint expected_;
static VariableBaseMSMTestSet<Point> test_set_;
static ProjectivePoint<Curve> expected_;
};

VariableBaseMSMTestSet<bn254::G1AffinePoint>
VariableMSMCorrectnessGpuTest::test_set_;
bn254::G1ProjectivePoint VariableMSMCorrectnessGpuTest::expected_;
template <typename Point>
VariableBaseMSMTestSet<Point> VariableMSMCorrectnessGpuTest<Point>::test_set_;
template <typename Point>
ProjectivePoint<typename Point::Curve>
VariableMSMCorrectnessGpuTest<Point>::expected_;

} // namespace

TEST_F(VariableMSMCorrectnessGpuTest, MSM) {
using PointTypes = testing::Types<bn254::G1AffinePoint, bn254::G2AffinePoint>;
TYPED_TEST_SUITE(VariableMSMCorrectnessGpuTest, PointTypes);

TYPED_TEST(VariableMSMCorrectnessGpuTest, MSM) {
using Point = TypeParam;
using Curve = typename Point::Curve;

gpuMemPoolProps props = {gpuMemAllocationTypePinned,
gpuMemHandleTypeNone,
{gpuMemLocationTypeDevice, 0}};
Expand All @@ -58,11 +68,11 @@ TEST_F(VariableMSMCorrectnessGpuTest, MSM) {

gpu::ScopedStream stream = gpu::CreateStream();

VariableBaseMSMGpu<bn254::G1AffinePoint> msm_gpu(mem_pool.get(),
stream.get());
bn254::G1ProjectivePoint actual;
ASSERT_TRUE(msm_gpu.Run(test_set_.bases, test_set_.scalars, &actual));
EXPECT_EQ(actual, expected_);
VariableBaseMSMGpu<Point> msm_gpu(mem_pool.get(), stream.get());
ProjectivePoint<Curve> actual;
ASSERT_TRUE(
msm_gpu.Run(this->test_set_.bases, this->test_set_.scalars, &actual));
EXPECT_EQ(actual, this->expected_);
}

} // namespace tachyon::math
41 changes: 24 additions & 17 deletions tachyon/zk/r1cs/constraint_system/quadratic_arithmetic_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,15 @@ namespace tachyon::zk::r1cs {
template <typename F>
F EvaluateConstraint(const std::vector<Cell<F>>& cells,
absl::Span<const F> assignments) {
std::vector<F> sums = base::ParallelizeMap(
cells, [assignments](absl::Span<const Cell<F>> chunk) {
F sum;
for (const Cell<F>& cell : chunk) {
if (cell.coefficient.IsOne()) {
sum += assignments[cell.index];
} else {
sum += assignments[cell.index] * cell.coefficient;
}
}
return sum;
});
return std::accumulate(sums.begin(), sums.end(), F::Zero(), std::plus<>());
F sum;
for (const Cell<F>& cell : cells) {
if (cell.coefficient.IsOne()) {
sum += assignments[cell.index];
} else {
sum += assignments[cell.index] * cell.coefficient;
}
}
return sum;
}

template <typename F>
Expand Down Expand Up @@ -142,10 +138,21 @@ class QuadraticArithmeticProgram {
// = 0 (otherwise)
// where x is |full_assignments|.
// clang-format on
OPENMP_PARALLEL_FOR(size_t i = 0; i < matrices.num_constraints; ++i) {
a[i] = EvaluateConstraint(matrices.a[i], full_assignments);
b[i] = EvaluateConstraint(matrices.b[i], full_assignments);
c[i] = EvaluateConstraint(matrices.c[i], full_assignments);
// Evaluate the a, b and c constraints for every row in [0, num_constraints)
// inside one OMP_PARALLEL block, using one loop per output.
// Each loop writes only to its own output (a[i], b[i] or c[i]), so the first
// two loops use the nowait variant and only the last one uses plain OMP_FOR.
// NOTE(review): this relies on a, b and c not aliasing each other — confirm.
OMP_PARALLEL {
OMP_FOR_NOWAIT
for (size_t i = 0; i < matrices.num_constraints; ++i) {
a[i] = EvaluateConstraint(matrices.a[i], full_assignments);
}

OMP_FOR_NOWAIT
for (size_t i = 0; i < matrices.num_constraints; ++i) {
b[i] = EvaluateConstraint(matrices.b[i], full_assignments);
}

OMP_FOR
for (size_t i = 0; i < matrices.num_constraints; ++i) {
c[i] = EvaluateConstraint(matrices.c[i], full_assignments);
}
}

for (size_t i = matrices.num_constraints;
Expand Down
3 changes: 3 additions & 0 deletions tachyon/zk/r1cs/groth16/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ tachyon_cc_library(
":proof",
":proving_key",
"//tachyon/base:optional",
"//tachyon/device/gpu:scoped_mem_pool",
"//tachyon/device/gpu:scoped_stream",
"//tachyon/math/elliptic_curves/msm:variable_base_msm",
"//tachyon/math/elliptic_curves/msm:variable_base_msm_gpu",
"//tachyon/zk/r1cs/constraint_system:qap_witness_map_result",
],
)
Expand Down
Loading

0 comments on commit e6e2a65

Please sign in to comment.