Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/roman/extension fields #598

Merged
merged 13 commits into from
Sep 3, 2024
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@
**/wrappers/rust/icicle-cuda-runtime/src/bindings.rs
**/build/*
**tar.gz
icicle/backend/cuda
4 changes: 2 additions & 2 deletions examples/c++/best-practice-ntt/example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ int main(int argc, char* argv[])
ConfigExtension backend_cfg_ext;
backend_cfg_ext.set(CudaBackendConfig::CUDA_NTT_FAST_TWIDDLES_MODE, true);
ntt_init_domain_cfg.ext = &backend_cfg_ext;
ICICLE_CHECK(bn254_ntt_init_domain(&basic_root, ntt_init_domain_cfg));
ICICLE_CHECK(bn254_ntt_init_domain(&basic_root, &ntt_init_domain_cfg));

std::cout << "Concurrent Download, Upload, and Compute In-place NTT" << std::endl;
int nof_blocks = 32;
Expand Down Expand Up @@ -89,7 +89,7 @@ int main(int argc, char* argv[])
std::cout << "Compute Vector: " << vec_compute << std::endl;
std::cout << "Transfer Vector: " << vec_transfer << std::endl;
START_TIMER(inplace);
bn254_ntt(d_vec[vec_compute], ntt_size, NTTDir::kForward, config_compute, d_vec[vec_compute]);
bn254_ntt(d_vec[vec_compute], ntt_size, NTTDir::kForward, &config_compute, d_vec[vec_compute]);
// we have to delay upload to device relative to download from device by one block: preserve write after read
for (int i = 0; i <= nof_blocks; i++) {
if (i < nof_blocks) {
Expand Down
4 changes: 2 additions & 2 deletions examples/c++/msm/example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ int main(int argc, char* argv[])
std::cout << "\nRunning MSM kernel with on-host inputs" << std::endl;
// Execute the MSM kernel
START_TIMER(MSM_host_mem);
ICICLE_CHECK(bn254_msm(scalars.get(), points.get(), msm_size, config, &result));
ICICLE_CHECK(bn254_msm(scalars.get(), points.get(), msm_size, &config, &result));
END_TIMER(MSM_host_mem, "MSM from host-memory took");
std::cout << projective_t::to_affine(result) << std::endl;

Expand Down Expand Up @@ -91,7 +91,7 @@ int main(int argc, char* argv[])
config.are_points_on_device = false;
g2_projective_t g2_result;
START_TIMER(MSM_g2);
ICICLE_CHECK(bn254_g2_msm(scalars.get(), g2_points.get(), msm_size, config, &g2_result));
ICICLE_CHECK(bn254_g2_msm(scalars.get(), g2_points.get(), msm_size, &config, &g2_result));
END_TIMER(MSM_g2, "MSM G2 from host-memory took");
std::cout << g2_projective_t::to_affine(g2_result) << std::endl;

Expand Down
8 changes: 4 additions & 4 deletions examples/c++/ntt/example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ int main(int argc, char* argv[])
backend_cfg_ext.set(
CudaBackendConfig::CUDA_NTT_FAST_TWIDDLES_MODE, true); // optionally construct fast_twiddles for CUDA backend
ntt_init_domain_cfg.ext = &backend_cfg_ext;
ICICLE_CHECK(bn254_ntt_init_domain(&basic_root, ntt_init_domain_cfg));
ICICLE_CHECK(bn254_ntt_init_domain(&basic_root, &ntt_init_domain_cfg));

// ntt configuration
NTTConfig<scalar_t> config = default_ntt_config<scalar_t>();
Expand All @@ -46,13 +46,13 @@ int main(int argc, char* argv[])
config.batch_size = batch_size;

// warmup
ICICLE_CHECK(bn254_ntt(input.get(), ntt_size, NTTDir::kForward, config, output.get()));
ICICLE_CHECK(bn254_ntt(input.get(), ntt_size, NTTDir::kForward, &config, output.get()));

// NTT radix-2 alg
std::cout << "\nRunning NTT radix-2 alg with on-host data" << std::endl;
ntt_cfg_ext.set(CudaBackendConfig::CUDA_NTT_ALGORITHM, CudaBackendConfig::NttAlgorithm::Radix2);
START_TIMER(Radix2);
ICICLE_CHECK(bn254_ntt(input.get(), ntt_size, NTTDir::kForward, config, output.get()));
ICICLE_CHECK(bn254_ntt(input.get(), ntt_size, NTTDir::kForward, &config, output.get()));
END_TIMER(Radix2, "Radix2 NTT");

std::cout << "Validating output" << std::endl;
Expand All @@ -62,7 +62,7 @@ int main(int argc, char* argv[])
std::cout << "\nRunning NTT mixed-radix alg with on-host data" << std::endl;
ntt_cfg_ext.set(CudaBackendConfig::CUDA_NTT_ALGORITHM, CudaBackendConfig::NttAlgorithm::MixedRadix);
START_TIMER(MixedRadix);
ICICLE_CHECK(bn254_ntt(input.get(), ntt_size, NTTDir::kForward, config, output.get()));
ICICLE_CHECK(bn254_ntt(input.get(), ntt_size, NTTDir::kForward, &config, output.get()));
END_TIMER(MixedRadix, "MixedRadix NTT");

std::cout << "Validating output" << std::endl;
Expand Down
2 changes: 1 addition & 1 deletion examples/c++/pedersen-commitment/example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ int main(int argc, char** argv)
std::cout << "Executing MSM" << std::endl;
auto config = default_msm_config();
START_TIMER(msm);
bn254_msm(scalars, points, N + 1, config, &result);
bn254_msm(scalars, points, N + 1, &config, &result);
END_TIMER(msm, "Time to execute MSM");

std::cout << "Computed commitment: " << result << std::endl;
Expand Down
11 changes: 6 additions & 5 deletions examples/c++/polynomial_multiplication/example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ int main(int argc, char** argv)

// init domain
scalar_t basic_root = scalar_t::omega(NTT_LOG_SIZE);
bn254_ntt_init_domain(&basic_root, default_ntt_init_domain_config());
auto config = default_ntt_init_domain_config();
bn254_ntt_init_domain(&basic_root, &config);

// (1) cpu allocation
auto polyA = std::make_unique<scalar_t[]>(NTT_SIZE);
Expand All @@ -64,8 +65,8 @@ int main(int argc, char** argv)
ntt_config.are_inputs_on_device = false;
ntt_config.are_outputs_on_device = true;
ntt_config.ordering = Ordering::kNM;
ICICLE_CHECK(bn254_ntt(polyA.get(), NTT_SIZE, NTTDir::kForward, ntt_config, d_polyA));
ICICLE_CHECK(bn254_ntt(polyB.get(), NTT_SIZE, NTTDir::kForward, ntt_config, d_polyB));
ICICLE_CHECK(bn254_ntt(polyA.get(), NTT_SIZE, NTTDir::kForward, &ntt_config, d_polyA));
ICICLE_CHECK(bn254_ntt(polyB.get(), NTT_SIZE, NTTDir::kForward, &ntt_config, d_polyB));

// (4) multiply A,B
VecOpsConfig config{
Expand All @@ -76,13 +77,13 @@ int main(int argc, char** argv)
false, // is_async
nullptr // ext
};
ICICLE_CHECK(bn254_vector_mul(d_polyA, d_polyB, NTT_SIZE, config, d_polyRes));
ICICLE_CHECK(bn254_vector_mul(d_polyA, d_polyB, NTT_SIZE, &config, d_polyRes));

// (5) INTT (in place)
ntt_config.are_inputs_on_device = true;
ntt_config.are_outputs_on_device = true;
ntt_config.ordering = Ordering::kMN;
ICICLE_CHECK(bn254_ntt(d_polyRes, NTT_SIZE, NTTDir::kInverse, ntt_config, d_polyRes));
ICICLE_CHECK(bn254_ntt(d_polyRes, NTT_SIZE, NTTDir::kInverse, &ntt_config, d_polyRes));

if (print) { END_TIMER(poly_multiply, "polynomial multiplication took"); }

Expand Down
99 changes: 34 additions & 65 deletions icicle/include/icicle/api/babybear.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,96 +9,65 @@
#include "icicle/ntt.h"
#include "icicle/vec_ops.h"

extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);
extern "C" eIcicleError babybear_ntt_init_domain(
babybear::scalar_t* primitive_root, const NTTInitDomainConfig* config);

extern "C" void babybear_scalar_convert_montgomery(
const babybear::scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, babybear::scalar_t* output);
extern "C" eIcicleError babybear_ntt(
const babybear::scalar_t* input, int size, NTTDir dir, const NTTConfig<babybear::scalar_t>* config, babybear::scalar_t* output);

extern "C" eIcicleError babybear_ntt_init_domain(babybear::scalar_t* primitive_root, const NTTInitDomainConfig& config);
extern "C" eIcicleError babybear_ntt_release_domain();

extern "C" eIcicleError babybear_ntt(
extern "C" eIcicleError babybear_vector_mul(
const babybear::scalar_t* vec_a, const babybear::scalar_t* vec_b, uint64_t n, const VecOpsConfig* config, babybear::scalar_t* result);

extern "C" eIcicleError babybear_vector_add(
const babybear::scalar_t* vec_a, const babybear::scalar_t* vec_b, uint64_t n, const VecOpsConfig* config, babybear::scalar_t* result);

extern "C" eIcicleError babybear_vector_sub(
const babybear::scalar_t* vec_a, const babybear::scalar_t* vec_b, uint64_t n, const VecOpsConfig* config, babybear::scalar_t* result);

extern "C" eIcicleError babybear_matrix_transpose(
const babybear::scalar_t* input,
int size,
NTTDir dir,
const NTTConfig<babybear::scalar_t>& config,
uint32_t nof_rows,
uint32_t nof_cols,
const VecOpsConfig* config,
babybear::scalar_t* output);

extern "C" eIcicleError babybear_ntt_release_domain();
extern "C" eIcicleError babybear_bit_reverse(
const babybear::scalar_t* input, uint64_t n, const VecOpsConfig* config, babybear::scalar_t* output);


extern "C" eIcicleError babybear_extension_vector_mul(
const babybear::extension_t* vec_a,
const babybear::extension_t* vec_b,
uint64_t n,
const VecOpsConfig& config,
babybear::extension_t* result);
const babybear::extension_t* vec_a, const babybear::extension_t* vec_b, uint64_t n, const VecOpsConfig* config, babybear::extension_t* result);

extern "C" eIcicleError babybear_extension_vector_add(
const babybear::extension_t* vec_a,
const babybear::extension_t* vec_b,
uint64_t n,
const VecOpsConfig& config,
babybear::extension_t* result);
const babybear::extension_t* vec_a, const babybear::extension_t* vec_b, uint64_t n, const VecOpsConfig* config, babybear::extension_t* result);

extern "C" eIcicleError babybear_extension_vector_sub(
const babybear::extension_t* vec_a,
const babybear::extension_t* vec_b,
uint64_t n,
const VecOpsConfig& config,
babybear::extension_t* result);
const babybear::extension_t* vec_a, const babybear::extension_t* vec_b, uint64_t n, const VecOpsConfig* config, babybear::extension_t* result);

extern "C" eIcicleError babybear_extension_matrix_transpose(
const babybear::extension_t* input,
uint32_t nof_rows,
uint32_t nof_cols,
const VecOpsConfig& config,
const VecOpsConfig* config,
babybear::extension_t* output);

extern "C" eIcicleError babybear_extension_bit_reverse(
const babybear::extension_t* input, uint64_t n, const VecOpsConfig& config, babybear::extension_t* output);
const babybear::extension_t* input, uint64_t n, const VecOpsConfig* config, babybear::extension_t* output);

extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);

extern "C" eIcicleError babybear_extension_scalar_convert_montgomery(
const babybear::extension_t* input,
uint64_t size,
bool is_into,
const VecOpsConfig& config,
babybear::extension_t* output);
extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);

extern "C" eIcicleError babybear_vector_mul(
const babybear::scalar_t* vec_a,
const babybear::scalar_t* vec_b,
uint64_t n,
const VecOpsConfig& config,
babybear::scalar_t* result);
extern "C" eIcicleError babybear_extension_scalar_convert_montgomery(
const babybear::extension_t* input, uint64_t size, bool is_into, const VecOpsConfig* config, babybear::extension_t* output);

extern "C" eIcicleError babybear_vector_add(
const babybear::scalar_t* vec_a,
const babybear::scalar_t* vec_b,
uint64_t n,
const VecOpsConfig& config,
babybear::scalar_t* result);
extern "C" eIcicleError babybear_extension_ntt(
const babybear::extension_t* input, int size, NTTDir dir, const NTTConfig<babybear::scalar_t>* config, babybear::extension_t* output);

extern "C" eIcicleError babybear_vector_sub(
const babybear::scalar_t* vec_a,
const babybear::scalar_t* vec_b,
uint64_t n,
const VecOpsConfig& config,
babybear::scalar_t* result);

extern "C" eIcicleError babybear_matrix_transpose(
const babybear::scalar_t* input,
uint32_t nof_rows,
uint32_t nof_cols,
const VecOpsConfig& config,
babybear::scalar_t* output);
extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);

extern "C" eIcicleError babybear_bit_reverse(
const babybear::scalar_t* input, uint64_t n, const VecOpsConfig& config, babybear::scalar_t* output);
extern "C" void babybear_scalar_convert_montgomery(
const babybear::scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig* config, babybear::scalar_t* output);

extern "C" eIcicleError babybear_extension_ntt(
const babybear::extension_t* input,
int size,
NTTDir dir,
const NTTConfig<babybear::scalar_t>& config,
babybear::extension_t* output);
Loading
Loading