diff --git a/enzyme/test/Integration/Sparse/eigen_analysis.cpp b/enzyme/test/Integration/Sparse/eigen_analysis.cpp index d8dc311f957a..2655c21d8a4c 100644 --- a/enzyme/test/Integration/Sparse/eigen_analysis.cpp +++ b/enzyme/test/Integration/Sparse/eigen_analysis.cpp @@ -150,41 +150,6 @@ static void gradient_ip(const T *__restrict__ pos0, const size_t num_faces, cons enzyme_dup, x, out); } - -template -__attribute__((always_inline)) -static T ident_load(unsigned long long offset, size_t i) { - return (offset / sizeof(T) == i) ? T(1) : T(0); -} - - -template -__attribute__((always_inline)) -static void err_store(T val, unsigned long long offset, size_t i) { - assert(0 && "store is not legal"); -} - - -template -__attribute__((always_inline)) -static T zero_load(unsigned long long offset, size_t i, std::vector> &hess) { - return T(0); -} - - -__attribute__((enzyme_sparse_accumulate)) -void inner_store(size_t offset, size_t i, float val, std::vector> &hess) { - hess.push_back(Triple(offset, i, val)); -} - -template -__attribute__((always_inline)) -static void csr_store(T val, unsigned long long offset, size_t i, std::vector> &hess) { - if (val == 0.0) return; - offset /= sizeof(T); - inner_store(offset, i, val, hess); -} - template __attribute__((noinline)) std::vector> hessian(const T*__restrict__ pos0, size_t num_faces, const int* faces, const T*__restrict__ x, size_t x_pts) @@ -217,13 +182,20 @@ std::vector> hessian(const T*__restrict__ pos0, size_t num_faces, cons enzyme_const, pos02, enzyme_const, num_faces, enzyme_const, faces, - enzyme_dup, x2, __enzyme_todense(ident_load, err_store, i), - enzyme_dupnoneed, nullptr, __enzyme_todense(zero_load, csr_store, i, &hess)); + enzyme_dup, x2, __enzyme_todense(ident_load, ident_store, i), + enzyme_dupnoneed, nullptr, __enzyme_todense(sparse_load, sparse_store, i, &hess)); return hess; } -int main() { - const size_t x_pts = 1; +int main(int argc, char** argv) { + size_t x_pts = 8; + + if (argc >= 2) { + x_pts = atoi(argv[1]); + } + + // TODO generate data for more inputs + assert(x_ptrs == 1); const float x[] = {0.0, 1.0, 0.0}; @@ -233,25 +205,37 @@ int main() { const float pos0[] = {1.0, 2.0, 3.0, 4.0, 3.0, 2.0, 3.0, 1.0, 3.0}; // Call eigenstuffM_simple + struct timeval start, end; + gettimeofday(&start, NULL); const float resultM = eigenstuffM(pos0, num_faces, faces, x); - printf("Result for eigenstuffM_simple: %f\n", resultM); + gettimeofday(&end, NULL); + printf("Result for eigenstuffM_simple: %f, runtime:%f\n", resultM, tdiff(end, start)); // Call eigenstuffL_simple + gettimeofday(&start, NULL); const float resultL = eigenstuffL(pos0, num_faces, faces, x); - printf("Result for eigenstuffL_simple: %f\n", resultL); + gettimeofday(&end, NULL); + printf("Result for eigenstuffL_simple: %f, runtime:%f\n", resultL, tdiff(end, start)); float dx[sizeof(x)/sizeof(x[0])]; for (size_t i=0; i #include +#include +float tdiff(struct timeval *start, struct timeval *end) { + return (end->tv_sec-start->tv_sec) + 1e-6*(end->tv_usec-start->tv_usec); +} + template struct Triple { size_t row; @@ -10,6 +15,53 @@ struct Triple { Triple(size_t row, size_t col, T val) : row(row), col(col), val(val) {} }; +__attribute__((enzyme_sparse_accumulate)) +static void inner_store(int64_t row, int64_t col, size_t N, float val, std::vector> &triplets) { +#ifdef BENCHMARK + if (val == 0.0) return; +#else +#warning "Compiling for debug/verfication, performance may be slowed" +#endif + triplets.emplace_back(row % N, col % N, val); +} + +__attribute__((enzyme_sparse_accumulate)) +static void inner_store(int64_t row, int64_t col, size_t N, double val, std::vector> &triplets) { +#ifdef BENCHMARK + if (val == 0.0) return; +#else +#warning "Compiling for debug/verfication, performance may be slowed" +#endif + triplets.emplace_back(row % N, col % N, val); +} + +template +__attribute__((always_inline)) +static void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector> &triplets) { + if (val == 0.0) return; + idx /= sizeof(T); + inner_store(i, idx, N, val, triplets); +} + +template +__attribute__((always_inline)) +static double sparse_load(int64_t idx, size_t i, size_t N, std::vector> &triplets) { + return 0.0; +} + +template +__attribute__((always_inline)) +static void ident_store(T, int64_t idx, size_t i) { + assert(0 && "should never load"); +} + +template +__attribute__((always_inline)) +static double ident_load(int64_t idx, size_t i, size_t N) { + idx /= sizeof(T); + return (T)(idx == i);// ? 1.0 : 0.0; +} + extern int enzyme_width; extern int enzyme_dup; extern int enzyme_dupv; @@ -17,16 +69,16 @@ extern int enzyme_const; extern int enzyme_dupnoneed; template -extern T __enzyme_autodiff(void*, Tys...); +extern T __enzyme_autodiff(void*, Tys...) noexcept; template -extern T __enzyme_fwddiff(void *, Tys...); +extern T __enzyme_fwddiff(void *, Tys...) noexcept; template -extern T __enzyme_todense(Tys...); +extern T __enzyme_todense(Tys...) noexcept; template -extern T __enzyme_post_sparse_todense(Tys...); +extern T __enzyme_post_sparse_todense(Tys...) noexcept; template __attribute__((always_inline)) @@ -200,4 +252,4 @@ static T area(const T *__restrict__ u, const T *__restrict__ v, const T *__restr T cross_product[3]; cross(cross_product, diff1, diff2); return 0.5 * norm(cross_product); -} \ No newline at end of file +} diff --git a/enzyme/test/Integration/Sparse/ringspring.cpp b/enzyme/test/Integration/Sparse/ringspring.cpp index 0ecae72bef5e..873629cd0b0b 100644 --- a/enzyme/test/Integration/Sparse/ringspring.cpp +++ b/enzyme/test/Integration/Sparse/ringspring.cpp @@ -17,123 +17,97 @@ #include -struct triple { - size_t row; - size_t col; - double val; - triple(triple&&) = default; - triple(size_t row, size_t col, double val) : row(row), col(col), val(val) {} -}; - - -size_t N = 8; - -extern int enzyme_dup; -extern int enzyme_dupnoneed; -extern int enzyme_out; -extern int enzyme_const; - -extern void __enzyme_autodiff(void *, ...); - -extern void __enzyme_fwddiff(void *, ...); - -extern double* __enzyme_todense(void *, ...) noexcept; - +#include "matrix.h" +template __attribute__((always_inline)) -static double f(size_t N, double* input) { +static T f(size_t N, T* input) { double out = 0; // __builtin_assume(!((N-1) == 0)); for (size_t i=0; i __attribute__((always_inline)) -static void grad_f(size_t N, double* input, double* dinput) { - __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); -} - -__attribute__((always_inline)) -static void ident_store(double , int64_t idx, size_t i) { - assert(0 && "should never load"); +static void grad_f(size_t N, T* input, T* dinput) { + __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); } +template __attribute__((always_inline)) -double ident_load(int64_t idx, size_t i, size_t N) { +double ringident_load(int64_t idx, size_t i, size_t N) { idx /= sizeof(double); // return (double)( ( (idx == N) ? 0 : idx) == i); return (double)((idx != N && idx == i) || (idx == N && 0 == i)); // return (double)( idx % N == i); } - -__attribute__((enzyme_sparse_accumulate)) -void inner_store(int64_t row, int64_t col, double val, std::vector &triplets) { - printf("row=%d col=%d val=%f\n", row, col % N, val); - // assert(abs(val) > 0.00001); - triplets.emplace_back(row % N, col % N, val); -} - -__attribute__((always_inline)) -void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector &triplets) { - if (val == 0.0) return; - idx /= sizeof(double); - inner_store(i, idx, val, triplets); -} - -__attribute__((always_inline)) -double sparse_load(int64_t idx, size_t i, size_t N, std::vector &triplets) { - return 0.0; -} - +template __attribute__((always_inline)) void never_store(double val, int64_t idx, double* input, size_t N) { assert(0 && "this is a read only input, why are you storing here..."); } +template __attribute__((always_inline)) double mod_load(int64_t idx, double* input, size_t N) { idx /= sizeof(double); return input[idx % N]; } +template __attribute__((noinline)) -std::vector hess_f(size_t N, double* input) { - std::vector triplets; - input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N); +std::vector> hess_f(size_t N, T* input) { + std::vector> triplets; + input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N); __builtin_assume(N > 0); __builtin_assume(N != 1); for (size_t i=0; i((void*)ringident_load, (void*)ident_store, i, N); + T* d_dinput = __enzyme_todense((void*)sparse_load, (void*)sparse_store, i, N, &triplets); - __enzyme_fwddiff((void*)grad_f, + __enzyme_fwddiff((void*)grad_f, enzyme_const, N, enzyme_dup, input, d_input, - enzyme_dupnoneed, (double*)0x1, d_dinput); + enzyme_dupnoneed, (T*)0x1, d_dinput); } return triplets; } -int main() { - // size_t N = 8; - double x[N]; - for (int i=0; i= 2) { + N = atoi(argv[1]); + } + + double *x = (double*)malloc(sizeof(double) * N); + for (int i=0; i #include #include #include - #include -struct triple { - size_t row; - size_t col; - double val; - triple(triple&&) = default; - triple(size_t row, size_t col, double val) : row(row), col(col), val(val) {} -}; - - -extern int enzyme_dup; -extern int enzyme_dupnoneed; -extern int enzyme_out; -extern int enzyme_const; - -extern void __enzyme_autodiff(void *, ...); - -extern void __enzyme_fwddiff(void *, ...); - -extern double* __enzyme_todense(void *, ...) noexcept; - +#include "matrix.h" +template __attribute__((always_inline)) -static double f(size_t N, double* pos) { +static double f(size_t N, T* __restrict__ pos) { double e = 0.; - for (size_t i = 0; i < N; i += 3) { - double vx = pos[i]; - double vy = pos[i + 1]; - double vz = pos[i + 2]; + __builtin_assume(N != 0); + for (size_t j = 0; j < N/3; j ++) { + size_t i = 3 * j; + T vx = pos[i]; + T vy = pos[i + 1]; + T vz = pos[i + 2]; - double wx = pos[i + 3]; - double wy = pos[i + 4]; - double wz = pos[i + 5]; - double distance = (wx - vx) * (wx - vx) + (wy - vy) * (wy - vy) + (wz - vz) * (wz - vz); - double rest_len_one_dist = (sqrt(distance) - 1) * (sqrt(distance) - 1); + T wx = pos[i + 3]; + T wy = pos[i + 4]; + T wz = pos[i + 5]; + T distance = (wx - vx) * (wx - vx) + (wy - vy) * (wy - vy) + (wz - vz) * (wz - vz); + T rest_len_one_dist = (sqrt(distance) - 1) * (sqrt(distance) - 1); e += rest_len_one_dist; } return e; } - -__attribute__((always_inline)) -static void grad_f(size_t N, double* input, double* dinput) { - __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); -} - - +template __attribute__((always_inline)) -static void ident_store(double , int64_t idx, size_t i) { - assert(0 && "should never load"); +static void grad_f(size_t N, T* input, T* dinput) { + __enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput); } +template __attribute__((always_inline)) -static double ident_load(int64_t idx, size_t i, size_t N) { - idx /= sizeof(double); - return (double)(idx == i);// ? 1.0 : 0.0; -} - -__attribute__((enzyme_sparse_accumulate)) -static void inner_store(int64_t row, int64_t col, size_t N, double val, std::vector &triplets) { - printf("row=%d col=%d val=%f\n", row, col % N, val); - // assert(abs(val) > 0.00001); - triplets.emplace_back(row % N, col % N, val); -} - -__attribute__((always_inline)) -static void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector &triplets) { - if (val == 0.0) return; - idx /= sizeof(double); - inner_store(i, idx, N, val, triplets); -} - -__attribute__((always_inline)) -static double sparse_load(int64_t idx, size_t i, size_t N, std::vector &triplets) { - return 0.0; -} - -__attribute__((always_inline)) -static void never_store(double val, int64_t idx, double* input, size_t N) { +static void never_store(T val, int64_t idx, T* input, size_t N) { assert(0 && "this is a read only input, why are you storing here..."); } @@ -104,20 +55,21 @@ static double mod_load(int64_t idx, double* input, size_t N) { return input[idx % N]; } +template __attribute__((noinline)) -std::vector hess_f(size_t N, double* input) { - std::vector triplets; +std::vector> hess_f(size_t N, T* input) { + std::vector> triplets; // input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N); __builtin_assume(N > 0); for (size_t i=0; i((void*)ident_load, (void*)ident_store, i, N); + T* d_dinput = __enzyme_todense((void*)sparse_load, (void*)sparse_store, i, N, &triplets); - __enzyme_fwddiff((void*)grad_f, + __enzyme_fwddiff((void*)grad_f, enzyme_const, N, enzyme_dup, input, d_input, - enzyme_dupnoneed, (double*)0x1, d_dinput); + enzyme_dupnoneed, (T*)0x1, d_dinput); } return triplets; @@ -134,34 +86,37 @@ std::vector hess_f2(size_t N, double* input) { */ // int argc, char** argv -int __attribute__((always_inline)) main() { - std::mt19937 generator(0); // Seed the random number generator - std::uniform_real_distribution normal(0, 0.05); - - - // if (argc != 2) { - // printf("Usage: %s \n", argv[0]); - // return 1; - // } - - // size_t N = atoi(argv[1]); +int main(int argc, char** argv) { size_t N = 30; - double x[3 * N]; + if (argc >= 2) { + N = atoi(argv[1]); + } + + double *x = (double*)malloc(sizeof(double) * 3 * N); for (int i = 0; i < N; ++i) { double angle = 2 * M_PI * i / N; x[3 * i] = cos(angle) ;//+ normal(generator); x[3 * i + 1] = sin(angle) ;//+ normal(generator); x[3 * i + 2] = 0;//normal(generator); } - auto res = hess_f(N, &x[0]); - - printf("%ld\n", res.size()); + struct timeval start, end; + gettimeofday(&start, NULL); + + auto res = hess_f(N, x); + + gettimeofday(&end, NULL); + + printf("Number of elements %ld\n", res.size()); + + printf("Runtime %0.6f\n", tdiff(&start, &end)); + if (N <= 30) { for (auto & tup : res) printf("%ld, %ld = %f\n", tup.row, tup.col, tup.val); + } return 0; }