Skip to content

Commit

Permalink
Benchmark wip
Browse files Browse the repository at this point in the history
  • Loading branch information
wsmoses committed Jan 18, 2024
1 parent 1160827 commit 3282636
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 202 deletions.
70 changes: 27 additions & 43 deletions enzyme/test/Integration/Sparse/eigen_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,41 +150,6 @@ static void gradient_ip(const T *__restrict__ pos0, const size_t num_faces, cons
enzyme_dup, x, out);
}


// Load callback describing one row of an identity matrix.
//
// offset: byte offset of the element being read; it is converted to an
//         element index by dividing by sizeof(T).
// i:      index of the single element that reads as one.
//
// Returns T(1) when the element index equals i, T(0) otherwise.
template<typename T>
__attribute__((always_inline))
static T ident_load(unsigned long long offset, size_t i) {
    const bool on_diagonal = (offset / sizeof(T)) == i;
    if (on_diagonal) {
        return T(1);
    }
    return T(0);
}


// Store callback that must never be reached: it unconditionally trips an
// assert with the message "store is not legal".
//
// It is handed to __enzyme_todense together with ident_load, as the store
// half of that load/store pair.
//
// val, offset, i: accepted so the function has the store-callback shape;
//                 none of them is read.
//
// NOTE(review): when NDEBUG is defined the assert expands to nothing, so a
// stray store would be silently ignored rather than aborting.
template<typename T>
__attribute__((always_inline))
static void err_store(T val, unsigned long long offset, size_t i) {
assert(0 && "store is not legal");
}


// Load callback for the sparse output buffer: every element reads as zero.
//
// offset, i, hess: accepted so the function has the load-callback shape used
//                  alongside csr_store; none of them is read.
//
// Returns T(0) for every request.
template<typename T>
__attribute__((always_inline))
static T zero_load(unsigned long long offset, size_t i, std::vector<Triple<T>> &hess) {
    const T nothing_stored = T(0);
    return nothing_stored;
}


__attribute__((enzyme_sparse_accumulate))
void inner_store(size_t offset, size_t i, float val, std::vector<Triple<float>> &hess) {
hess.push_back(Triple<float>(offset, i, val));
}

// Store callback for the sparse output buffer.
//
// Values that compare equal to zero are dropped. Anything else is forwarded
// to inner_store after the byte offset has been turned into an element index
// (offset / sizeof(T)).
//
// val:    value being stored.
// offset: byte offset of the destination element.
// i:      index forwarded unchanged to inner_store.
// hess:   triplet list forwarded to inner_store.
template<typename T>
__attribute__((always_inline))
static void csr_store(T val, unsigned long long offset, size_t i, std::vector<Triple<T>> &hess) {
    const bool is_nonzero = !(val == 0.0);
    if (is_nonzero) {
        const unsigned long long element = offset / sizeof(T);
        inner_store(element, i, val, hess);
    }
}

template<typename T>
__attribute__((noinline))
std::vector<Triple<T>> hessian(const T*__restrict__ pos0, size_t num_faces, const int* faces, const T*__restrict__ x, size_t x_pts)
Expand Down Expand Up @@ -217,13 +182,20 @@ std::vector<Triple<T>> hessian(const T*__restrict__ pos0, size_t num_faces, cons
enzyme_const, pos02,
enzyme_const, num_faces,
enzyme_const, faces,
enzyme_dup, x2, __enzyme_todense<T*>(ident_load<T>, err_store<T>, i),
enzyme_dupnoneed, nullptr, __enzyme_todense<T*>(zero_load<T>, csr_store<T>, i, &hess));
enzyme_dup, x2, __enzyme_todense<T*>(ident_load<T>, ident_store<T>, i),
enzyme_dupnoneed, nullptr, __enzyme_todense<T*>(sparse_load<T>, sparse_store<T>, i, &hess));
return hess;
}

int main() {
const size_t x_pts = 1;
int main(int argc, char** argv) {
size_t x_pts = 8;

if (argc >= 2) {
x_pts = atoi(argv[1]);
}

// TODO generate data for more inputs
assert(x_ptrs == 1);
const float x[] = {0.0, 1.0, 0.0};


Expand All @@ -233,25 +205,37 @@ int main() {
const float pos0[] = {1.0, 2.0, 3.0, 4.0, 3.0, 2.0, 3.0, 1.0, 3.0};

// Call eigenstuffM_simple
struct timeval start, end;
gettimeofday(&start, NULL);
const float resultM = eigenstuffM(pos0, num_faces, faces, x);
printf("Result for eigenstuffM_simple: %f\n", resultM);
gettimeofday(&end, NULL);
printf("Result for eigenstuffM_simple: %f, runtime:%f\n", resultM, tdiff(end, start));

// Call eigenstuffL_simple
gettimeofday(&start, NULL);
const float resultL = eigenstuffL(pos0, num_faces, faces, x);
printf("Result for eigenstuffL_simple: %f\n", resultL);
gettimeofday(&end, NULL);
printf("Result for eigenstuffL_simple: %f, runtime:%f\n", resultL, tdiff(end, start));

float dx[sizeof(x)/sizeof(x[0])];
for (size_t i=0; i<sizeof(dx)/sizeof(x[0]); i++)
dx[i] = 0;
gradient_ip(pos0, num_faces, faces, x, dx);

if (x_pts < 30) {
for (size_t i=0; i<sizeof(dx)/sizeof(dx[0]); i++)
printf("eigenstuffM grad_vert[%zu]=%f\n", i, dx[i]);

size_t num_elts = sizeof(x)/sizeof(x[0]) * sizeof(x)/sizeof(x[0]);
}

gettimeofday(&start, NULL);
auto hess_x = hessian(pos0, num_faces, faces, x, x_pts);
gettimeofday(&end, NULL);

printf("Number of elements %ld\n", hess_x.size());

printf("Runtime %0.6f\n", tdiff(&start, &end));

if (x_pts < 30)
for (auto &hess : hess_x) {
printf("i=%lu, j=%lu, val=%f\n", hess.row, hess.col, hess.val);
}
Expand Down
62 changes: 57 additions & 5 deletions enzyme/test/Integration/Sparse/matrix.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#include <cmath>
#include <stdio.h>

#include <sys/time.h>
// Time elapsed from *start to *end, in seconds.
//
// start: earlier timestamp (e.g. filled in by gettimeofday).
// end:   later timestamp.
//
// Returns the whole-second difference plus the microsecond difference scaled
// by 1e-6. The sum is formed in double precision and narrowed to float on
// return; the result is negative when end precedes start.
//
// Declared static inline because this definition lives in a header: with
// external linkage, a second translation unit including the header would
// produce a multiple-definition error at link time.
static inline float tdiff(struct timeval *start, struct timeval *end) {
    const double whole_seconds = static_cast<double>(end->tv_sec - start->tv_sec);
    const double micro_seconds = static_cast<double>(end->tv_usec - start->tv_usec);
    return static_cast<float>(whole_seconds + 1e-6 * micro_seconds);
}

template<typename T>
struct Triple {
size_t row;
Expand All @@ -10,23 +15,70 @@ struct Triple {
Triple(size_t row, size_t col, T val) : row(row), col(col), val(val) {}
};

// Appends one single-precision entry to the triplet list, with row and
// column both reduced modulo N.
//
// row, col: indices of the entry before wrap-around.
// N:        modulus applied to both indices; must be non-zero.
// val:      value of the entry.
// triplets: list that receives the new entry.
//
// When BENCHMARK is defined, values equal to zero are skipped. Otherwise
// every value is recorded and a compile-time warning is emitted.
__attribute__((enzyme_sparse_accumulate))
static void inner_store(int64_t row, int64_t col, size_t N, float val, std::vector<Triple<float>> &triplets) {
#ifndef BENCHMARK
#warning "Compiling for debug/verfication, performance may be slowed"
#else
    if (val == 0.0) {
        return;
    }
#endif
    const size_t wrapped_row = row % N;
    const size_t wrapped_col = col % N;
    triplets.emplace_back(wrapped_row, wrapped_col, val);
}

// Appends one double-precision entry to the triplet list, with row and
// column both reduced modulo N.
//
// row, col: indices of the entry before wrap-around.
// N:        modulus applied to both indices; must be non-zero.
// val:      value of the entry.
// triplets: list that receives the new entry.
//
// When BENCHMARK is defined, values equal to zero are skipped. Otherwise
// every value is recorded and a compile-time warning is emitted.
__attribute__((enzyme_sparse_accumulate))
static void inner_store(int64_t row, int64_t col, size_t N, double val, std::vector<Triple<double>> &triplets) {
#ifndef BENCHMARK
#warning "Compiling for debug/verfication, performance may be slowed"
#else
    if (val == 0.0) {
        return;
    }
#endif
    const size_t wrapped_row = row % N;
    const size_t wrapped_col = col % N;
    triplets.emplace_back(wrapped_row, wrapped_col, val);
}

// Store callback for the sparse output buffer.
//
// Values equal to zero are dropped. Anything else is forwarded to
// inner_store with i as the row and the element index (idx / sizeof(T)) as
// the column.
//
// val:      value being stored.
// idx:      byte offset of the destination element.
// i:        row index forwarded to inner_store.
// N:        modulus forwarded to inner_store.
// triplets: triplet list forwarded to inner_store.
//
// NOTE(review): val is declared double regardless of T, while the byte
// offset is scaled by sizeof(T) -- confirm this is intended for T = float.
template<typename T>
__attribute__((always_inline))
static void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector<Triple<T>> &triplets) {
    const bool is_nonzero = !(val == 0.0);
    if (is_nonzero) {
        const int64_t column = idx / sizeof(T);
        inner_store(i, column, N, val, triplets);
    }
}

// Load callback for the sparse output buffer: every element reads as zero.
//
// idx, i, N, triplets: accepted so the signature mirrors sparse_store's
//                      trailing arguments; none of them is read.
//
// Returns 0.0 for every request.
template<typename T>
__attribute__((always_inline))
static double sparse_load(int64_t idx, size_t i, size_t N, std::vector<Triple<T>> &triplets) {
    const double empty_entry = 0.0;
    return empty_entry;
}

// Store callback that must never be reached: it unconditionally trips an
// assert. It is passed to __enzyme_todense as the store half of an
// identity-style load/store pair.
//
// (unnamed T), idx, i: accepted so the function has the store-callback
//                      shape; none of them is read.
//
// The assertion message previously read "should never load", which was
// misleading for a store callback; it now names the operation that actually
// occurred.
//
// NOTE(review): when NDEBUG is defined the assert expands to nothing, so a
// stray store would be silently ignored rather than aborting.
template<typename T>
__attribute__((always_inline))
static void ident_store(T, int64_t idx, size_t i) {
    assert(0 && "should never store");
}

// Load callback describing one row of an identity matrix.
//
// idx: byte offset of the element being read; it is converted to an element
//      index by dividing by sizeof(T).
// i:   index of the single element that reads as one.
// N:   not read by this function.
//
// Returns 1 when the element index equals i and 0 otherwise. The value is
// formed as a T and then widened to the double return type.
template<typename T>
__attribute__((always_inline))
static double ident_load(int64_t idx, size_t i, size_t N) {
    const int64_t element = idx / sizeof(T);
    const bool on_diagonal = (element == i);
    return static_cast<T>(on_diagonal);
}

extern int enzyme_width;
extern int enzyme_dup;
extern int enzyme_dupv;
extern int enzyme_const;
extern int enzyme_dupnoneed;

template <typename T, typename... Tys>
extern T __enzyme_autodiff(void*, Tys...);
extern T __enzyme_autodiff(void*, Tys...) noexcept;

template <typename T, typename... Tys>
extern T __enzyme_fwddiff(void *, Tys...);
extern T __enzyme_fwddiff(void *, Tys...) noexcept;

template <typename T, typename... Tys>
extern T __enzyme_todense(Tys...);
extern T __enzyme_todense(Tys...) noexcept;

template <typename T, typename... Tys>
extern T __enzyme_post_sparse_todense(Tys...);
extern T __enzyme_post_sparse_todense(Tys...) noexcept;

template<typename T, size_t n>
__attribute__((always_inline))
Expand Down Expand Up @@ -200,4 +252,4 @@ static T area(const T *__restrict__ u, const T *__restrict__ v, const T *__restr
T cross_product[3];
cross(cross_product, diff1, diff2);
return 0.5 * norm<T, 3>(cross_product);
}
}
104 changes: 39 additions & 65 deletions enzyme/test/Integration/Sparse/ringspring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,123 +17,97 @@

#include<math.h>

// One (row, col, val) entry of a sparse matrix in coordinate form.
struct triple {
    size_t row;  // row index of the entry
    size_t col;  // column index of the entry
    double val;  // value stored at (row, col)

    // Builds an entry from its three components.
    triple(size_t row, size_t col, double val)
        : row{row}, col{col}, val{val} {}

    // Explicitly defaulted move construction. Because a move constructor is
    // declared, the implicit copy constructor is deleted, so entries can be
    // moved but not copied.
    triple(triple&&) = default;
};


size_t N = 8;

extern int enzyme_dup;
extern int enzyme_dupnoneed;
extern int enzyme_out;
extern int enzyme_const;

extern void __enzyme_autodiff(void *, ...);

extern void __enzyme_fwddiff(void *, ...);

extern double* __enzyme_todense(void *, ...) noexcept;

#include "matrix.h"

template<typename T>
__attribute__((always_inline))
static double f(size_t N, double* input) {
static T f(size_t N, T* input) {
double out = 0;
// __builtin_assume(!((N-1) == 0));
for (size_t i=0; i<N; i++) {
//double sub = input[i] - input[i+1];
// out += sub * sub;
double sub = (input[i+1] - input[i]) * (input[i+1] - input[i]);
T sub = (input[i+1] - input[i]) * (input[i+1] - input[i]);
out += (sqrt(sub) - 1)*(sqrt(sub) - 1);
}
return out;
}

template<typename T>
__attribute__((always_inline))
static void grad_f(size_t N, double* input, double* dinput) {
__enzyme_autodiff((void*)f, enzyme_const, N, enzyme_dup, input, dinput);
}

__attribute__((always_inline))
static void ident_store(double , int64_t idx, size_t i) {
assert(0 && "should never load");
static void grad_f(size_t N, T* input, T* dinput) {
__enzyme_autodiff<void>((void*)f<T>, enzyme_const, N, enzyme_dup, input, dinput);
}

template<typename T>
__attribute__((always_inline))
double ident_load(int64_t idx, size_t i, size_t N) {
double ringident_load(int64_t idx, size_t i, size_t N) {
idx /= sizeof(double);
// return (double)( ( (idx == N) ? 0 : idx) == i);
return (double)((idx != N && idx == i) || (idx == N && 0 == i));
// return (double)( idx % N == i);
}

// Logs one entry and appends it to the triplet list, with row and column
// both reduced modulo the file-level N.
//
// row, col: indices of the entry before wrap-around.
// val:      value of the entry.
// triplets: list that receives the new entry.
//
// The log line prints the unwrapped row, the wrapped column and the value.
// The original format string used "%d" for an int64_t and a size_t argument,
// which is undefined behaviour; the specifiers now match the argument types.
__attribute__((enzyme_sparse_accumulate))
void inner_store(int64_t row, int64_t col, double val, std::vector<triple> &triplets) {
    const size_t wrapped_col = col % N;
    printf("row=%lld col=%zu val=%f\n", static_cast<long long>(row), wrapped_col, val);
    // A magnitude check was left disabled here: assert(abs(val) > 0.00001);
    triplets.emplace_back(row % N, wrapped_col, val);
}

// Store callback for the sparse output buffer.
//
// Values equal to zero are dropped. Anything else is forwarded to
// inner_store with i as the row and the element index
// (idx / sizeof(double)) as the column.
//
// val:      value being stored.
// idx:      byte offset of the destination element.
// i:        row index forwarded to inner_store.
// N:        not read by this function.
// triplets: triplet list forwarded to inner_store.
__attribute__((always_inline))
void sparse_store(double val, int64_t idx, size_t i, size_t N, std::vector<triple> &triplets) {
    const bool is_nonzero = !(val == 0.0);
    if (is_nonzero) {
        const int64_t column = idx / sizeof(double);
        inner_store(i, column, val, triplets);
    }
}

// Load callback for the sparse output buffer: every element reads as zero.
//
// idx, i, N, triplets: accepted so the signature mirrors sparse_store's
//                      trailing arguments; none of them is read.
//
// Returns 0.0 for every request.
__attribute__((always_inline))
double sparse_load(int64_t idx, size_t i, size_t N, std::vector<triple> &triplets) {
    const double empty_entry = 0.0;
    return empty_entry;
}

// Store callback that must never be reached: it unconditionally trips an
// assert whose message says the input is read-only.
//
// It is passed to __enzyme_todense together with mod_load, as the store half
// of that load/store pair.
//
// val, idx, input, N: accepted so the function has the store-callback shape;
//                     none of them is read.
//
// NOTE(review): the template parameter T is not used in the signature or the
// body. With NDEBUG defined the assert expands to nothing and a stray store
// would be silently ignored.
template<typename T>
__attribute__((always_inline))
void never_store(double val, int64_t idx, double* input, size_t N) {
assert(0 && "this is a read only input, why are you storing here...");
}

// Load callback that reads input as a ring of N elements.
//
// idx:   byte offset of the requested element; it is converted to an element
//        index by dividing by sizeof(double).
// input: backing array, indexed in the range [0, N).
// N:     ring length; must be non-zero.
//
// Returns input[element_index % N], so an index of N wraps to element 0.
//
// NOTE(review): the template parameter T is not used; the element size and
// the pointer type are fixed to double.
template<typename T>
__attribute__((always_inline))
double mod_load(int64_t idx, double* input, size_t N) {
    const int64_t element = idx / sizeof(double);
    const size_t wrapped = element % N;
    return input[wrapped];
}

template<typename T>
__attribute__((noinline))
std::vector<triple> hess_f(size_t N, double* input) {
std::vector<triple> triplets;
input = __enzyme_todense((void*)mod_load, (void*)never_store, input, N);
std::vector<Triple<T>> hess_f(size_t N, T* input) {
std::vector<Triple<T>> triplets;
input = __enzyme_todense<T*>((void*)mod_load<T>, (void*)never_store<T>, input, N);
__builtin_assume(N > 0);
__builtin_assume(N != 1);
for (size_t i=0; i<N; i++) {
__builtin_assume(i < 100000000);
double* d_input = __enzyme_todense((void*)ident_load, (void*)ident_store, i, N);
double* d_dinput = __enzyme_todense((void*)sparse_load, (void*)sparse_store, i, N, &triplets);
T* d_input = __enzyme_todense<T*>((void*)ringident_load<T>, (void*)ident_store<T>, i, N);
T* d_dinput = __enzyme_todense<T*>((void*)sparse_load<T>, (void*)sparse_store<T>, i, N, &triplets);

__enzyme_fwddiff((void*)grad_f,
__enzyme_fwddiff<void>((void*)grad_f<T>,
enzyme_const, N,
enzyme_dup, input, d_input,
enzyme_dupnoneed, (double*)0x1, d_dinput);
enzyme_dupnoneed, (T*)0x1, d_dinput);

}
return triplets;
}


int main() {
// size_t N = 8;
double x[N];
for (int i=0; i<N; i++) x[i] = (i + 1) * (i + 1);
int main(int argc, char** argv) {
size_t N = 8;

if (argc >= 2) {
N = atoi(argv[1]);
}

double *x = (double*)malloc(sizeof(double) * N);
for (int i=0; i<N; i++) x[i] = (i + 1) * (i + 1);

auto res = hess_f(N, &x[0]);

struct timeval start, end;
gettimeofday(&start, NULL);

auto res = hess_f(N, x);

printf("%ld\n", res.size());
gettimeofday(&end, NULL);

printf("Number of elements %ld\n", res.size());

printf("Runtime %0.6f\n", tdiff(&start, &end));

if (N <= 30) {
for (auto & tup : res)
printf("%ld, %ld = %f\n", tup.row, tup.col, tup.val);
}

return 0;
}
Loading

0 comments on commit 3282636

Please sign in to comment.