Skip to content

Commit

Permalink
better implementation of the van matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
mkstoyanov committed Feb 22, 2024
1 parent de3b275 commit 234aea4
Show file tree
Hide file tree
Showing 5 changed files with 319 additions and 53 deletions.
3 changes: 3 additions & 0 deletions SparseGrids/tsgEnumerates.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@
#include <type_traits>
#include <cassert>
#include <limits>
#include <array>

#include <chrono>

#include "TasmanianConfig.hpp" // contains build options passed down from CMake
#include "tsgUtils.hpp" // contains small utilities
Expand Down
70 changes: 39 additions & 31 deletions SparseGrids/tsgGridLocalPolynomial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -731,38 +731,44 @@ void GridLocalPolynomial::evaluateHierarchicalFunctions(const double x[], int nu
}
}

void GridLocalPolynomial::build_van_matrix1d(std::vector<int> &vpntr, std::vector<int> &vindx, std::vector<double> &vvals) {
vindx.clear();
vvals.clear();

//int max_level = *std::max_element(points.begin(), points.end());

// up to top-level
//int num_nodes = rule->getNumPoints(max_level);
int num_nodes = 1 + *std::max_element(points.begin(), points.end());
vpntr.resize(num_nodes + 1);

// TODO: switch to a graph-based algorithm
for(int i=0; i<num_nodes; i++) {
vpntr[i] = static_cast<int>(vindx.size());
double x = rule->getNode(i);
for(int j=0; j<i; j++) {
bool supported;
double v = rule->evalSupport(j, x, supported);
if (supported) {
vindx.push_back(j);
vvals.push_back(v);
}
}
vindx.push_back(i);
vvals.push_back(1.0);
}
vpntr.back() = static_cast<int>(vindx.size());
}

void GridLocalPolynomial::recomputeSurpluses(){
surpluses = Data2D<double>(num_outputs, points.getNumIndexes(), std::vector<double>(values.begin(), values.end()));

// There are two available algorithms here:
// - global sparse Kronecker (kron), implemented here in recomputeSurpluses()
// - sparse matrix in matrix-free form (mat), implemented in updateSurpluses()
//
// (kron) has the additional restriction that it will only work when the hierarchy is complete
// i.e., all point (multi-indexes) have all of their parents
// this is guaranteed for a full-tensor grid, non-adaptive grid, or a grid adapted with the stable refinement strategy
// other refinement strategies or using dynamic refinement (even with a stable refinement strategy)
// may yield a hierarchy-complete grid, but this is not mathematically guaranteed
//
// computing the dagUp allows us to check (at runtime) if the grid is_complete, and exclude (kron) if incomplete
// the completeness check can be done on the fly but it does incur cost
//
// approximate computational cost, let n be the number of points and d be the number of dimensions
// (kron) d n n^(1/d) due to standard Kronecker reasons, except it is hard to find the "effective" n due to sparsity
// (mat) d^2 n log(n) since there are d log(n) ancestors and basis functions are product of d one dimensional functions
// naturally, there are constants and those also depend on the system, e.g., number of threads, thread scheduling, cache ...
// (mat) has a more even load per thread and parallelizes better
//
// for d = 1, the algorithms are the same, except (kron) explicitly forms the matrix and the matrix free (mat) version is much better
// for d = 2, the (mat) algorithm is still faster
// for d = 3, and sufficiently large size the (mat) algorithm still wins
// the breaking point depends on n, the order and system
// for d = 4 and above, the (kron) algorithm is much faster
// it is possible that for sufficiently large n (mat) will win again
// but tested on 12 cores CPUs (Intel and AMD) up to n = 3.5 million, the (kron) method is over 2x faster
// higher dimensions will favor (kron) even more

if (num_dimensions <= 2 or (num_dimensions == 3 and points.getNumIndexes() > 2000000)) {
Data2D<int> dagUp = HierarchyManipulations::computeDAGup(points, rule.get());
std::vector<int> level = HierarchyManipulations::computeLevels(points, rule.get());
updateSurpluses(points, top_level, level, dagUp);
return;
}

bool is_complete = true;
Data2D<int> dagUp = HierarchyManipulations::computeDAGup(points, rule.get(), is_complete);

Expand All @@ -773,16 +779,18 @@ void GridLocalPolynomial::recomputeSurpluses(){
return;
}

int num_nodes = 1 + *std::max_element(points.begin(), points.end());

std::vector<int> vpntr, vindx;
std::vector<double> vvals;
build_van_matrix1d(vpntr, vindx, vvals);
rule->van_matrix(num_nodes, vpntr, vindx, vvals);

std::vector<std::vector<int>> map;
std::vector<std::vector<int>> lines1d;
MultiIndexManipulations::resortIndexes(points, map, lines1d);

for(int d=num_dimensions-1; d>=0; d--) {
#pragma omp parallel for
#pragma omp parallel for schedule(dynamic)
for(int job = 0; job < static_cast<int>(lines1d[d].size() - 1); job++) {
for(int i=lines1d[d][job]+1; i<lines1d[d][job+1]; i++) {
double *row_strip = surpluses.getStrip(map[d][i]);
Expand Down
6 changes: 2 additions & 4 deletions SparseGrids/tsgGridLocalPolynomial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,6 @@ class GridLocalPolynomial : public BaseCanonicalGrid{
//! \brief Looks for a batch of constructed points and processes all that will result in a connected graph.
void loadConstructedPoints();

//! \brief Construct a sparse matrix for the 1d basis
void build_van_matrix1d(std::vector<int> &pntr, std::vector<int> &indx, std::vector<double> &vals);

//! \brief Fast algorithm, uses global Kronecker algorithm to recompute all surpluses
void recomputeSurpluses();

Expand All @@ -160,7 +157,8 @@ class GridLocalPolynomial : public BaseCanonicalGrid{
*
* Note: adjusting the \b level vector allows to update the surpluses for only a portion of the graph.
*
* Note: uses a slow algorithm but good for updating only some of the indexes
* Note: see the comments inside recomputeSurpluses() for the performance comparison between different algorithms
* also note that this method can be used to partially update, i.e., update the surpluses for some of the indexes
*/
void updateSurpluses(MultiIndexSet const &work, int max_level, std::vector<int> const &level, Data2D<int> const &dagUp);

Expand Down
18 changes: 0 additions & 18 deletions SparseGrids/tsgHierarchyManipulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,24 +178,6 @@ Data2D<int> computeDAGup(MultiIndexSet const &mset, const BaseRuleLocalPolynomia
}
}

bool checkComplete(MultiIndexSet const &mset, Data2D<int> const &dagUp, const BaseRuleLocalPolynomial *rule){
int const num_no_parents = rule->getNumPoints(0);
int const num_dimensions = mset.getNumDimensions();
int const max_parents = rule->getMaxNumParents();

for(int i=0; i<mset.getNumIndexes(); i++) {
for(int d=0; d<num_dimensions; d++) {
if (mset.getIndex(i)[d] >= num_no_parents) { // expect a parent
for(int j=0; j<max_parents; j++)
if (dagUp.getStrip(i)[d * max_parents + j] == -1)
return false;
}
}
}

return true;
}

Data2D<int> computeDAGDown(MultiIndexSet const &mset, const BaseRuleLocalPolynomial *rule){
size_t num_dimensions = mset.getNumDimensions();
int max_1d_kids = rule->getMaxNumKids();
Expand Down
Loading

0 comments on commit 234aea4

Please sign in to comment.