Skip to content

Commit

Permalink
Merge pull request #754 from mkstoyanov/fast_tensor_weights
Browse files Browse the repository at this point in the history
* switch to faster global kronecker algorithm for computing tensor weights
  • Loading branch information
mkstoyanov authored Feb 15, 2024
2 parents 024ff74 + 86c3361 commit 9401b34
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 95 deletions.
52 changes: 4 additions & 48 deletions SparseGrids/tsgGridSequence.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -822,60 +822,16 @@ void GridSequence::recomputeSurpluses(){
double basis = 1.0; // different basis functions
double x = nodes[i]; // row = i-th output
vmatrix[i * num_levels] = 1.0;
for(int j=1; j<i; j++) { // loop over rows
for(int j=1; j<i; j++) { // loop over columns (along the row)
basis *= (x - nodes[j-1]); // numerator of next function
vmatrix[i * num_levels + j] = basis / coeff[j];
}
}

auto match_outside_dim = [&](int d, int const*a, int const *b) -> bool {
for(int j=0; j<d; j++)
if (a[j] != b[j]) return false;
for(int j=d+1; j<num_dimensions; j++)
if (a[j] != b[j]) return false;
return true;
};
std::vector<std::vector<int>> map;
std::vector<std::vector<int>> job_indexes;

std::vector<std::vector<int>> map(num_dimensions, std::vector<int>(num_points));
std::vector<std::vector<int>> job_indexes(num_dimensions);
for(auto &ji : job_indexes) ji.reserve(num_levels + 1);

#pragma omp parallel for
for(int d=0; d<num_dimensions; d++) {
// for each dimension, use the map to group points together
std::iota(map[d].begin(), map[d].end(), 0);
if (d != num_dimensions - 1) {
std::sort(map[d].begin(), map[d].end(), [&](int a, int b)->bool{
const int * idxa = points.getIndex(a);
const int * idxb = points.getIndex(b);
for(int j=0; j<num_dimensions; j++) {
if (j != d){
if (idxa[j] < idxb[j]) return true;
if (idxa[j] > idxb[j]) return false;
}
}
// lexicographical order, dimension d is the fastest moving one
if (idxa[d] < idxb[d]) return true;
if (idxa[d] > idxb[d]) return false;
return false;
});
}

if (num_dimensions == 1) {
job_indexes[d].push_back(0);
job_indexes[d].push_back(num_points);
} else {
int const *c_index = points.getIndex(map[d][0]);
job_indexes[d].push_back(0);
for(int i=1; i<num_points; i++) {
if (not match_outside_dim(d, c_index, points.getIndex(map[d][i]))) {
job_indexes[d].push_back(i);
c_index = points.getIndex(map[d][i]);
}
}
job_indexes[d].push_back(num_points);
}
}
MultiIndexManipulations::resortIndexes(points, map, job_indexes);

for(int d=num_dimensions-1; d>=0; d--) {
#pragma omp parallel for
Expand Down
125 changes: 78 additions & 47 deletions SparseGrids/tsgIndexManipulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -429,63 +429,94 @@ MultiIndexSet generateNonNestedPoints(const MultiIndexSet &tensors, const OneDim
return unionSets(point_tensors);
}

std::vector<int> computeTensorWeights(MultiIndexSet const &mset){
size_t num_dimensions = (size_t) mset.getNumDimensions();
int num_tensors = mset.getNumIndexes();
void resortIndexes(const MultiIndexSet &iset, std::vector<std::vector<int>> &map, std::vector<std::vector<int>> &lines1d) {
int num_dimensions = iset.getNumDimensions();
int num_tensors = iset.getNumIndexes();

std::vector<int> level = computeLevels(mset);
int max_level = *std::max_element(level.begin(), level.end());
int num_levels = 1 + *std::max_element(iset.begin(), iset.end());

Data2D<int> dag_down(num_dimensions, num_tensors);
auto match_outside_dim = [&](int d, int const*a, int const *b) -> bool {
for(int j=0; j<d; j++)
if (a[j] != b[j]) return false;
for(int j=d+1; j<num_dimensions; j++)
if (a[j] != b[j]) return false;
return true;
};

std::vector<int> weights((size_t) num_tensors);
map = std::vector<std::vector<int>>(num_dimensions, std::vector<int>(num_tensors));
lines1d = std::vector<std::vector<int>>(num_dimensions);
for(auto &ji : lines1d) ji.reserve(num_levels);

#pragma omp parallel for schedule(static)
for(int i=0; i<num_tensors; i++){
std::vector<int> kid(num_dimensions);
std::copy_n(mset.getIndex(i), num_dimensions, kid.data());
#pragma omp parallel for
for(int d=0; d<num_dimensions; d++) {
// for each dimension, use the map to group indexes together
std::iota(map[d].begin(), map[d].end(), 0);
if (d != num_dimensions - 1) {
std::sort(map[d].begin(), map[d].end(), [&](int a, int b)->bool{
const int * idxa = iset.getIndex(a);
const int * idxb = iset.getIndex(b);
for(int j=0; j<num_dimensions; j++) {
if (j != d){
if (idxa[j] < idxb[j]) return true;
if (idxa[j] > idxb[j]) return false;
}
}
// lexigographical order, dimension d is the fastest moving one
if (idxa[d] < idxb[d]) return true;
if (idxa[d] > idxb[d]) return false;
return false;
});
}

int *ref_kids = dag_down.getStrip(i);
for(size_t j=0; j<num_dimensions; j++){
kid[j]++;
ref_kids[j] = mset.getSlot(kid);
kid[j]--;
if (num_dimensions == 1) {
lines1d[d].push_back(0);
lines1d[d].push_back(num_tensors);
} else {
int const *c_index = iset.getIndex(map[d][0]);
lines1d[d].push_back(0);
for(int i=1; i<num_tensors; i++) {
if (not match_outside_dim(d, c_index, iset.getIndex(map[d][i]))) {
lines1d[d].push_back(i);
c_index = iset.getIndex(map[d][i]);
}
}
lines1d[d].push_back(num_tensors);
}
}
}

std::vector<int> computeTensorWeights(MultiIndexSet const &mset){
int num_dimensions = mset.getNumDimensions();
int num_tensors = mset.getNumIndexes();

if (level[i] == max_level) weights[i] = 1;
if (num_dimensions == 1) {
std::vector<int> weights(num_tensors, 0);
weights.back() = 1;
return weights;
}

for(int l=max_level-1; l>=0; l--){
#pragma omp parallel for schedule(dynamic)
for(int i=0; i<num_tensors; i++){
if (level[i] == l){
std::vector<int> monkey_tail(max_level-l+1);
std::vector<int> monkey_count(max_level-l+1);
std::vector<bool> used(num_tensors, false);

int current = 0;
monkey_count[0] = 0;
monkey_tail[0] = i;

int sum = 0;

while(monkey_count[0] < (int) num_dimensions){
if (monkey_count[current] < (int) num_dimensions){
int branch = dag_down.getStrip(monkey_tail[current])[monkey_count[current]];
if ((branch == -1) || (used[branch])){
monkey_count[current]++;
}else{
used[branch] = true;
sum += weights[branch];
monkey_count[++current] = 0;
monkey_tail[current] = branch;
}
}else{
monkey_count[--current]++;
}
}
std::vector<std::vector<int>> map;
std::vector<std::vector<int>> lines1d;

resortIndexes(mset, map, lines1d);

weights[i] = 1 - sum;
Data2D<int> dag_down(num_dimensions, num_tensors);

std::vector<int> weights(num_tensors, 0);

// the row with contiguous indexes has a trivial solution
auto const& last_jobs = lines1d[num_dimensions-1];
for(int i=0; i<static_cast<int>(last_jobs.size() - 1); i++)
weights[last_jobs[i+1] - 1] = 1;

for(int d=num_dimensions-2; d>=0; d--) {
#pragma omp parallel for
for(int job = 0; job < static_cast<int>(lines1d[d].size() - 1); job++) {
for(int i=lines1d[d][job+1]-2; i>=lines1d[d][job]; i--) {
int &val = weights[map[d][i]];
for(int j=i+1; j<lines1d[d][job+1]; j++) {
val -= weights[map[d][j]];
}
}
}
}
Expand Down
14 changes: 14 additions & 0 deletions SparseGrids/tsgIndexManipulator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,20 @@ MultiIndexSet generateNestedPoints(const MultiIndexSet &tensors, std::function<i
*/
MultiIndexSet generateNonNestedPoints(const MultiIndexSet &tensors, const OneDimensionalWrapper &wrapper);

/*!
* \internal
* \ingroup TasmanianMultiIndexManipulations
* \brief Creates a map with sorted indexes dimension by dimension.
*
* \param iset is a non-empty set of indexes
* \param map (output) the i-th index in the order with d as the fastest changing (contiguous) dimension is map[d][i]
 * \param lines1d for each dimension d, the indexes that match in all but the d-th dimension will be between
 *      lines1d[d][i] and lines1d[d][i+1] (not including the last entry).
 *      This is similar to the pntr index of a row-compressed sparse matrix.
* \endinternal
*/
void resortIndexes(const MultiIndexSet &iset, std::vector<std::vector<int>> &map, std::vector<std::vector<int>> &lines1d);

/*!
* \internal
* \ingroup TasmanianMultiIndexManipulations
Expand Down

0 comments on commit 9401b34

Please sign in to comment.