hpc4cmb · keskitalo · Apr 26, 2021 · Feb 21, 2021 · Feb 21, 2021 · Feb 21, 2021
diff --git a/src/libtoast/src/toast_atm.cpp b/src/libtoast/src/toast_atm.cpp
@@ -35,7 +35,7 @@ bool toast::atm_verbose() {
         // First time we were called
         auto & env = toast::Environment::get();
         std::string logval = env.log_level();
-        if (strncmp(logval.c_str(), "VERBOSE", 7) == 0) {
+        if (strncmp(logval.c_str(), "DEBUG", 5) == 0) {
             verbose = true;
         }
         called = true;

diff --git a/src/libtoast/src/toast_atm_sim.cpp b/src/libtoast/src/toast_atm_sim.cpp
@@ -722,21 +722,19 @@ cholmod_sparse * toast::atm_sim_sqrt_sparse_covariance(
                 // Extract band diagonal of the matrix and try
                 // factorizing again
                 // int ndiag = ntry - itry - 1;
-                int ndiag = nelem - nelem * (itry + 1) / ntry;
-                if (ndiag < 3) ndiag = 3;
+                int ndiag = nelem - nelem * (itry + 1) / (ntry - 1);
+                if (ndiag < 1) ndiag = 1;
                 int iupper = ndiag - 1;
                 int ilower = -iupper;
                 if (atm_verbose()) {
                     cholmod_print_sparse(cov, "Covariance matrix", chol.chcommon);
-
                     // DEBUG begin
                     if (itry == ntry - 2) {
                         FILE * covfile = fopen("failed_covmat.mtx", "w");
                         cholmod_write_sparse(covfile, cov, NULL, NULL,
                                              chol.chcommon);
                         fclose(covfile);
                     }
-
                     // DEBUG end
                     o.str("");
                     o << rank
@@ -750,6 +748,17 @@ cholmod_sparse * toast::atm_sim_sqrt_sparse_covariance(
                 if (chol.chcommon->status != CHOLMOD_OK) {
                     throw std::runtime_error("cholmod_band_inplace failed.");
                 }
+                if (atm_verbose()) {
+                    cholmod_print_sparse(cov, "Covariance matrix", chol.chcommon);
+                    // DEBUG begin
+                    if (itry == ntry - 2) {
+                        FILE * covfile = fopen("band_covmat.mtx", "w");
+                        cholmod_write_sparse(covfile, cov, NULL, NULL,
+                                             chol.chcommon);
+                        fclose(covfile);
+                    }
+                    // DEBUG end
+                }
             } else throw std::runtime_error("cholmod_factorize failed.");
         } else {
             break;

diff --git a/src/toast/_libtoast_todmap_mapmaker.cpp b/src/toast/_libtoast_todmap_mapmaker.cpp
@@ -7,6 +7,7 @@
 #ifdef _OPENMP
 # include <omp.h>
 #endif // ifdef _OPENMP
+#include <chrono>
 
 
 void apply_flags_to_pixels(py::array_t <unsigned char> common_flags,
@@ -212,11 +213,22 @@ void accumulate_observation_matrix(py::array_t <double,
     size_t npixtot = fast_obs_matrix.shape(0);
     size_t npix = npixtot / nnz;
 
+    // Count number of dense templates in the beginning of template array
+    size_t ndense = 0;
+    for (size_t itemplate = 0; itemplate < ntemplate; ++itemplate) {
+        size_t nhit = 0;
+        for (size_t isample = 0; isample < nsample; ++isample) {
+            if (fast_templates(isample, itemplate) != 0) ++nhit;
+        }
+        if (nhit < 0.1 * nsample) break;
+        ++ndense;
+    }
+
     // Build lists of non-zeros for each row of the template matrix
     std::vector <std::vector <size_t> > nonzeros(nsample);
 #pragma omp parallel for schedule(static, 1)
     for (size_t isample = 0; isample < nsample; ++isample) {
-        for (size_t itemplate = 0; itemplate < ntemplate; ++itemplate) {
+        for (size_t itemplate = ndense; itemplate < ntemplate; ++itemplate) {
             if (fast_templates(isample, itemplate) != 0) {
                 nonzeros[isample].push_back(itemplate);
             }
@@ -232,19 +244,47 @@ void accumulate_observation_matrix(py::array_t <double,
         idthread = omp_get_thread_num();
 #endif // ifdef _OPENMP
 
+        //if (idthread == 0) std::cerr << "Accumulate loop, nsample = " << nsample << ".  nthreads = " << nthreads << std::endl;  // DEBUG
+
         for (size_t isample = 0; isample < nsample; ++isample) {
-            size_t ipixel = fast_pixels(isample);
+            const int64_t ipixel = fast_pixels(isample);
             if (ipixel % nthreads != idthread) continue;
+            const double * ipointer = &fast_templates(isample, 0);
+
+            //std::chrono::steady_clock::time_point tstart = std::chrono::steady_clock::now();
+
             for (size_t jsample = 0; jsample < nsample; ++jsample) {
-                size_t jpixel = fast_pixels(jsample);
+                const int64_t jpixel = fast_pixels(jsample);
                 double filter_matrix = 0;
-                for (auto itemplate : nonzeros[isample]) {
-                    for (auto jtemplate : nonzeros[jsample]) {
-                        filter_matrix
-                            += fast_templates(isample, itemplate)
-                               * fast_templates(jsample, jtemplate)
-                               * fast_covariance(itemplate, jtemplate);
+                const double * jpointer = &fast_templates(jsample, 0);
+                const double * pcov = &fast_covariance(0, 0);
+                // dense templates
+                for (size_t itemplate=0; itemplate < ndense; ++itemplate) {
+                    const double * covpointer = pcov + itemplate * ntemplate;
+                    double dtemp = 0;
+                    // dense X dense
+                    for (size_t jtemplate=0; jtemplate < ndense; ++jtemplate) {
+                        dtemp += jpointer[jtemplate] * covpointer[jtemplate];
+                    }
+                    // dense X sparse
+                    for (const auto jtemplate : nonzeros[jsample]) {
+                        dtemp += jpointer[jtemplate] * covpointer[jtemplate];
                     }
+                    filter_matrix += dtemp * ipointer[itemplate];
+                }
+                // sparse templates
+                for (const auto itemplate : nonzeros[isample]) {
+                    const double * covpointer = pcov + itemplate * ntemplate;
+                    double dtemp = 0;
+                    // sparse X dense
+                    for (size_t jtemplate=0; jtemplate < ndense; ++jtemplate) {
+                        dtemp += jpointer[jtemplate] * covpointer[jtemplate];
+                    }
+                    // sparse X sparse
+                    for (const auto jtemplate : nonzeros[jsample]) {
+                        dtemp += jpointer[jtemplate] * covpointer[jtemplate];
+                    }
+                    filter_matrix += dtemp * ipointer[itemplate];
                 }
                 if (isample == jsample) {
                     filter_matrix = 1 - filter_matrix;
@@ -259,6 +299,25 @@ void accumulate_observation_matrix(py::array_t <double,
                     }
                 }
             }
+
+            /*
+            std::chrono::steady_clock::time_point tstop = std::chrono::steady_clock::now();
+
+            std::cerr
+              << idthread
+              << " : Accumulate loop, isample = "
+              << isample
+              << " / "
+              << nsample
+              << ". ndense = "
+              << ndense
+              << ". nonzeros = "
+              << nonzeros[isample].size()
+              << ". Computed in "
+              <<  std::chrono::duration_cast<std::chrono::microseconds>(tstop-tstart).count() * 1e-6
+              << " s"
+              << std::endl;
+            */
         }
     }
 }

diff --git a/src/toast/pipeline_tools/filterbin.py b/src/toast/pipeline_tools/filterbin.py
@@ -242,6 +242,7 @@ def apply_filterbin(
             write_wcov_inv = args.write_wcov_inv and first_call
             write_wcov = args.write_wcov and first_call
             write_binned = args.write_binmap
+            write_obs_matrix = args.filterbin_write_obs_matrix and first_call
 
             if len(time_name.split("-")) == 3:
                 # Special rules for daily maps
@@ -288,7 +289,7 @@ def apply_filterbin(
                 ground_filter_order=args.filterbin_ground_order,
                 split_ground_template=args.filterbin_split_ground_template,
                 poly_filter_order=args.filterbin_poly_order,
-                write_obs_matrix=args.filterbin_write_obs_matrix,
+                write_obs_matrix=write_obs_matrix,
                 deproject_map=args.filterbin_deproject_map,
                 deproject_pattern=args.filterbin_deproject_pattern,
                 deproject_nnz=deproject_nnz,

diff --git a/src/toast/pshmem/shmem.py b/src/toast/pshmem/shmem.py
@@ -178,6 +178,9 @@ def __init__(self, shape, dtype, comm):
 
     def __del__(self):
         self.close()
+        # Free the node communicator
+        if self._nodecomm is not None:
+            self._nodecomm.Free()
 
     def __enter__(self):
         return self