Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Obs matrix #374

Merged
merged 17 commits into from
Feb 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion pipelines/toast_ground_sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def parse_arguments(comm):
pipeline_tools.add_gainscrambler_args(parser)
pipeline_tools.add_madam_args(parser)
pipeline_tools.add_mapmaker_args(parser)
pipeline_tools.add_filterbin_args(parser)
pipeline_tools.add_sky_map_args(parser)
pipeline_tools.add_pysm_args(parser)
pipeline_tools.add_sss_args(parser)
Expand Down Expand Up @@ -565,12 +566,26 @@ def main():
first_call=(mc == firstmc),
)

if (
args.filterbin_ground_order is not None
or args.filterbin_poly_order is not None
):
pipeline_tools.apply_filterbin(
args,
comm,
data,
outpath,
totalname_freq,
time_comms=time_comms,
telescope_data=telescope_data,
first_call=(mc == firstmc),
)

if (
args.apply_polyfilter
or args.apply_polyfilter2D
or args.apply_groundfilter
):

# Filter signal

pipeline_tools.apply_polyfilter(args, comm, data, totalname_freq)
Expand Down
2 changes: 2 additions & 0 deletions src/libtoast/include/toast/tod_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ void filter_polynomial(int64_t order, size_t nsignal, uint8_t * flags,
int64_t const * starts, int64_t const * stops);
void bin_templates(double * signal, double * templates, uint8_t * good,
double * invcov, double * proj, size_t nsample, size_t ntemplate);
void legendre(double * x, double * templates, size_t start_order, size_t stop_order,
size_t nsample);
void chebyshev(double * x, double * templates, size_t start_order, size_t stop_order,
size_t nsample);
void add_templates(double * signal, double * templates, double * coeff, size_t nsample,
Expand Down
4 changes: 2 additions & 2 deletions src/libtoast/src/toast_sys_environment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ extern "C" {

#ifdef HAVE_MKL
extern "C" {
#include <mkl.h>
# include <mkl.h>
}
#endif
#endif // ifdef HAVE_MKL

// These variables are autogenerated and compiled
// into the library by the version.cmake script
Expand Down
63 changes: 63 additions & 0 deletions src/libtoast/src/toast_tod_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,69 @@ void toast::chebyshev(double * x, double * templates, size_t start_order,
return;
}

void toast::legendre(double * x, double * templates, size_t start_order,
                     size_t stop_order, size_t nsample) {
    // Evaluate Legendre polynomials P_k(x) for orders
    // k = start_order, ..., stop_order - 1 at the nsample points in x.
    // The row for order k is written at templates + (k - start_order) * nsample,
    // so `templates` must hold (stop_order - start_order) * nsample doubles.
    // NOTE(review): neither the buffer size nor x in [-1, 1] is checked here --
    // callers must guarantee both.
    // Each stored row is scaled by sqrt((2k + 1) / 2), the inverse L2 norm of
    // P_k on [-1, 1], to improve the condition number of downstream fits.

    // order == 0: P_0(x) = 1 (constant row).
    double norm = 1. / sqrt(2);
    if ((start_order == 0) && (stop_order > 0)) {
        for (size_t i = 0; i < nsample; ++i) templates[i] = norm;
    }

    // order == 1: P_1(x) = x.
    norm = 1. / sqrt(2. / 3.);
    if ((start_order <= 1) && (stop_order > 1)) {
        double * ptemplates = templates + (1 - start_order) * nsample;
        for (size_t i = 0; i < nsample; ++i) ptemplates[i] = norm * x[i];
    }

    // Calculate the hierarchy of polynomials, one buffer length
    // at a time to allow for parallelization
    const size_t buflen = 1000;
    size_t nbuf = nsample / buflen + 1;

    #pragma omp parallel for schedule(static) default(none) \
    shared(x, templates, start_order, stop_order, nsample, nbuf)
    for (size_t ibuf = 0; ibuf < nbuf; ++ibuf) {
        size_t istart = ibuf * buflen;
        size_t istop = istart + buflen;
        if (istop > nsample) istop = nsample;
        if (istop <= istart) continue;  // trailing empty chunk when nsample % buflen == 0
        size_t n = istop - istart;

        // Initialize the recurrence for this chunk:
        // val holds (unnormalized) P_1, prev holds P_0.
        std::vector <double> val(n);
        std::copy(x + istart, x + istart + n, val.data());
        std::vector <double> prev(n);
        std::fill(prev.begin(), prev.end(), 1.0);
        std::vector <double> next(n);

        for (size_t order = 2; order < stop_order; ++order) {
            // Bonnet's recursion:
            //   k * P_k(x) = (2k - 1) * x * P_{k-1}(x) - (k - 1) * P_{k-2}(x)
            // with val = P_{k-1} and prev = P_{k-2} on entry.
            double orderinv = 1. / order;
            for (size_t i = 0; i < n; ++i) {
                next[i] =
                    ((2 * order - 1) * x[istart + i] * val[i] - (order - 1) *
                     prev[i]) * orderinv;
            }

            // Shift the recurrence window: prev <- P_{k-1}, val <- P_k.
            std::copy(val.data(), val.data() + n, prev.data());
            std::copy(next.data(), next.data() + n, val.data());

            // Orders below start_order are computed (the recurrence needs
            // them) but not stored.
            if (order >= start_order) {
                double * ptemplates = templates + istart + (order - start_order) *
                                      nsample;
                std::copy(val.data(), val.data() + n, ptemplates);

                // Normalize for better condition number
                double norm = 1. / sqrt(2. / (2. * order + 1.));
                for (size_t i = 0; i < n; ++i) {
                    ptemplates[i] *= norm;
                }
            }
        }
    }

    return;
}

void toast::add_templates(double * signal, double * templates, double * coeff,
size_t nsample, size_t ntemplate) {
const size_t buflen = 1000;
Expand Down
4 changes: 2 additions & 2 deletions src/toast/_libtoast_pixels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,6 @@ void init_pixels(py::module & m) {
(tuple): The (local submap, pixel within submap) for each global pixel.
)");
m.def("global_to_local", &global_to_local <int32_t> );
m.def("global_to_local", &global_to_local <int16_t> );
m.def("global_to_local", &global_to_local <int32_t>);
m.def("global_to_local", &global_to_local <int16_t>);
}
35 changes: 35 additions & 0 deletions src/toast/_libtoast_tod_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,41 @@


void init_tod_filter(py::module & m) {
m.def("legendre",
      [](py::buffer x, py::buffer templates, size_t start_order,
         size_t stop_order) {
          // Validate that x is a 1-D contiguous buffer of float64.
          pybuffer_check_1D <double> (x);
          py::buffer_info info_x = x.request();
          py::buffer_info info_templates = templates.request();

          size_t nsample = info_x.size;

          // BUGFIX: guard against an empty x buffer, which previously
          // caused a division by zero when deriving ntemplate.
          if (nsample == 0) {
              auto log = toast::Logger::get();
              std::ostringstream o;
              o << "x must not be empty";
              log.error(o.str().c_str());
              throw std::runtime_error(o.str().c_str());
          }

          // BUGFIX: compare exact sizes instead of the truncating division
          // info_templates.size / nsample, which silently accepted buffers
          // that were up to nsample - 1 elements too large.
          size_t ntemplate = stop_order - start_order;
          if ((size_t) info_templates.size != ntemplate * nsample) {
              auto log = toast::Logger::get();
              std::ostringstream o;
              o << "Size of templates does not match x, and order";
              log.error(o.str().c_str());
              throw std::runtime_error(o.str().c_str());
          }

          double * px = reinterpret_cast <double *> (info_x.ptr);
          double * ptemplates = reinterpret_cast <double *> (info_templates.ptr);
          toast::legendre(px, ptemplates, start_order, stop_order, nsample);

          return;
      }, py::arg("x"), py::arg("templates"), py::arg("start_order"),
      py::arg("stop_order"),
      R"(
    Populate an array of Legendre polynomials at x (in range [-1, 1])

    Args:
        x (array, float64):  Sample positions; values should lie in [-1, 1].
        templates (array, float64):  Flat buffer of exactly
            (stop_order - start_order) * len(x) values, filled with one
            normalized polynomial order per row.
        start_order (int):  First Legendre order to evaluate.
        stop_order (int):  One past the last Legendre order to evaluate.

    Returns:
        None.
)");

m.def("chebyshev",
[](py::buffer x, py::buffer templates, size_t start_order,
size_t stop_order) {
Expand Down
183 changes: 182 additions & 1 deletion src/toast/_libtoast_todmap_mapmaker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
// a BSD-style license that can be found in the LICENSE file.

#include <_libtoast.hpp>
#ifdef _OPENMP
# include <omp.h>
#endif // ifdef _OPENMP


void apply_flags_to_pixels(py::array_t <unsigned char> common_flags,
Expand All @@ -20,7 +23,7 @@ void apply_flags_to_pixels(py::array_t <unsigned char> common_flags,
for (size_t i = 0; i < nsamp; ++i) {
unsigned char common_flag = fast_common_flags(i);
unsigned char detector_flag = fast_detector_flags(i);
if (common_flag & common_flag_mask || detector_flag & detector_flag_mask) {
if ((common_flag & common_flag_mask) || (detector_flag & detector_flag_mask)) {
fast_pixels(i) = -1;
}
}
Expand Down Expand Up @@ -85,11 +88,189 @@ void project_signal_offsets(py::array_t <double> ref, py::list todslices,
}
}

// Build the CSR index arrays (`indices`, `indptr`) that expand a locally
// compressed observation matrix into the full global pixel space.  The
// expanded matrix has nnz * npix rows; each global row whose pixel is
// observed locally receives the full column pattern (the global column of
// every local pixel, once per non-zero block), while unobserved rows are
// left empty (indptr simply repeats the running offset).
//
// NOTE(review): this walk assumes local_to_global is sorted in increasing
// order and that indptr[0] was zero-initialized by the caller -- confirm
// upstream before relying on the output.
void expand_matrix(py::array_t <double> compressed_matrix,
                   py::array_t <int64_t> local_to_global,
                   int64_t npix,
                   int64_t nnz,
                   py::array_t <int64_t> indices,
                   py::array_t <int64_t> indptr
                   ) {
    // Accessing with unchecked<2>() validates that compressed_matrix is
    // 2-dimensional; its values are not otherwise used here.
    auto fast_matrix = compressed_matrix.unchecked <2>();
    (void) fast_matrix;
    auto fast_local_to_global = local_to_global.unchecked <1>();
    auto fast_indices = indices.mutable_unchecked <1>();
    auto fast_indptr = indptr.mutable_unchecked <1>();

    size_t nlocal = fast_local_to_global.shape(0);

    // Every observed row shares the same column pattern; build it once.
    std::vector <int64_t> col_indices;
    col_indices.reserve((size_t) nnz * nlocal);

    size_t offset = 0;
    for (size_t inz = 0; inz < (size_t) nnz; ++inz) {
        for (size_t ilocal = 0; ilocal < nlocal; ++ilocal) {
            size_t iglobal = fast_local_to_global[ilocal];
            col_indices.push_back(iglobal + offset);
        }
        offset += npix;
    }

    // Walk all global rows, block by non-zero block.
    size_t global_row = 0;
    offset = 0;
    for (size_t inz = 0; inz < (size_t) nnz; ++inz) {
        size_t global_pixel = 0;
        for (size_t ilocal = 0; ilocal < nlocal; ++ilocal) {
            size_t iglobal = fast_local_to_global[ilocal];

            // Close out the empty rows before this observed pixel.
            while (global_pixel < iglobal) {
                fast_indptr[global_row + 1] = offset;
                global_row++;
                global_pixel++;
            }

            // Observed row: emit the shared column pattern.
            for (auto ind : col_indices) {
                fast_indices[offset++] = ind;
            }
            fast_indptr[global_row + 1] = offset;
            global_pixel++;
            global_row++;
        }

        // Close out the empty rows after the last observed pixel.
        while (global_pixel < (size_t) npix) {
            fast_indptr[global_row + 1] = offset;
            global_row++;
            global_pixel++;
        }
    }
}

// Accumulate the (symmetric) ntemplate x ntemplate covariance matrix of the
// template rows, weighting each sample by `good` (a float mask: 0 excludes
// the sample).  templates has shape (ntemplate, nsample).  To avoid summing
// over long runs of zeros, the non-zero support [start, stop) of each
// template is found first and only the overlap of two supports is summed.
void build_template_covariance(py::array_t <double,
                                            py::array::c_style | py::array::forcecast> templates,
                               py::array_t <double,
                                            py::array::c_style | py::array::forcecast> good,
                               py::array_t <double,
                                            py::array::c_style |
                                            py::array::forcecast> template_covariance) {
    auto fast_templates = templates.unchecked <2>();
    auto fast_good = good.unchecked <1>();
    auto fast_covariance = template_covariance.mutable_unchecked <2>();

    size_t ntemplate = fast_templates.shape(0);
    size_t nsample = fast_templates.shape(1);

    // Find, for each template, the first and one-past-last sample that is
    // both non-zero and unflagged.
    std::vector <size_t> starts(ntemplate);
    std::vector <size_t> stops(ntemplate);
    #pragma omp parallel for schedule(static, 1)
    for (size_t itemplate = 0; itemplate < ntemplate; ++itemplate) {
        size_t istart = 0;
        while (istart < nsample) {
            if ((fast_templates(itemplate, istart) != 0) &&
                (fast_good(istart) != 0)) break;
            ++istart;
        }
        starts[itemplate] = istart;

        // BUGFIX: the previous loop `for (istop = nsample - 1; istop >= 0;
        // --istop)` used an unsigned counter, so `istop >= 0` was always
        // true; an all-zero (or fully flagged) template row wrapped past
        // index 0 and read out of bounds (and nsample == 0 wrapped
        // immediately).  Count down with a one-past-the-end index instead;
        // an empty row yields starts == stops and contributes nothing.
        size_t istop = nsample;
        while (istop > istart) {
            if ((fast_templates(itemplate, istop - 1) != 0) &&
                (fast_good(istop - 1) != 0)) break;
            --istop;
        }
        stops[itemplate] = istop;
    }

    // Accumulate the upper triangle and mirror it.  Each thread owns whole
    // rows (itemplate), so the writes do not race.
    #pragma omp parallel for schedule(static, 1)
    for (size_t itemplate = 0; itemplate < ntemplate; ++itemplate) {
        for (size_t jtemplate = itemplate; jtemplate < ntemplate; ++jtemplate) {
            double val = 0;
            size_t istart = std::max(starts[itemplate], starts[jtemplate]);
            size_t istop = std::min(stops[itemplate], stops[jtemplate]);
            for (size_t isample = istart; isample < istop; ++isample) {
                val += fast_templates(itemplate, isample)
                       * fast_templates(jtemplate, isample)
                       * fast_good(isample);
            }
            fast_covariance(itemplate, jtemplate) = val;
            if (itemplate != jtemplate) {
                fast_covariance(jtemplate, itemplate) = val;
            }
        }
    }
}

void accumulate_observation_matrix(py::array_t <double,
py::array::c_style | py::array::forcecast> c_obs_matrix,
py::array_t <int64_t, py::array::c_style | py::array::forcecast> c_pixels,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at this code in the python bindings, we should collectively come up with some developer documentation about:

  • When should we pass buffers as arguments vs. explicit numpy arrays? Are there performance or other consequences for one choice vs the other?
  • How do we draw the line between calling libtoast functions from the bindings vs just putting that C++ code directly in the bindings? Should we just build the full library here? Should we move the bindings directly into the library?

I don't know the answer to these questions yet, but it does feel like we are using multiple "patterns" to accomplish the same thing throughout the bindings.

py::array_t <double, py::array::c_style | py::array::forcecast> weights,
py::array_t <double, py::array::c_style | py::array::forcecast> templates,
py::array_t <double,
py::array::c_style |
py::array::forcecast> template_covariance)
{
auto fast_obs_matrix = c_obs_matrix.mutable_unchecked <2>();
auto fast_pixels = c_pixels.unchecked <1>();
auto fast_weights = weights.unchecked <2>();
auto fast_templates = templates.unchecked <2>();
auto fast_covariance = template_covariance.unchecked <2>();

size_t nsample = fast_pixels.shape(0);
size_t nnz = fast_weights.shape(1);
size_t ntemplate = fast_templates.shape(1);
size_t npixtot = fast_obs_matrix.shape(0);
size_t npix = npixtot / nnz;

// Build lists of non-zeros for each row of the template matrix
std::vector <std::vector <size_t> > nonzeros(nsample);
#pragma omp parallel for schedule(static, 1)
for (size_t isample = 0; isample < nsample; ++isample) {
for (size_t itemplate = 0; itemplate < ntemplate; ++itemplate) {
if (fast_templates(isample, itemplate) != 0) {
nonzeros[isample].push_back(itemplate);
}
}
}

#pragma omp parallel
{
int nthreads = 1;
int idthread = 0;
#ifdef _OPENMP
nthreads = omp_get_num_threads();
idthread = omp_get_thread_num();
#endif // ifdef _OPENMP

for (size_t isample = 0; isample < nsample; ++isample) {
size_t ipixel = fast_pixels(isample);
if (ipixel % nthreads != idthread) continue;
for (size_t jsample = 0; jsample < nsample; ++jsample) {
size_t jpixel = fast_pixels(jsample);
double filter_matrix = 0;
for (auto itemplate : nonzeros[isample]) {
for (auto jtemplate : nonzeros[jsample]) {
filter_matrix
+= fast_templates(isample, itemplate)
* fast_templates(jsample, jtemplate)
* fast_covariance(itemplate, jtemplate);
}
}
if (isample == jsample) {
filter_matrix = 1 - filter_matrix;
} else {
filter_matrix = -filter_matrix;
}
for (size_t inz = 0; inz < nnz; ++inz) {
double iweight = fast_weights(isample, inz) * filter_matrix;
for (size_t jnz = 0; jnz < nnz; ++jnz) {
fast_obs_matrix(ipixel + inz * npix, jpixel + jnz * npix)
+= iweight * fast_weights(jsample, jnz);
}
}
}
}
}
}

// Register the compiled mapmaker kernels with the Python extension module.
void init_todmap_mapmaker(py::module & m) {
    m.doc() = "Compiled kernels to support TOAST mapmaker";

    // Offset (destriping) template helpers.
    m.def("project_signal_offsets", &project_signal_offsets);
    m.def("add_offsets_to_signal", &add_offsets_to_signal);

    // Pixel-domain helpers.
    m.def("apply_flags_to_pixels", &apply_flags_to_pixels);
    m.def("expand_matrix", &expand_matrix);

    // Explicit observation-matrix accumulation.
    m.def("build_template_covariance", &build_template_covariance);
    m.def("accumulate_observation_matrix", &accumulate_observation_matrix);
}
2 changes: 1 addition & 1 deletion src/toast/dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def distribute_discrete(sizes, groups, pow=1.0, breaks=None):

if len(dist) != groups:
raise RuntimeError(
"Number of distributed groups different than " "number requested"
"Number of distributed groups different than number requested"
)
return dist

Expand Down
Loading