From e2b48a948df41410988bf6e276809bbf21994258 Mon Sep 17 00:00:00 2001
From: Bryna Hazelton <bryna.hazelton@gmail.com>
Date: Fri, 30 Jun 2023 13:36:02 -0700
Subject: [PATCH] Add comments to frequency_average method

---
 pyuvdata/uvdata/uvdata.py | 50 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/pyuvdata/uvdata/uvdata.py b/pyuvdata/uvdata/uvdata.py
index 2dfa615f05..deeeb41cd3 100644
--- a/pyuvdata/uvdata/uvdata.py
+++ b/pyuvdata/uvdata/uvdata.py
@@ -10414,8 +10414,7 @@ def frequency_average(
         Does a simple average over an integer number of input channels, leaving
         flagged samples out of the average.
 
-        In the future, this method will support non-equally spaced channels
-        and varying channel widths. It will also support setting the frequency
+        In the future, this method will support setting the frequency
         to the true mean of the averaged non-flagged frequencies rather than
         the simple mean of the input channel frequencies. For now it does not.
 
@@ -10446,12 +10445,18 @@ def frequency_average(
             combined into a smaller frequency bin (keep_ragged=True). Note that if
             ragged frequencies are kept, the final averaged object will have
             future_array_shapes=True because it will have varying channel widths.
+
         """
+        # The default will be changing soon, so it's currently set to None in the
+        # function signature so we can detect that it's not set and warn about the
+        # changing default.
         kr_use_default = False
         if keep_ragged is None:
             kr_use_default = True
             keep_ragged = False
 
+        # All the logic with future array shapes.
+        # Test to see if we need to restore it to current shapes at the end.
         reset_cs = False
         if not self.future_array_shapes:
             self.use_future_array_shapes()
@@ -10485,9 +10490,15 @@ def frequency_average(
                     "before frequency averaging."
                 )
 
+        # Figure out how many channels are in each spw so we can tell if we have a
+        # ragged situation (indicated by the some_uneven variable).
+        # While we're at it, build up some useful dicts for later, keyed on spw
         nchans_spw = np.zeros(self.Nspws, dtype=int)
+        # final_nchan will hold the number of channels (Nfreqs) after averaging.
         final_nchan = 0
+        # spw_chans will hold the original channel indices for each spw
         spw_chans = {}
+        # final_spw_chans will hold the final channel indices for each spw
         final_spw_chans = {}
         some_uneven = False
         for spw_ind, spw in enumerate(self.spw_array):
@@ -10505,6 +10516,7 @@ def frequency_average(
             )
             final_nchan += this_final_nchan
 
+        # Warn about changing keep_ragged default if it applies
         if some_uneven and kr_use_default:
             warnings.warn(
                 "Some spectral windows do not divide evenly by `n_chan_to_avg` and the "
@@ -10513,6 +10525,8 @@ def frequency_average(
                 "True or False to silence this warning.",
                 DeprecationWarning,
             )
+        # Cannot go back to current array shapes if we have ragged channels that we're
+        # keeping
         if some_uneven and keep_ragged and reset_cs:
             warnings.warn(
                 "Ragged frequencies will result in varying channel widths, so "
@@ -10520,6 +10534,9 @@ def frequency_average(
                 "Use keep_ragged=False to avoid this."
             )
 
+        # Since we have to loop through the spws, we cannot do the averaging with a
+        # simple reshape and average. So we need to create arrays to hold the
+        # various metadata & data after averaging
         final_freq_array = np.zeros(final_nchan, dtype=float)
         final_channel_width = np.zeros(final_nchan, dtype=float)
         final_flex_spw_id_array = np.zeros(final_nchan, dtype=int)
@@ -10534,7 +10551,15 @@ def frequency_average(
                 final_shape_tuple, dtype=self.nsample_array.dtype
             )
 
+        # Now loop through the spws to actually do the averaging
         for spw_ind, spw in enumerate(self.spw_array):
+            # n_final_chan_reg is the number of regular (non-ragged) channels after
+            # averaging in this spw.
+            # For the regular channels, we can average more quickly by reshaping the
+            # frequency axis into two axes of lengths (n_final_chan_reg, n_chan_to_avg)
+            # followed by an average (or sum) over the axis of length n_chan_to_avg.
+            # Then we just have to do one more calculation for the remaining input
+            # channels if there are ragged channels.
             n_final_chan_reg = int(np.floor(nchans_spw[spw_ind] / n_chan_to_avg))
             nfreq_mod_navg = nchans_spw[spw_ind] % n_chan_to_avg
             these_inds = spw_chans[spw]
@@ -10546,23 +10571,28 @@ def frequency_average(
                 # not an even number of final channels
                 regular_inds = these_inds[0 : n_final_chan_reg * n_chan_to_avg]
                 if not keep_ragged:
+                    # only use the non-ragged inds
                     these_inds = regular_inds
                 else:
+                    # find the irregular inds for this spw
                     this_ragged = True
                     irregular_inds = these_inds[n_final_chan_reg * n_chan_to_avg :]
                     this_final_reg_inds = this_final_reg_inds[:-1]
 
+            # Now do the reshaping and combining across the n_chan_to_avg length axis
             final_freq_array[this_final_reg_inds] = (
                 self.freq_array[regular_inds]
                 .reshape((n_final_chan_reg, n_chan_to_avg))
                 .mean(axis=1)
             )
+            # take a sum here rather than a mean to get the final channel width
             final_channel_width[this_final_reg_inds] = (
                 self.channel_width[regular_inds]
                 .reshape((n_final_chan_reg, n_chan_to_avg))
                 .sum(axis=1)
             )
             if this_ragged:
+                # deal with the final ragged channel
                 final_freq_array[final_spw_chans[spw][-1]] = np.mean(
                     self.freq_array[irregular_inds]
                 )
@@ -10612,6 +10642,10 @@ def frequency_average(
 
                 # need to update mask if a downsampled visibility will be flagged
                 # so that we don't set it to zero
+                # This is a common radio astronomy convention that when averaging over
+                # entirely flagged channels, you include the flagged channels in the
+                # result (so it's not zero) whereas you exclude flagged channels if
+                # there are any unflagged channels in the average.
                 for chan_ind in np.arange(n_final_chan_reg):
                     this_chan = final_spw_chans[spw][chan_ind]
                     if (final_flag_array[:, this_chan]).any():
@@ -10636,6 +10670,9 @@ def frequency_average(
                     ff_inds = np.nonzero(fully_flagged)
                     irreg_mask[ax0_inds[ff_inds], :, ax2_inds[ff_inds]] = False
 
+                # create a masked data array from the data_array and mask_array
+                # (based on the flag_array).
+                # This lets numpy handle the averaging with flags.
                 masked_reg_data = np.ma.masked_array(
                     self.data_array[:, regular_inds].reshape(shape_tuple), mask=reg_mask
                 )
@@ -10650,6 +10687,7 @@ def frequency_average(
                     masked_nsample_dtype = np.float32
                 else:
                     masked_nsample_dtype = nsample_dtype
+                # create a masked nsample array from the nsample_array and mask_array
                 masked_reg_nsample = np.ma.masked_array(
                     self.nsample_array[:, regular_inds].reshape(shape_tuple),
                     mask=reg_mask,
@@ -10663,6 +10701,7 @@ def frequency_average(
                     )
 
                 if summing_correlator_mode:
+                    # sum rather than average
                     final_data_array[:, this_final_reg_inds] = np.sum(
                         masked_reg_data, axis=2
                     ).data
@@ -10671,7 +10710,7 @@ def frequency_average(
                             masked_irreg_data, axis=1
                         ).data
                 else:
-                    # need to weight by the nsample_array
+                    # do a weighted average with the weights given by the nsample_array
                     final_data_array[:, this_final_reg_inds] = (
                         np.sum(masked_reg_data * masked_reg_nsample, axis=2)
                         / np.sum(masked_reg_nsample, axis=2)
@@ -10684,6 +10723,8 @@ def frequency_average(
 
                 # nsample array is the fraction of data that we actually kept,
                 # relative to the amount that went into the sum or average.
+                # So it's a sum over the averaged channels divided by the number of
+                # averaged channels
                 # Need to take care to return precision back to original value.
                 final_nsample_array[:, this_final_reg_inds] = (
                     np.sum(masked_reg_nsample, axis=2) / float(n_chan_to_avg)
@@ -10693,6 +10734,7 @@ def frequency_average(
                         np.sum(masked_irreg_nsample, axis=1) / irregular_inds.size
                     ).data.astype(nsample_dtype)
 
+        # Put the final arrays on the object
         self.freq_array = final_freq_array
         self.channel_width = final_channel_width
         self.flex_spw_id_array = final_flex_spw_id_array
@@ -10704,8 +10746,10 @@ def frequency_average(
             self.data_array = final_data_array
             self.nsample_array = final_nsample_array
 
+        # update Nfreqs
         self.Nfreqs = final_nchan
 
+        # return to current shapes if needed and possible
         if reset_cs and not (some_uneven and keep_ragged):
             with warnings.catch_warnings():
                 warnings.filterwarnings("ignore", "This method will be removed")