spacetelescope · melanieclarke · Nov 19, 2024 · Sep 26, 2024 · Sep 26, 2024 · Oct 6, 2024
@@ -0,0 +1 @@
+Added pixel-to-pixel sigma clipping on input backgrounds in the NIRSpec MOS master_background_mos step.  Added option to median filter master background spectrum in both the master_background and master_background_mos steps.
@@ -1,7 +1,7 @@
 Step Arguments
 ==============
 
-The ``combine_1d`` step has one step-specific argument:
+The ``combine_1d`` step has two step-specific arguments:
 
 ``--exptime_key``
   This is a case-insensitive string that identifies the metadata element
@@ -13,3 +13,10 @@ The ``combine_1d`` step has one step-specific argument:
   the string is "unit_weight" or "unit weight", the same weight (1) will
   be used for all input spectra.  If the string is anything else, a warning
   will be logged and unit weight will be used.
+
+``--sigma_clip``
+  Optional factor for sigma clipping outliers when combining spectra. If
+  a floating point value is provided for ``sigma_clip``, this value will be
+  used to set an outlier threshold for any pixels in the input spectra that 
+  deviate from the median and median absolute deviation of the inputs.
+  Defaults to None (such that no clipping is performed).
@@ -4,6 +4,12 @@ Step Arguments
 ==============
 The master background subtraction step uses the following optional arguments.
 
+``--median_kernel``
+  Optional user-supplied kernel size for a moving-median boxcar filter to filter 
+  outliers in the master background spectrum.  The kernel size must be an odd integer.
+  Even integers will be rounded down to the nearest odd integer.
+  Defaults to 1 (which applies no median filtering).
+
 ``--user_background``
   The file name of a user-supplied 1-D master background spectrum. Must be in the form
   of a standard :ref:`x1d <x1d>` product containing a single 'EXTRACT1D' extension.
@@ -15,6 +21,10 @@ The master background subtraction step uses the following optional arguments.
 ``--save_background``
   A boolean indicating whether the computed 1-D master background spectrum should be saved
   to a file. The file name uses a product type suffix of "masterbg".
+  For the `master_background_mos` step, multiple files will be produced including the 1-D 
+  master background spectrum (saved with the suffix "masterbg1d"), the expanded 2-D background spectra
+  for each MOS slitlet (with the suffix "masterbg2d"), and the 1-D background spectra 
+  that were combined into the master background spectrum (with the suffix "bkgx1d").
   If a user-supplied background is specified, this argument is ignored.
   Defaults to ``False``.
 
@@ -28,3 +38,13 @@ The master background subtraction step uses the following optional arguments.
 ``--output_use_model``
   A boolean indicating whether to use the "filename" meta attribute in the data model to
   determine the name of the output file created by the step. Defaults to ``True``.
+
+``--sigma_clip``
+  Factor for sigma clipping outliers and contaminated spectra when combining MOS 
+  background spectra in the `master_background_mos` step.  The value of ``sigma_clip`` 
+  will be used to set an outlier threshold for clipping any pixels in the background 
+  spectra that deviate from the median and median absolute deviation of the inputs before
+  combining the background spectra.  Setting ``sigma_clip`` to None will
+  skip any outlier clipping.  This parameter is only available in `master_background_mos`
+  step and is not available in the generic `master_background` step.
+  Defaults to 3.
@@ -3,14 +3,15 @@
 #
 
 import logging
+import warnings
 
 import numpy as np
 
 from stdatamodels.jwst import datamodels
 
 from jwst.datamodels import ModelContainer
 
-from ..extract_1d.spec_wcs import create_spectral_wcs
+from jwst.extract_1d.spec_wcs import create_spectral_wcs
 
 log = logging.getLogger(__name__)
 log.setLevel(logging.DEBUG)
@@ -157,7 +158,7 @@ def assign_wavelengths(self, input_spectra):
                                        input_spectra[0].declination[0],
                                        self.wavelength)
 
-    def accumulate_sums(self, input_spectra):
+    def accumulate_sums(self, input_spectra, sigma_clip=None):
         """Compute a weighted sum of all the input spectra.
 
         Each pixel of each input spectrum will be added to one pixel of
@@ -180,6 +181,8 @@ def accumulate_sums(self, input_spectra):
         ----------
         input_spectra : list of InputSpectrumModel objects
             List of input spectra.
+        sigma_clip : float, optional
+            Factor for clipping outliers in spectral combination.
         """
 
         # This is the data type for the output spectrum.  We'll use double
@@ -189,21 +192,23 @@ def accumulate_sums(self, input_spectra):
         dq_dtype = cmb_dtype.fields["DQ"][0]
 
         nelem = self.wavelength.shape[0]
+        nspec = len(input_spectra)
 
-        self.flux = np.zeros(nelem, dtype=np.float64)
-        self.flux_error = np.zeros(nelem, dtype=np.float64)
-        self.surf_bright = np.zeros(nelem, dtype=np.float64)
-        self.sb_error = np.zeros(nelem, dtype=np.float64)
-        self.dq = np.zeros(nelem, dtype=dq_dtype)
-        self.weight = np.zeros(nelem, dtype=np.float64)
-        self.count = np.zeros(nelem, dtype=np.float64)
+        dq = np.zeros(nelem, dtype=dq_dtype)
+
+        flux = np.zeros((nspec, nelem), dtype=np.float64)
+        flux_error = np.zeros((nspec, nelem), dtype=np.float64)
+        surf_bright = np.zeros((nspec, nelem), dtype=np.float64)
+        sb_error = np.zeros((nspec, nelem), dtype=np.float64)
+        weight = np.zeros((nspec, nelem), dtype=np.float64)
+        count = np.zeros((nspec, nelem), dtype=np.float64)
 
         self.flux_unit = input_spectra[0].flux_unit
         self.sb_unit = input_spectra[0].sb_unit
 
         n_nan = 0                                       # initial value
         ninputs = 0
-        for in_spec in input_spectra:
+        for s, in_spec in enumerate(input_spectra):
             ninputs += 1
             if in_spec.name is not None:
                 slit_name = f'{ninputs}, slit {in_spec.name}'
@@ -220,25 +225,39 @@ def accumulate_sums(self, input_spectra):
             nan_flag = np.isnan(out_pixel)
             n_nan += nan_flag.sum()
             for i in range(len(out_pixel)):
-                if in_spec.dq[i] & datamodels.dqflags.pixel['DO_NOT_USE'] > 0:
+                # Need to check on dq and nan flux because dq is not set for some x1d
+                if (in_spec.dq[i] & datamodels.dqflags.pixel['DO_NOT_USE'] > 0) | np.isnan(in_spec.flux[i]):
                     continue
                 # Round to the nearest pixel.
                 if nan_flag[i]:         # skip if the pixel number is NaN
                     continue
                 k = round(float(out_pixel[i]))
                 if k < 0 or k >= nelem:
                     continue
-                weight = in_spec.weight[i]
-                self.dq[k] |= in_spec.dq[i]
-                self.flux[k] += in_spec.flux[i] * weight
-                self.flux_error[k] += (in_spec.flux_error[i] * weight)**2
-                self.surf_bright[k] += (in_spec.surf_bright[i] * weight)
-                self.sb_error[k] += (in_spec.sb_error[i] * weight)**2
-                self.weight[k] += weight
-                self.count[k] += 1.
+                dq[k] |= in_spec.dq[i]
+                flux[s, k] = in_spec.flux[i]
+                flux_error[s, k] = in_spec.flux_error[i]
+                surf_bright[s, k] = in_spec.surf_bright[i]
+                sb_error[s, k] = in_spec.sb_error[i]
+                weight[s, k] = in_spec.weight[i]
+                count[s, k] = 1.
+
+        (flux, flux_error, surf_bright, sb_error,
+         weight, count) = self.combine_spectra(flux, flux_error,
+                                               surf_bright, sb_error,
+                                               weight, count, sigma_clip=sigma_clip)
+
         if n_nan > 0:
             log.warning("%d output pixel numbers were NaN", n_nan)
 
+        self.flux = flux
+        self.flux_error = flux_error
+        self.surf_bright = surf_bright
+        self.sb_error = sb_error
+        self.dq = dq
+        self.weight = weight
+        self.count = count
+
         # Since the output wavelengths will not usually be exactly the same
         # as the input wavelengths, it's possible that there will be output
         # pixels for which there is no corresponding pixel in any of the
@@ -259,18 +278,83 @@ def accumulate_sums(self, input_spectra):
             self.count = self.count[index]
         del index
 
-        self.normalized = False
+    def combine_spectra(self, flux, flux_error, surf_bright, sb_error,
+                        weight, count, sigma_clip=None):
+        """Combine accumulated spectra.
 
-    def compute_combination(self):
-        """Compute the combined values."""
+        Parameters
+        ----------
+        flux : ndarray, 2-D
+            Tabulated fluxes for the input spectra in the format
+            [N spectra, M wavelengths].
+        flux_error : ndarray, 2-D
+            Flux errors of input spectra.
+        surf_bright : ndarray, 2-D
+            Surface brightnesses of input spectra.
+        sb_error : ndarray, 2-D
+            Surface brightness errors for input spectra.
+        weight : ndarray, 2-D
+            Pixel weights for input spectra
+        count : ndarray, 2-D
+            Count of how many values at each index in the input arrays.
+        sigma_clip : float, optional
+            Factor for clipping outliers.  Compares input spectra to the
+            median and medaian absolute devaition, by default None.
+
+        Returns
+        -------
+        flux : ndarray, 1-D
+            Combined 1-D fluxes.
+        flux_error : ndarray, 1-D
+            Combined 1-D flux errors.
+        surf_bright : ndarray, 1-D
+            Combined 1-D surface brightnesses.
+        sb_error : ndarray, 1-D
+            Combined 1-D surface brightness errors.
+        weight : ndarray, 1-D
+            Total, per wavelength weights.
+        count : ndarray, 1-D
+            Total count of spectra contributing to each wavelength.
+        """
+
+        # Catch warnings for all NaN slices in an array.
+        with warnings.catch_warnings():
+            warnings.simplefilter('ignore')
+
+            if sigma_clip is not None:
+                # Copy the fluxes for modifying
+                flux_2d = np.array(flux * weight)
+
+                # Mask any missing pixels in the input spectra
+                missing = (count < 1) | (flux_2d == 0.0)
+                flux_2d[missing] = np.nan
+                # Calculate median and median absolute deviation
+                med_flux = np.nanmedian(flux_2d, axis=0)
+                mad = np.nanmedian(np.abs(flux_2d - med_flux))
+
+                # Clip any outlier pixels in the input spectra
+                clipped = np.abs(flux * weight - med_flux) > sigma_clip * mad
+                flux[clipped] = np.nan
+                flux_error[clipped] = np.nan
+                surf_bright[clipped] = np.nan
+                sb_error[clipped] = np.nan
+                count[clipped] = 0
+                weight[clipped] = 0
+
+            # Perform a weighted sum of the input spectra
+            sum_weight = np.nansum(weight, axis=0)
+            sum_weight_nonzero = np.where(sum_weight > 0., sum_weight, 1.)
+
+            flux= np.nansum(flux * weight, axis=0) / sum_weight_nonzero
+            flux_error = np.sqrt(np.nansum((flux_error * weight)**2, axis=0)) / sum_weight_nonzero
+            surf_bright = np.nansum(surf_bright * weight, axis=0) / sum_weight_nonzero
+            sb_error = np.sqrt(np.nansum((sb_error * weight)**2, axis=0)) / sum_weight_nonzero
+            count = np.nansum(count, axis=0)
+
+        self.normalized = True
+
+        return flux, flux_error, surf_bright, sb_error, sum_weight, count
 
-        if not self.normalized:
-            sum_weight = np.where(self.weight > 0., self.weight, 1.)
-            self.surf_bright /= sum_weight
-            self.flux /= sum_weight
-            self.flux_error = np.sqrt(self.flux_error) / sum_weight
-            self.sb_error = np.sqrt(self.sb_error) / sum_weight
-            self.normalized = True
 
     def create_output_data(self):
         """Create the output data.
@@ -557,7 +641,7 @@ def check_exptime(exptime_key):
     return exptime_key
 
 
-def combine_1d_spectra(input_model, exptime_key):
+def combine_1d_spectra(input_model, exptime_key, sigma_clip=None):
     """Combine the input spectra.
 
     Parameters
@@ -602,8 +686,7 @@ def combine_1d_spectra(input_model, exptime_key):
     for order in input_spectra:
         output_spectra[order] = OutputSpectrumModel()
         output_spectra[order].assign_wavelengths(input_spectra[order])
-        output_spectra[order].accumulate_sums(input_spectra[order])
-        output_spectra[order].compute_combination()
+        output_spectra[order].accumulate_sums(input_spectra[order], sigma_clip=sigma_clip)
 
     output_model = datamodels.MultiCombinedSpecModel()
 

@@ -20,12 +20,14 @@ class Combine1dStep(Step):
 
     spec = """
     exptime_key = string(default="exposure_time") # use for weight
+    sigma_clip = float(default=None) # Factor for clipping outliers
     """
 
     def process(self, input_file):
 
         with datamodels.open(input_file) as input_model:
             result = combine1d.combine_1d_spectra(input_model,
-                                                  self.exptime_key)
+                                                  self.exptime_key,
+                                                  sigma_clip=self.sigma_clip)
 
         return result
@@ -49,6 +49,47 @@ def test_err():
     assert np.allclose(result.spec[0].spec_table['SB_ERROR'], expected_error)
 
 
+def test_nan():
+    """Test that nan exclusion works"""
+    spec1 = create_spec_model(flux=1e-9)
+    spec2 = create_spec_model(flux=1e-9)
+    ms = datamodels.MultiSpecModel()
+    ms.meta.exposure.exposure_time = 1
+    ms.spec.append(spec1)
+    ms.spec.append(spec2)
+
+    # Make one pixel bad by being nan, without being marked as DO_NOT_USE.
+    # Result should just contain the second spectrum value
+    BAD_PIX = 5
+    spec1.spec_table['FLUX'][BAD_PIX] = np.nan
+    result = Combine1dStep.call(ms)
+    assert np.isclose(result.spec[0].spec_table['FLUX'][BAD_PIX], spec2.spec_table['FLUX'][BAD_PIX])
+
+
+def test_sigmaclip():
+    """Test that sigma clipping works"""
+    spec1 = create_spec_model(flux=1e-9)
+    spec2 = create_spec_model(flux=1e-9)
+    spec3 = create_spec_model(flux=1e-9)
+    ms = datamodels.MultiSpecModel()
+    ms.meta.exposure.exposure_time = 1
+    ms.spec.append(spec1)
+    ms.spec.append(spec2)
+    ms.spec.append(spec3)
+
+    # Make one pixel bad by being large.
+    # Result should be large
+    BAD_PIX = 5
+    spec1.spec_table['FLUX'][BAD_PIX] = 1.0
+    result = Combine1dStep.call(ms)
+    assert np.isclose(result.spec[0].spec_table['FLUX'][BAD_PIX], 1/3)
+
+    # Now process with sigma_clipping turned on
+    # Result should not just contain the second and third spectra values.
+    result_dq = Combine1dStep.call(ms, sigma_clip=3)
+    assert np.isclose(result_dq.spec[0].spec_table['FLUX'][BAD_PIX], spec2.spec_table['FLUX'][BAD_PIX])
+
+
 def create_spec_model(npoints=10, flux=1e-9, error=1e-10, wave_range=(11, 13)):
     """Create a SpecModel"""
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Added pixel-to-pixel sigma clipping on input backgrounds in the NIRSpec MOS master_background_mos step. Added option to median filter master background spectrum in both the master_background and master_background_mos steps.