Merge pull request legolason#71 from WenkeRen/master

Solving the high-z failed
WenkeRen · Jul 16, 2024 · c7525d9 · c7525d9
2 parents 2dcb3f6 + 4b203da
commit c7525d9
Show file tree

Hide file tree

Showing 3 changed files with 102 additions and 53 deletions.
diff --git a/src/pyqsofit/HostDecomp.py b/src/pyqsofit/HostDecomp.py
@@ -9,8 +9,10 @@
 # Description：Use the PCA data to decompose the spectra with a priori
 """
 import os, glob
+import warnings
+
 from astropy.io import fits
-import pandas as pd
+from numpy import genfromtxt
 import numpy as np
 from lmfit import Minimizer, Parameters
 from scipy.interpolate import interp1d
@@ -88,9 +90,7 @@ def __init__(self, template_path, n_template=5, template_type='PCA'):
             self._read_PCA(template_path)
         elif template_type == 'BC03':
             self._read_BC03(template_path)
-        elif template_type == 'indo19':
-            self._read_INDO(template_path)
-        elif template_type == 'indo50':
+        elif template_type == 'indo':
             self._read_INDO(template_path)
         elif template_type == 'M09_17':
             self._read_M09(template_path)
@@ -155,6 +155,7 @@ def _read_INDO(self, template_path):
         return wave_gal, flux_gal
 
     def _read_M09(self, template_path):
+        import pandas as pd
         flux_temp = np.array([])
         wave_gal = np.array([])
         cc = 0
@@ -173,6 +174,7 @@ def _read_M09(self, template_path):
         return wave_gal, flux_gal
 
     def _read_MILES(self, template_path):
+        import pandas as pd
         flux_temp = np.array([])
         wave_gal = np.array([])
         cc = 0
@@ -279,6 +281,7 @@ def __init__(self, wave, flux, err, n_gal, n_qso, path, host_type='PCA', qso_typ
         self.err = err
         self.n_gal = n_gal
         self.n_qso = n_qso
+        self.assertion = True
 
         path2prior = os.path.join(path, 'pca/prior')
         path2qso = os.path.join(path, 'pca/Yip_pca_templates')
@@ -296,25 +299,30 @@ def __init__(self, wave, flux, err, n_gal, n_qso, path, host_type='PCA', qso_typ
         wave_min = np.max([np.min(wave), np.min(self.qso_tmp.wave_qso), np.min(self.gal_tmp.wave_gal)])
         wave_max = np.min([np.max(wave), np.max(self.qso_tmp.wave_qso), np.max(self.gal_tmp.wave_gal)])
         ind_data = np.where((wave > wave_min) & (wave < wave_max), True, False)
-        self.wave, self.flux, self.err = wave[ind_data], flux[ind_data], err[ind_data]
-
-        if na_mask == True:
-            self.wave_fit, self.flux_fit, self.err_fit = _na_mask(self.wave, self.flux, self.err)
+        if np.sum(ind_data)/len(ind_data) < 0.5:
+            self.assertion = False
+            warnings.warn('The templates used for decomposition can only cover less than 50% of the original data. '
+                             'Please check the settings and consider close the decomposition function.')
         else:
-            self.wave_fit, self.flux_fit, self.err_fit = self.wave, self.flux, self.err
+            self.wave, self.flux, self.err = wave[ind_data], flux[ind_data], err[ind_data]
 
-        self.qso_datacube = self.qso_tmp.interp_data(self.wave_fit)
-        self.gal_datacube = self.gal_tmp.interp_data(self.wave_fit)
+            if na_mask == True:
+                self.wave_fit, self.flux_fit, self.err_fit = _na_mask(self.wave, self.flux, self.err)
+            else:
+                self.wave_fit, self.flux_fit, self.err_fit = self.wave, self.flux, self.err
 
-        self.n_qso = self.qso_tmp.n_template
-        self.n_gal = self.gal_tmp.n_template
+            self.qso_datacube = self.qso_tmp.interp_data(self.wave_fit)
+            self.gal_datacube = self.gal_tmp.interp_data(self.wave_fit)
 
-        if self.n_qso != n_qso:
-            raise ValueError(f'The number of qso template is not correct. '
-                             f'You ask for {n_qso} template, while the maximum number of qso template is {self.n_qso}')
-        if self.n_gal != n_gal:
-            raise ValueError(f'The number of gal template is not correct. '
-                             f'You ask for {n_gal} template, while the maximum number of gal template is {self.n_gal}')
+            self.n_qso = self.qso_tmp.n_template
+            self.n_gal = self.gal_tmp.n_template
+
+            if self.n_qso != n_qso:
+                raise ValueError(f'The number of qso template is not correct. '
+                                 f'You ask for {n_qso} template, while the maximum number of qso template is {self.n_qso}')
+            if self.n_gal != n_gal:
+                raise ValueError(f'The number of gal template is not correct. '
+                                 f'You ask for {n_gal} template, while the maximum number of gal template is {self.n_gal}')
 
     def auto_decomp(self):
         flux_temp = np.vstack((self.qso_datacube, self.gal_datacube)).T
@@ -381,29 +389,41 @@ def __init__(self, wave, flux, err, n_gal, n_qso, path, host_type='PCA', qso_typ
         QSO_prior_name = f'QSO_pp_prior_{qso_type}.csv'
         self.fh_ini_list = fh_ini_list
 
+        self.wave = wave
+        self.flux = flux
+        self.err = err
+        self.n_gal = n_gal
+        self.n_qso = n_qso
+        self.assertion = True
+
         self.qso_tmp = QSO_PCA(path2qso, n_qso, template_name=qso_type)
         self.gal_tmp = host_template(n_template=n_gal, template_path=path2host, template_type=host_type)
 
         # Get the shortest wavelength range
         wave_min = np.max([np.min(wave), np.min(self.qso_tmp.wave_qso), np.min(self.gal_tmp.wave_gal)])
         wave_max = np.min([np.max(wave), np.max(self.qso_tmp.wave_qso), np.max(self.gal_tmp.wave_gal)])
         ind_data = np.where((wave > wave_min) & (wave < wave_max), True, False)
-        self.wave, self.flux, self.err = wave[ind_data], flux[ind_data], err[ind_data]
-
-        if na_mask == True:
-            self.wave_fit, self.flux_fit, self.err_fit = _na_mask(self.wave, self.flux, self.err)
+        if np.sum(ind_data)/len(ind_data) < 0.5:
+            warnings.warn('The templates used for decomposition can only cover less than 50% of the original data. '
+                             'Please check the settings and consider close the decomposition function.')
+            self.assertion = False
         else:
-            self.wave_fit, self.flux_fit, self.err_fit = self.wave, self.flux, self.err
+            self.wave, self.flux, self.err = wave[ind_data], flux[ind_data], err[ind_data]
+
+            if na_mask == True:
+                self.wave_fit, self.flux_fit, self.err_fit = _na_mask(self.wave, self.flux, self.err)
+            else:
+                self.wave_fit, self.flux_fit, self.err_fit = self.wave, self.flux, self.err
 
-        self.qso_datacube = self.qso_tmp.interp_data(self.wave_fit)
-        self.gal_datacube = self.gal_tmp.interp_data(self.wave_fit)
-        self.n_qso = self.qso_tmp.n_template
-        self.n_gal = self.gal_tmp.n_template
+            self.qso_datacube = self.qso_tmp.interp_data(self.wave_fit)
+            self.gal_datacube = self.gal_tmp.interp_data(self.wave_fit)
+            self.n_qso = self.qso_tmp.n_template
+            self.n_gal = self.gal_tmp.n_template
 
-        self.qso_prior = np.array([[0, 1]] * self.n_qso)
-        self.gal_prior = np.array([[0, 1]] * self.n_gal)
+            self.qso_prior = np.array([[0, 1]] * self.n_qso)
+            self.gal_prior = np.array([[0, 1]] * self.n_gal)
 
-        self.read_prior(path2prior, QSO_prior_name, GAL_prior_name)
+            self.read_prior(path2prior, QSO_prior_name, GAL_prior_name)
 
     def read_prior(self, prior_loc, QSO_prior_name='QSO_pp_prior.csv', GAL_prior_name='GAL_pp_prior.csv'):
         self.qso_prior = self._read_prior(os.path.join(prior_loc, QSO_prior_name), self.n_qso)
@@ -498,7 +518,8 @@ def _residuals(self, param: Parameters, yval, err, reg_factor):
         return r0 + sig/nn
 
     def _read_prior(self, path2prior, n_pp):
-        prior = np.array(pd.read_csv(path2prior))
+        # prior = np.array(pd.read_csv(path2prior))
+        prior = genfromtxt(path2prior, delimiter=',', skip_header=1)
         return prior[:n_pp]
 
     def qso_model(self, param: list = None, wave=None):

diff --git a/src/pyqsofit/PyQSOFit.py b/src/pyqsofit/PyQSOFit.py
@@ -220,31 +220,50 @@ def Fit(self, name=None, nsmooth=1, and_mask=False, or_mask=False, reject_badpix
 
         host_prior: bool, optional
             This parameter is only useful when the decompose_host is True and BC03 is False. If True, the code will
-            adopt the prior parameters given in the pca file to perform host decomposition.
+            adopt the prior parameters given in the pca file to perform host decomposition. See arXiv:2406.17598 for
+            more description about the functionality of this prior.
 
         host_prior_scale: float, optional
             If the prior decomposition is performed, the code will use this parameter to scale the prior penalty to the
             original chi2. Default: 0.2
 
         host_line_mask: bool, optional
             If True, the line region of galaxy will be masked when subtracted from original spectra. Default: True
-            
-        BC03: bool, optional
-            if True, it will use Bruzual1 & Charlot 2003 host model to fit spectrum, high shift host will be low resolution R ~ 300, the rest is R ~ 2000. Default: False
-        
-        Mi: float, optional
-            i-band absolute magnitude. It only works when decompose_host is True. If not None, the Luminosity redshift binned PCA will be used to decompose
-            the spectrum. Default: None
-            
+
+        decomp_na_mask: bool, optional
+            If True, the narrow line region will be masked when perform decomposition so that the model would not be
+            affected by the emission lines. In cases we are using PCA templates to perform the decomposition,
+            restricted by the template numbers, the model may not enough to recover all the emission lines with various
+            width and strength. For purpose for only separating host continuum, we suggest to set this option as True.
+
+        qso_type: str, optional
+            The name of quasar PCA templates used in the host decomposition. This parameter can be set as 'global' or
+            '{1}ZBIN{2}' where 1 is the luminosity bin from one of [A, B, C, D] and 2 is the redshift bin from one of
+            [1, 2, 3, 4, 5]. Yip et al. (2004) built a series sets of quasar PCA templates based on different redshift
+            and absolute i-band magnitude subsamples. Check https://doi.org/10.1086/425626 for more detail. If the
+            host_prior is set to True, then only 'DZBIN1' and 'CZBIN1' is supported.
+
         npca_gal: int, optional
             the number of galaxy PCA components applied. It only works when decompose_host is True. The default is 5,
             which is already account for 98.37% galaxies.
-        
+
+        host_type: str, optional
+            The name of galaxy templates used in the host decomposition. We have two tested build-in options for this
+            parameter: PCA, BC03. Only PCA option is allowed if host_prior=True. For pro user who want to customize
+            their own templates, please check Class host_template in HostDecomp.py.
+
         npca_qso: int, optional
             the number of QSO PCA components applied. It only works when decompose_host is True. The default is 20,
             No matter the global or luminosity-redshift binned PCA is used, it can reproduce > 92% QSOs. The binned PCA
             is better if have Mi information.
-         
+
+        BC03: bool, optional -- Unavailable
+            if True, it will use Bruzual1 & Charlot 2003 host model to fit spectrum, high shift host will be low resolution R ~ 300, the rest is R ~ 2000. Default: False
+
+        Mi: float, optional -- Unavailable
+            i-band absolute magnitude. It only works when decompose_host is True. If not None, the Luminosity redshift binned PCA will be used to decompose
+            the spectrum. Default: None
+
         Fe_uv_op: bool, optional
             if True, fit continuum with UV and optical FeII template. Default: True
 
@@ -749,16 +768,31 @@ def _CalculateSN(self, wave, flux, alter=True):
 
     def decompose_host_qso(self, wave, flux, err, path):
         """Decompose the host galaxy from QSO"""
+        # Initialize default values
+        self.host = np.zeros(len(wave))
+        self.decomposed = True
+        self.host_result = np.array([])
+        self.host_result_type = np.array([])
+        self.host_result_name = np.array([])
+
         if self.host_prior is True:
             prior_fitter = Prior_decomp(self.wave, self.flux, self.err, self.npca_gal, self.npca_qso,
                                         path, host_type=self.host_type, qso_type=self.qso_type,
                                         na_mask=self.decomp_na_mask)
-            datacube, frac_host_4200, frac_host_5100, qso_par, gal_par = prior_fitter.auto_decomp(self.host_prior_scale)
+            if prior_fitter.assertion is True:
+                datacube, frac_host_4200, frac_host_5100, qso_par, gal_par = prior_fitter.auto_decomp(self.host_prior_scale)
+            else:
+                self.decomposed = False
+                return self.wave, self.flux, self.err
         else:
             linear_fitter = Linear_decomp(self.wave, self.flux, self.err, self.npca_gal, self.npca_qso, path,
                                           host_type=self.host_type, qso_type=self.qso_type,
                                           na_mask=self.decomp_na_mask)
-            datacube, frac_host_4200, frac_host_5100, qso_par, gal_par = linear_fitter.auto_decomp()
+            if linear_fitter.assertion is True:
+                datacube, frac_host_4200, frac_host_5100, qso_par, gal_par = linear_fitter.auto_decomp()
+            else:
+                self.decomposed = False
+                return self.wave, self.flux, self.err
 
         # for some negative host template, we do not do the decomposition # not apply anymore
         # For a few cases, the host template is too weak that the host spectra (data - qso) would be mostly negative
@@ -767,16 +801,10 @@ def decompose_host_qso(self, wave, flux, err, path):
         host_spec = datacube[1, :] - datacube[4, :]
         if np.sum(np.where(datacube[3, :] < 0, True, False) | np.where(datacube[4, :] < 0, True, False)) > 0.1 * \
                 datacube.shape[1] or np.median(datacube[3, :]) < 0.01 * flux_level or np.median(host_spec) < 0:
-            self.host = np.zeros(len(wave))
             self.decomposed = False
-            self.host_result = np.array([])
-            self.host_result_type = np.array([])
-            self.host_result_name = np.array([])
             if self.verbose:
                 print('Got negative host galaxy / QSO flux over 10% of coverage, decomposition is not applied!')
         else:
-            self.decomposed = True
-            del self.wave, self.flux, self.err
             self.wave = datacube[0, :]
 
             rchi2_decomp = np.sum((datacube[1, :] - datacube[4, :] - datacube[3, :]) ** 2 / datacube[2, :] ** 2) / (

diff --git a/tests/test_dr7sample.py b/tests/test_dr7sample.py
@@ -309,10 +309,10 @@ def test_dr7(nqsofit=20):
                 q = QSOFit(lam, flux, err, z, ra=ra, dec=dec, plateid=plate, mjd=mjd, fiberid=fiber,  path=path_ex)
 
                 # Do the fitting
-                q.Fit(param_file_name='qsopar.fits', name=None, qso_type='global', host_type='BC03', save_fig=False, save_result=False)
+                q.Fit(param_file_name='qsopar.fits', name=None, qso_type='CZBIN1', host_type='BC03', save_fig=False, save_result=False)
 
                 # Test with host prior
-                q.Fit(param_file_name='qsopar.fits', name=None, host_prior=True, qso_type='global', host_type='PCA', save_fig=False, save_result=False)
+                q.Fit(param_file_name='qsopar.fits', name=None, host_prior=True, qso_type='CZBIN1', host_type='PCA', save_fig=False, save_result=False)
 
                 # Emission line loop
                 for j, line in enumerate(line_calc_names):