From e139526c5e4bc5d151c7b698ca1c27a31eb35f58 Mon Sep 17 00:00:00 2001 From: Rongpu Zhou Date: Fri, 6 Sep 2024 16:45:56 -0700 Subject: [PATCH] clean up and improve validredshifts.py --- py/desispec/validredshifts.py | 143 +++++++++++++++++++--------------- 1 file changed, 80 insertions(+), 63 deletions(-) diff --git a/py/desispec/validredshifts.py b/py/desispec/validredshifts.py index fa7733ba7..57c228683 100644 --- a/py/desispec/validredshifts.py +++ b/py/desispec/validredshifts.py @@ -37,7 +37,7 @@ def get_good_fiberstatus(cat, isqso=False): return good_fiberstatus -def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, extra_columns=None, emline_path=None, ignore_emline=False): +def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, extra_columns=None): ''' Validate the redshift quality with tracer-dependent criteria for redrock+afterburner results. @@ -48,8 +48,6 @@ def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, ex fiberstatus_cut: bool (default True), if True, impose requirements on COADD_FIBERSTATUS and ZWARN return_target_columns: bool (default False), if True, include columns that indicate if the object belongs to each class of DESI targets extra_columns: list of str (default None), additional columns to include in the output - emline_path: str (default None), specify the location of the emline file; by default the emline file is in the same directory as the redrock file - ignore_emline: bool (default False), if True, ignore the emline file and do not validate the ELG redshift Returns: cat: astropy table with basic columns such as TARGETID and boolean columns (e.g., GOOD_BGS) @@ -58,14 +56,11 @@ def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, ex output_columns = ['GOOD_BGS', 'GOOD_LRG', 'GOOD_ELG', 'GOOD_QSO'] if return_target_columns: - output_columns += ['LRG', 'ELG', 'QSO', 'ELG_LOP', 'ELG_HIP', 'ELG_VLO', 'BGS_ANY', 'BGS_FAINT', 'BGS_BRIGHT'] - - if ignore_emline: - output_columns.remove('GOOD_ELG') + output_columns = ['LRG', 'ELG', 'QSO', 'ELG_LOP', 'ELG_HIP', 'ELG_VLO', 'BGS_ANY', 'BGS_FAINT', 'BGS_BRIGHT'] + output_columns if extra_columns is None: extra_columns = ['TARGETID', 'Z', 'ZWARN', 'COADD_FIBERSTATUS'] - output_columns = output_columns + list(np.array(extra_columns)[~np.in1d(extra_columns, output_columns)]) + output_columns = list(np.array(extra_columns)[~np.in1d(extra_columns, output_columns)]) + output_columns ############################ Load data ############################ @@ -78,10 +73,10 @@ def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, ex dir_path = os.path.dirname(redrock_path) qso_mgii_path = os.path.join(dir_path, os.path.basename(redrock_path).replace('redrock-', 'qso_mgii-')) qso_qn_path = os.path.join(dir_path, os.path.basename(redrock_path).replace('redrock-', 'qso_qn-')) - if emline_path is None: - emline_path = os.path.join(dir_path, os.path.basename(redrock_path).replace('redrock-', 'emline-')) + emline_path = os.path.join(dir_path, os.path.basename(redrock_path).replace('redrock-', 'emline-')) tmp_redshifts = Table(fitsio.read(redrock_path, ext='REDSHIFTS', columns=columns_redshifts)) + tid = tmp_redshifts['TARGETID'].copy() # read the full fibermap until we determine the targeting columns from desitarget.targets import main_cmx_or_sv @@ -93,32 +88,41 @@ def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, ex desi_target_col, bgs_target_col, _ = surv_target desi_mask, bgs_mask, _ = surv_mask tmp_fibermap = Table(tmp_fibermap[columns_fibermap + surv_target]) + assert np.all(tid==tmp_fibermap['TARGETID']) + tmp_fibermap.remove_column('TARGETID') - tmp_qso_mgii = Table(fitsio.read(qso_mgii_path, columns=(columns_qso_mgii))) - tmp_qso_qn = Table(fitsio.read(qso_qn_path, columns=(columns_qso_qn))) - if not ignore_emline: + ignore_emline = False + ignore_qso = False + + if os.path.isfile(emline_path): tmp_emline = Table(fitsio.read(emline_path, columns=(columns_emline))) + assert np.all(tid==tmp_emline['TARGETID']) + tmp_emline.remove_column('TARGETID') + else: + print('emline file not found:', emline_path) + ignore_emline = True - # Sanity check - tid = tmp_redshifts['TARGETID'].copy() - if not (np.all(tid==tmp_fibermap['TARGETID']) \ - # and np.all(tid==tmp_emline['TARGETID']) \ - and np.all(tid==tmp_qso_mgii['TARGETID']) \ - and np.all(tid==tmp_qso_qn['TARGETID'])): - raise ValueError - if (not ignore_emline) and (not np.all(tid==tmp_emline['TARGETID'])): - raise ValueError + if os.path.isfile(qso_mgii_path): + tmp_qso_mgii = Table(fitsio.read(qso_mgii_path, columns=(columns_qso_mgii))) + assert np.all(tid==tmp_qso_mgii['TARGETID']) + tmp_qso_mgii.remove_column('TARGETID') + else: + print('qso_mgii file not found:', qso_mgii_path) + ignore_qso = True - tmp_fibermap.remove_column('TARGETID') - tmp_qso_mgii.remove_column('TARGETID') - tmp_qso_qn.remove_column('TARGETID') - if not ignore_emline: - tmp_emline.remove_column('TARGETID') + if os.path.isfile(qso_qn_path): + tmp_qso_qn = Table(fitsio.read(qso_qn_path, columns=(columns_qso_qn))) + assert np.all(tid==tmp_qso_qn['TARGETID']) + tmp_qso_qn.remove_column('TARGETID') + else: + print('qso_qn file not found:', qso_qn_path) + ignore_qso = True + cat = hstack([tmp_redshifts, tmp_fibermap], join_type='exact') if not ignore_emline: - cat = hstack([tmp_redshifts, tmp_fibermap, tmp_qso_mgii, tmp_qso_qn, tmp_emline], join_type='inner') - else: - cat = hstack([tmp_redshifts, tmp_fibermap, tmp_qso_mgii, tmp_qso_qn], join_type='inner') + cat = hstack([cat, tmp_emline], join_type='exact') + if not ignore_qso: + cat = hstack([cat, tmp_qso_mgii, tmp_qso_qn], join_type='exact') if return_target_columns: for name in ['LRG', 'ELG', 'QSO', 'ELG_LOP', 'ELG_HIP', 'ELG_VLO', 'BGS_ANY', 'BGS_FAINT', 'BGS_BRIGHT']: @@ -140,13 +144,16 @@ def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, ex # cat['BGS_FAINT'] = cat['BGS_TARGET'] & 2**0 > 0 # cat['BGS_BRIGHT'] = cat['BGS_TARGET'] & 2**1 > 0 - cat = actually_validate(cat, fiberstatus_cut=fiberstatus_cut, ignore_emline=ignore_emline) + res = actually_validate(cat, fiberstatus_cut=fiberstatus_cut, ignore_emline=ignore_emline, ignore_qso=ignore_qso) + cat = hstack([cat, res]) + + output_columns = [col for col in output_columns if col in cat.colnames] cat = cat[output_columns] return cat -def actually_validate(cat, fiberstatus_cut=True, ignore_emline=False): +def actually_validate(cat, fiberstatus_cut=True, ignore_emline=False, ignore_qso=False): ''' Apply redshift quality criteria @@ -159,50 +166,60 @@ def actually_validate(cat, fiberstatus_cut=True, ignore_emline=False): extra_columns: list of str (default None), additional columns to include in the output emline_path: str (default None), specify the location of the emline file; by default the emline file is in the same directory as the redrock file ignore_emline: bool (default False), if True, ignore the emline file and do not validate the ELG redshift + ignore_qso: bool (default False), if True, do not validate the QSO redshift Returns: - cat: astropy table with boolean columns (e.g., GOOD_BGS) added + res: astropy table with boolean columns (e.g., GOOD_BGS) ''' + res = Table() + # BGS - cat['GOOD_BGS'] = cat['ZWARN']==0 - cat['GOOD_BGS'] &= cat['DELTACHI2']>40 + res['GOOD_BGS'] = cat['ZWARN']==0 + res['GOOD_BGS'] &= cat['DELTACHI2']>40 if fiberstatus_cut: - cat['GOOD_BGS'] &= get_good_fiberstatus(cat) + res['GOOD_BGS'] &= get_good_fiberstatus(cat) # LRG - cat['GOOD_LRG'] = cat['ZWARN']==0 - cat['GOOD_LRG'] &= cat['Z']<1.5 - cat['GOOD_LRG'] &= cat['DELTACHI2']>15 + res['GOOD_LRG'] = cat['ZWARN']==0 + res['GOOD_LRG'] &= cat['Z']<1.5 + res['GOOD_LRG'] &= cat['DELTACHI2']>15 if fiberstatus_cut: - cat['GOOD_LRG'] &= get_good_fiberstatus(cat) + res['GOOD_LRG'] &= get_good_fiberstatus(cat) # ELG if not ignore_emline: with warnings.catch_warnings(): warnings.simplefilter('ignore') - cat['GOOD_ELG'] = (cat['OII_FLUX']>0) & (cat['OII_FLUX_IVAR']>0) - cat['GOOD_ELG'] &= np.log10(cat['OII_FLUX'] * np.sqrt(cat['OII_FLUX_IVAR'])) > 0.9 - 0.2 * np.log10(cat['DELTACHI2']) + res['GOOD_ELG'] = (cat['OII_FLUX']>0) & (cat['OII_FLUX_IVAR']>0) + res['GOOD_ELG'] &= np.log10(cat['OII_FLUX'] * np.sqrt(cat['OII_FLUX_IVAR'])) > 0.9 - 0.2 * np.log10(cat['DELTACHI2']) if fiberstatus_cut: - cat['GOOD_ELG'] &= get_good_fiberstatus(cat) - - # QSO - adopted from the code from Edmond - # https://github.com/echaussidon/LSS/blob/8ca53f4c38cfa29722ee6958687e188cc894ed2b/py/LSS/qso_cat_utils.py#L282 - cat['IS_QSO_QN'] = np.max(np.array([cat[name] for name in ['C_LYA', 'C_CIV', 'C_CIII', 'C_MgII', 'C_Hbeta', 'C_Halpha']]), axis=0) > 0.95 - cat['IS_QSO_QN_NEW_RR'] &= cat['IS_QSO_QN'] - cat['QSO_MASKBITS'] = np.zeros(len(cat), dtype=int) - cat['QSO_MASKBITS'][cat['SPECTYPE']=='QSO'] += 2**1 - cat['QSO_MASKBITS'][cat['IS_QSO_MGII']] += 2**2 - cat['QSO_MASKBITS'][cat['IS_QSO_QN']] += 2**3 - cat['QSO_MASKBITS'][cat['IS_QSO_QN_NEW_RR']] += 2**4 - cat['Z_RR'] = cat['Z'] - cat['Z'][cat['IS_QSO_QN_NEW_RR']] = cat['Z_NEW'][cat['IS_QSO_QN_NEW_RR']].copy() - cat['ZERR'][cat['IS_QSO_QN_NEW_RR']] = cat['ZERR_NEW'][cat['IS_QSO_QN_NEW_RR']].copy() - # Correct bump at z~3.7 - sel_pb_redshift = (((cat['Z'] > 3.65) & (cat['Z'] < 3.9)) | ((cat['Z'] > 5.15) & (cat['Z'] < 5.35))) & ((cat['C_LYA'] < 0.95) | (cat['C_CIV'] < 0.95)) - cat['QSO_MASKBITS'][sel_pb_redshift] = 0 - cat['GOOD_QSO'] = cat['QSO_MASKBITS']>0 - if fiberstatus_cut: - cat['GOOD_QSO'] &= get_good_fiberstatus(cat, isqso=True) + res['GOOD_ELG'] &= get_good_fiberstatus(cat) + + if not ignore_qso: + # QSO - adopted from the code from Edmond + # https://github.com/echaussidon/LSS/blob/8ca53f4c38cfa29722ee6958687e188cc894ed2b/py/LSS/qso_cat_utils.py#L282 + res['IS_QSO_QN'] = np.max(np.array([cat[name] for name in ['C_LYA', 'C_CIV', 'C_CIII', 'C_MgII', 'C_Hbeta', 'C_Halpha']]), axis=0) > 0.95 + res['IS_QSO_QN_NEW_RR'] = cat['IS_QSO_QN_NEW_RR'] & res['IS_QSO_QN'] + res['QSO_MASKBITS'] = np.zeros(len(cat), dtype=int) + res['QSO_MASKBITS'][cat['SPECTYPE']=='QSO'] += 2**1 + res['QSO_MASKBITS'][cat['IS_QSO_MGII']] += 2**2 + res['QSO_MASKBITS'][res['IS_QSO_QN']] += 2**3 + res['QSO_MASKBITS'][res['IS_QSO_QN_NEW_RR']] += 2**4 + res['Z'] = cat['Z'].copy() + res['Z'][res['IS_QSO_QN_NEW_RR']] = cat['Z_NEW'][res['IS_QSO_QN_NEW_RR']].copy() + res['ZERR'] = cat['ZERR'].copy() + res['ZERR'][res['IS_QSO_QN_NEW_RR']] = cat['ZERR_NEW'][res['IS_QSO_QN_NEW_RR']].copy() + # Correct bump at z~3.7 + sel_pb_redshift = (((res['Z'] > 3.65) & (res['Z'] < 3.9)) | ((res['Z'] > 5.15) & (res['Z'] < 5.35))) & ((cat['C_LYA'] < 0.95) | (cat['C_CIV'] < 0.95)) + res['QSO_MASKBITS'][sel_pb_redshift] = 0 + res['GOOD_QSO'] = res['QSO_MASKBITS']>0 + if fiberstatus_cut: + res['GOOD_QSO'] &= get_good_fiberstatus(cat, isqso=True) - return cat + # Remove unnecessary columns + columns_to_keep = ['GOOD_BGS', 'GOOD_LRG', 'GOOD_ELG', 'GOOD_QSO'] + columns_to_keep = [col for col in columns_to_keep if col in res.colnames] + res = res[columns_to_keep] + + return res