Skip to content

Commit

Permalink
clean up and improve validredshifts.py
Browse files Browse the repository at this point in the history
  • Loading branch information
rongpu committed Sep 6, 2024
1 parent e8256df commit e139526
Showing 1 changed file with 80 additions and 63 deletions.
143 changes: 80 additions & 63 deletions py/desispec/validredshifts.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_good_fiberstatus(cat, isqso=False):
return good_fiberstatus


def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, extra_columns=None, emline_path=None, ignore_emline=False):
def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, extra_columns=None):
'''
Validate the redshift quality with tracer-dependent criteria for redrock+afterburner results.
Expand All @@ -48,8 +48,6 @@ def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, ex
fiberstatus_cut: bool (default True), if True, impose requirements on COADD_FIBERSTATUS and ZWARN
return_target_columns: bool (default False), if True, include columns that indicate if the object belongs to each class of DESI targets
extra_columns: list of str (default None), additional columns to include in the output
emline_path: str (default None), specify the location of the emline file; by default the emline file is in the same directory as the redrock file
ignore_emline: bool (default False), if True, ignore the emline file and do not validate the ELG redshift
Returns:
cat: astropy table with basic columns such as TARGETID and boolean columns (e.g., GOOD_BGS)
Expand All @@ -58,14 +56,11 @@ def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, ex

output_columns = ['GOOD_BGS', 'GOOD_LRG', 'GOOD_ELG', 'GOOD_QSO']
if return_target_columns:
output_columns += ['LRG', 'ELG', 'QSO', 'ELG_LOP', 'ELG_HIP', 'ELG_VLO', 'BGS_ANY', 'BGS_FAINT', 'BGS_BRIGHT']

if ignore_emline:
output_columns.remove('GOOD_ELG')
output_columns = ['LRG', 'ELG', 'QSO', 'ELG_LOP', 'ELG_HIP', 'ELG_VLO', 'BGS_ANY', 'BGS_FAINT', 'BGS_BRIGHT'] + output_columns

if extra_columns is None:
extra_columns = ['TARGETID', 'Z', 'ZWARN', 'COADD_FIBERSTATUS']
output_columns = output_columns + list(np.array(extra_columns)[~np.in1d(extra_columns, output_columns)])
output_columns = list(np.array(extra_columns)[~np.in1d(extra_columns, output_columns)]) + output_columns

############################ Load data ############################

Expand All @@ -78,10 +73,10 @@ def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, ex
dir_path = os.path.dirname(redrock_path)
qso_mgii_path = os.path.join(dir_path, os.path.basename(redrock_path).replace('redrock-', 'qso_mgii-'))
qso_qn_path = os.path.join(dir_path, os.path.basename(redrock_path).replace('redrock-', 'qso_qn-'))
if emline_path is None:
emline_path = os.path.join(dir_path, os.path.basename(redrock_path).replace('redrock-', 'emline-'))
emline_path = os.path.join(dir_path, os.path.basename(redrock_path).replace('redrock-', 'emline-'))

tmp_redshifts = Table(fitsio.read(redrock_path, ext='REDSHIFTS', columns=columns_redshifts))
tid = tmp_redshifts['TARGETID'].copy()

# read the full fibermap until we determine the targeting columns
from desitarget.targets import main_cmx_or_sv
Expand All @@ -93,32 +88,41 @@ def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, ex
desi_target_col, bgs_target_col, _ = surv_target
desi_mask, bgs_mask, _ = surv_mask
tmp_fibermap = Table(tmp_fibermap[columns_fibermap + surv_target])
assert np.all(tid==tmp_fibermap['TARGETID'])
tmp_fibermap.remove_column('TARGETID')

tmp_qso_mgii = Table(fitsio.read(qso_mgii_path, columns=(columns_qso_mgii)))
tmp_qso_qn = Table(fitsio.read(qso_qn_path, columns=(columns_qso_qn)))
if not ignore_emline:
ignore_emline = False
ignore_qso = False

if os.path.isfile(emline_path):
tmp_emline = Table(fitsio.read(emline_path, columns=(columns_emline)))
assert np.all(tid==tmp_emline['TARGETID'])
tmp_emline.remove_column('TARGETID')
else:
print('emline file not found:', emline_path)
ignore_emline = True

# Sanity check
tid = tmp_redshifts['TARGETID'].copy()
if not (np.all(tid==tmp_fibermap['TARGETID']) \
# and np.all(tid==tmp_emline['TARGETID']) \
and np.all(tid==tmp_qso_mgii['TARGETID']) \
and np.all(tid==tmp_qso_qn['TARGETID'])):
raise ValueError
if (not ignore_emline) and (not np.all(tid==tmp_emline['TARGETID'])):
raise ValueError
if os.path.isfile(qso_mgii_path):
tmp_qso_mgii = Table(fitsio.read(qso_mgii_path, columns=(columns_qso_mgii)))
assert np.all(tid==tmp_qso_mgii['TARGETID'])
tmp_qso_mgii.remove_column('TARGETID')
else:
print('qso_mgii file not found:', qso_mgii_path)
ignore_qso = True

tmp_fibermap.remove_column('TARGETID')
tmp_qso_mgii.remove_column('TARGETID')
tmp_qso_qn.remove_column('TARGETID')
if not ignore_emline:
tmp_emline.remove_column('TARGETID')
if os.path.isfile(qso_qn_path):
tmp_qso_qn = Table(fitsio.read(qso_qn_path, columns=(columns_qso_qn)))
assert np.all(tid==tmp_qso_qn['TARGETID'])
tmp_qso_qn.remove_column('TARGETID')
else:
print('qso_qn file not found:', qso_qn_path)
ignore_qso = True

cat = hstack([tmp_redshifts, tmp_fibermap], join_type='exact')
if not ignore_emline:
cat = hstack([tmp_redshifts, tmp_fibermap, tmp_qso_mgii, tmp_qso_qn, tmp_emline], join_type='inner')
else:
cat = hstack([tmp_redshifts, tmp_fibermap, tmp_qso_mgii, tmp_qso_qn], join_type='inner')
cat = hstack([cat, tmp_emline], join_type='exact')
if not ignore_qso:
cat = hstack([cat, tmp_qso_mgii, tmp_qso_qn], join_type='exact')

if return_target_columns:
for name in ['LRG', 'ELG', 'QSO', 'ELG_LOP', 'ELG_HIP', 'ELG_VLO', 'BGS_ANY', 'BGS_FAINT', 'BGS_BRIGHT']:
Expand All @@ -140,13 +144,16 @@ def validate(redrock_path, fiberstatus_cut=True, return_target_columns=False, ex
# cat['BGS_FAINT'] = cat['BGS_TARGET'] & 2**0 > 0
# cat['BGS_BRIGHT'] = cat['BGS_TARGET'] & 2**1 > 0

cat = actually_validate(cat, fiberstatus_cut=fiberstatus_cut, ignore_emline=ignore_emline)
res = actually_validate(cat, fiberstatus_cut=fiberstatus_cut, ignore_emline=ignore_emline, ignore_qso=ignore_qso)
cat = hstack([cat, res])

output_columns = [col for col in output_columns if col in cat.colnames]
cat = cat[output_columns]

return cat


def actually_validate(cat, fiberstatus_cut=True, ignore_emline=False):
def actually_validate(cat, fiberstatus_cut=True, ignore_emline=False, ignore_qso=False):
'''
Apply redshift quality criteria
Expand All @@ -159,50 +166,60 @@ def actually_validate(cat, fiberstatus_cut=True, ignore_emline=False):
extra_columns: list of str (default None), additional columns to include in the output
emline_path: str (default None), specify the location of the emline file; by default the emline file is in the same directory as the redrock file
ignore_emline: bool (default False), if True, ignore the emline file and do not validate the ELG redshift
ignore_qso: bool (default False), if True, do not validate the QSO redshift
Returns:
cat: astropy table with boolean columns (e.g., GOOD_BGS) added
res: astropy table with boolean columns (e.g., GOOD_BGS)
'''

res = Table()

# BGS
cat['GOOD_BGS'] = cat['ZWARN']==0
cat['GOOD_BGS'] &= cat['DELTACHI2']>40
res['GOOD_BGS'] = cat['ZWARN']==0
res['GOOD_BGS'] &= cat['DELTACHI2']>40
if fiberstatus_cut:
cat['GOOD_BGS'] &= get_good_fiberstatus(cat)
res['GOOD_BGS'] &= get_good_fiberstatus(cat)

# LRG
cat['GOOD_LRG'] = cat['ZWARN']==0
cat['GOOD_LRG'] &= cat['Z']<1.5
cat['GOOD_LRG'] &= cat['DELTACHI2']>15
res['GOOD_LRG'] = cat['ZWARN']==0
res['GOOD_LRG'] &= cat['Z']<1.5
res['GOOD_LRG'] &= cat['DELTACHI2']>15
if fiberstatus_cut:
cat['GOOD_LRG'] &= get_good_fiberstatus(cat)
res['GOOD_LRG'] &= get_good_fiberstatus(cat)

# ELG
if not ignore_emline:
with warnings.catch_warnings():
warnings.simplefilter('ignore')
cat['GOOD_ELG'] = (cat['OII_FLUX']>0) & (cat['OII_FLUX_IVAR']>0)
cat['GOOD_ELG'] &= np.log10(cat['OII_FLUX'] * np.sqrt(cat['OII_FLUX_IVAR'])) > 0.9 - 0.2 * np.log10(cat['DELTACHI2'])
res['GOOD_ELG'] = (cat['OII_FLUX']>0) & (cat['OII_FLUX_IVAR']>0)
res['GOOD_ELG'] &= np.log10(cat['OII_FLUX'] * np.sqrt(cat['OII_FLUX_IVAR'])) > 0.9 - 0.2 * np.log10(cat['DELTACHI2'])
if fiberstatus_cut:
cat['GOOD_ELG'] &= get_good_fiberstatus(cat)

# QSO - adopted from the code from Edmond
# https://github.com/echaussidon/LSS/blob/8ca53f4c38cfa29722ee6958687e188cc894ed2b/py/LSS/qso_cat_utils.py#L282
cat['IS_QSO_QN'] = np.max(np.array([cat[name] for name in ['C_LYA', 'C_CIV', 'C_CIII', 'C_MgII', 'C_Hbeta', 'C_Halpha']]), axis=0) > 0.95
cat['IS_QSO_QN_NEW_RR'] &= cat['IS_QSO_QN']
cat['QSO_MASKBITS'] = np.zeros(len(cat), dtype=int)
cat['QSO_MASKBITS'][cat['SPECTYPE']=='QSO'] += 2**1
cat['QSO_MASKBITS'][cat['IS_QSO_MGII']] += 2**2
cat['QSO_MASKBITS'][cat['IS_QSO_QN']] += 2**3
cat['QSO_MASKBITS'][cat['IS_QSO_QN_NEW_RR']] += 2**4
cat['Z_RR'] = cat['Z']
cat['Z'][cat['IS_QSO_QN_NEW_RR']] = cat['Z_NEW'][cat['IS_QSO_QN_NEW_RR']].copy()
cat['ZERR'][cat['IS_QSO_QN_NEW_RR']] = cat['ZERR_NEW'][cat['IS_QSO_QN_NEW_RR']].copy()
# Correct bump at z~3.7
sel_pb_redshift = (((cat['Z'] > 3.65) & (cat['Z'] < 3.9)) | ((cat['Z'] > 5.15) & (cat['Z'] < 5.35))) & ((cat['C_LYA'] < 0.95) | (cat['C_CIV'] < 0.95))
cat['QSO_MASKBITS'][sel_pb_redshift] = 0
cat['GOOD_QSO'] = cat['QSO_MASKBITS']>0
if fiberstatus_cut:
cat['GOOD_QSO'] &= get_good_fiberstatus(cat, isqso=True)
res['GOOD_ELG'] &= get_good_fiberstatus(cat)

if not ignore_qso:
# QSO - adopted from the code from Edmond
# https://github.com/echaussidon/LSS/blob/8ca53f4c38cfa29722ee6958687e188cc894ed2b/py/LSS/qso_cat_utils.py#L282
res['IS_QSO_QN'] = np.max(np.array([cat[name] for name in ['C_LYA', 'C_CIV', 'C_CIII', 'C_MgII', 'C_Hbeta', 'C_Halpha']]), axis=0) > 0.95
res['IS_QSO_QN_NEW_RR'] = cat['IS_QSO_QN_NEW_RR'] & res['IS_QSO_QN']
res['QSO_MASKBITS'] = np.zeros(len(cat), dtype=int)
res['QSO_MASKBITS'][cat['SPECTYPE']=='QSO'] += 2**1
res['QSO_MASKBITS'][cat['IS_QSO_MGII']] += 2**2
res['QSO_MASKBITS'][res['IS_QSO_QN']] += 2**3
res['QSO_MASKBITS'][res['IS_QSO_QN_NEW_RR']] += 2**4
res['Z'] = cat['Z'].copy()
res['Z'][res['IS_QSO_QN_NEW_RR']] = cat['Z_NEW'][res['IS_QSO_QN_NEW_RR']].copy()
res['ZERR'] = cat['ZERR'].copy()
res['ZERR'][res['IS_QSO_QN_NEW_RR']] = cat['ZERR_NEW'][res['IS_QSO_QN_NEW_RR']].copy()
# Correct bump at z~3.7
sel_pb_redshift = (((res['Z'] > 3.65) & (res['Z'] < 3.9)) | ((res['Z'] > 5.15) & (res['Z'] < 5.35))) & ((cat['C_LYA'] < 0.95) | (cat['C_CIV'] < 0.95))
res['QSO_MASKBITS'][sel_pb_redshift] = 0
res['GOOD_QSO'] = res['QSO_MASKBITS']>0
if fiberstatus_cut:
res['GOOD_QSO'] &= get_good_fiberstatus(cat, isqso=True)

return cat
# Remove unnecessary columns
columns_to_keep = ['GOOD_BGS', 'GOOD_LRG', 'GOOD_ELG', 'GOOD_QSO']
columns_to_keep = [col for col in columns_to_keep if col in res.colnames]
res = res[columns_to_keep]

return res

0 comments on commit e139526

Please sign in to comment.