-
Notifications
You must be signed in to change notification settings - Fork 8
/
likelihood_ratio.py
219 lines (160 loc) · 8.84 KB
/
likelihood_ratio.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
'''
Adopting the liklihood ratio code from https://github.com/nudomarinero/mltier1 to use for crossmatching radio catalogues to the HELP masterlist
The original code was used in Williams et al 2018 using the method described by Fleuren et al 2012
'''
import numpy as np
import astropy
from astropy.table import Table, hstack
from astropy import units as u
from astropy.coordinates import SkyCoord, search_around_sky
from IPython.display import clear_output
from scipy.optimize import curve_fit
import pickle
import os
from pymoc import MOC
from matplotlib import pyplot as plt
#import mltier which is a non standard HELP module
from mltier import (get_center, get_n_m, estimate_q_m, Field, SingleMLEstimator, MultiMLEstimator,
parallel_process, get_sigma, get_q_m, get_threshold, q0_min_level, q0_min_numbers)
'''
Runs an adapted version of the liklihood ratio code from https://github.com/nudomarinero/mltier1 to crossmatch radio catalogues to the HELP masterlist
The original code was used in Williams et al 2018 using the method described by Fleuren et al 2012
INPUTS
------------
cat1 - the radio catalogue
cat2 - the masterlist
cat1ra - column name of the ra column for cat1
cat1dec - column name of the dec column for cat1
cat2ra - column name of the ra column for cat2
cat2dec - column name of the dec column for cat2
cat2mag - column name of the magnitude of the sources in cat2 in the band that is being used for the crossmatching (must be a magnitude or your crossmatch will be incorrect)
Q0_i - the value of Q0 used for the crossmatching
radius - the radius used for the crossmatching in arcseconds
ra_down - the ra that defines the left side of a rectangle encompasing the area the crossmatching will be done in (this parameter is redundant if a moc is provided)
ra_up - the ra that defines the right side of a rectangle encompasing the area the crossmatching will be done in (this parameter is redundant if a moc is provided)
dec_down - the dec that defines the bottom side of a rectangle encompasing the area the crossmatching will be done in (this parameter is redundant if a moc is provided)
dec_up - the dec that defines the upper side of a rectangle encompassing the area the crossmatching will be done in (this parameter is redundant if a moc is provided)
moc - a moc defining the area the crossmatching will be done in
OUTPUTS
------------
new_table - A masterlist table of the crossmatched sources containing all the columns from cat1 and cat2
'''
def likelihood_ratio(cat1,cat2,cat1ra,cat1dec,cat2ra,cat2dec,cat2mag,Q0_i,radius,
ra_down,ra_up,dec_down,dec_up,moc=None):
field = Field(ra_down, ra_up, dec_down, dec_up, moc)
radio_150mhz = cat1
masterlist = cat2
# Filter radio catalogue with the field
radio = field.filter_catalogue(radio_150mhz, colnames=(cat1ra,cat1dec))
print('number of radio sources being crossmatched is {}'.format(len(radio)))
# Filter the masterlist with the field
masterlist = field.filter_catalogue(masterlist, colnames=(cat2ra,cat2dec))
print('number of masterlist sources being crossmatched too is {}'.format(len(masterlist)))
coords_masterlist = SkyCoord(masterlist[cat2ra],
masterlist[cat2dec],
unit=(u.deg, u.deg),
frame='icrs')
coords_radio = SkyCoord(radio[cat1ra],
radio[cat1dec],
unit=(u.deg, u.deg),
frame='icrs')
masterlist[cat2mag][np.isnan(masterlist[cat2mag])] = 0
catalogue_i = masterlist
mask = catalogue_i[cat2mag] != 0
bin_list_i = np.arange(np.min(catalogue_i[mask][cat2mag]), np.max(catalogue_i[mask][cat2mag]), 0.5) # Bins of 0.5
center_i = get_center(bin_list_i)
# get the mid point of each bin i.e if there is a bin from 1 to 4 then the centre will be at 2.5
n_m_i = get_n_m(catalogue_i[cat2mag], bin_list_i, field.area)
# get a cumulative distribution of the magnitudes divided by the field area
q_m_i = estimate_q_m(catalogue_i[cat2mag], bin_list_i, n_m_i, coords_radio, coords_masterlist, radius=radius)
# ### $Q_0$ and likelihood estimators
q0 = Q0_i
likelihood_ratio_i = SingleMLEstimator(Q0_i, n_m_i, q_m_i, center_i)
idx_radio, idx_i, d2d, d3d = search_around_sky(
coords_radio, coords_masterlist, radius*u.arcsec)
idx_radio_unique = np.unique(idx_radio)
radio["lr_i"] = np.nan # Likelihood ratio
radio["lr_dist_i"] = np.nan # Distance to the selected source
radio["lr_index_i"] = np.nan # Index of the source in masterlist
total_sources = len(idx_radio_unique)
masterlist_aux_index = np.arange(len(masterlist))
def ml(i):
idx_0 = idx_i[idx_radio == i]
d2d_0 = d2d[idx_radio == i]
i_mag = catalogue_i[cat2mag][idx_0]
radio_ra = radio[i]["RA"]
radio_dec = radio[i]["DEC"]
radio_pa = radio[i]["PA"]
radio_maj_err = radio[i]["E_Maj"]
radio_min_err = radio[i]["E_Min"]
c_ra = catalogue_i["ra"][idx_0]
c_dec = catalogue_i["dec"][idx_0]
# Error for masterlist given as the true error is difficult to compute due to the composite nature of the masterlist
c_ra_err = 1/3600
c_dec_err = 1/3600
sigma = get_sigma(radio_maj_err, radio_min_err, radio_pa,
radio_ra, radio_dec,
c_ra, c_dec, c_ra_err, c_dec_err)
lr_0 = likelihood_ratio_i(i_mag, d2d_0.arcsec, sigma)
if len(lr_0) == 0:
result = [np.nan,np.nan,np.nan]
return result,np.nan
chosen_index = np.argmax(lr_0)
lr_sum = np.sum(lr_0)
rel = lr_0/(lr_sum + (1-q0))
result = [masterlist_aux_index[idx_0[chosen_index]], # Index
(d2d_0.arcsec)[chosen_index], # distance
lr_0[chosen_index]] # LR
return (result,rel[chosen_index])
print('starting the crossmatch')
res = []
rel = []
nomatches = 0
for i in range(len(radio)):
result,reliab = ml(i)
if result == [np.nan,np.nan,np.nan]:
nomatches = nomatches + 1
continue
res.append(result)
rel.append(reliab)
print('number of sources with no possible match is {}'.format(nomatches))
(radio["lr_index_i"][idx_radio_unique],
radio["lr_dist_i"][idx_radio_unique],
radio["lr_i"][idx_radio_unique]) = list(map(list, zip(*res)))
# #### Threshold and selection for i-band
radio["lr_i"][np.isnan(radio["lr_i"])] = 0
threshold_i = np.percentile(radio[radio['lr_i'] !=0]["lr_i"], 100*(1 - Q0_i))
print('liklihood ratio threshold using Q0 only is {}'.format(threshold_i))
radio["lrt"] = radio["lr_i"]
radio["lrt"][np.isnan(radio["lr_i"])] = 0
print('updating threshold of liklihood ratio that can be trusted')
def completeness(lr, threshold, q0):
n = len(lr)
lrt = lr[lr < threshold]
return 1. - np.sum((q0 * lrt)/(q0 * lrt + (1 - q0)))/float(n)
def reliability(lr, threshold, q0):
n = len(lr)
lrt = lr[lr > threshold]
return 1. - np.sum((1. - q0)/(q0 * lrt + (1 - q0)))/float(n)
completeness_v = np.vectorize(completeness, excluded=[0])
reliability_v = np.vectorize(reliability, excluded=[0])
n_test = 100
threshold_mean = np.percentile(radio["lrt"], 100*(1 - q0))
thresholds = np.arange(0., 10., 0.01)
thresholds_fine = np.arange(0.1, 1., 0.001)
completeness_t = completeness_v(radio["lrt"], thresholds, q0)
reliability_t = reliability_v(radio["lrt"], thresholds, q0)
average_t = (completeness_t + reliability_t)/2
completeness_t_fine = completeness_v(radio["lrt"], thresholds_fine, q0)
reliability_t_fine = reliability_v(radio["lrt"], thresholds_fine, q0)
average_t_fine = (completeness_t_fine + reliability_t_fine)/2
threshold_sel = thresholds_fine[np.argmax(average_t_fine)]
print('liklihood ratio updated threshold considering completeness and reliability is {}'.format(threshold_sel))
# ##### create a table containing the radio sources whose likelihood matches were above the new threshold and create a table of the data for the counterparts and then merge then using hstack
print('creating masterlist table of cross matched sources')
radio_match = radio[~np.isnan(radio['lr_dist_i'])]
radio_match = radio_match[radio_match['lr_i']>threshold_sel]
lockman_match = masterlist[radio_match['lr_index_i'].quantity.value.astype(int)]
new_table = astropy.table.hstack([radio_match,lockman_match],join_type='outer')
print('number of crossmatched sources is {}'.format(len(new_table)))
return(new_table)