############################################################################################################
# Generate_random_datasets.py
# A script for randomly generating datasets using the S2COSMOS catalogue.
############################################################################################################
#import modules/packages
import os, sys
import general as gen
import plotstyle as ps
import numcounts as nc
import stats
import numpy as np
from multiprocessing import Pool, cpu_count
from astropy.table import Table
from astropy.io import fits
from scipy.interpolate import LinearNDInterpolator
#put main script into a function so that multiprocessing doesn't try rerunning the whole thing
def main():
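    '''
    Generate nsim randomised realisations of the S2COSMOS 850-micron flux densities by sampling
    from their asymmetric uncertainties, optionally calculate the completeness for each random
    value, and save the results to a compressed .npz archive.
    '''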
    ##################
    #### SETTINGS ####
    ##################

    #toggle 'switches' for additional functionality
    rand_comp = True    #calculate completeness values for every random flux density value

    settings = [
        rand_comp
    ]

    #print the chosen settings to the Terminal
    print(gen.colour_string('CHOSEN SETTINGS:', 'white'))
    settings_print = [
        'Calculate completeness values for every random flux density value: '
    ]
    for i in range(len(settings_print)):
        if settings[i]:
            settings_print[i] += 'y'
        else:
            settings_print[i] += 'n'

    nsim = gen.nsim    #number of iterations to use if randomising the flux densities/completeness
    settings_print.append(f'Number of datasets required: {nsim}')

    print(gen.colour_string('\n'.join(settings_print), 'white'))
    #######################################################
    ###############   START OF SCRIPT   ###################
    #######################################################

    #relevant paths
    PATH_CATS = gen.PATH_CATS
    PATH_DATA = gen.PATH_DATA
    PATH_SIMS = gen.PATH_SIMS

    #S2COSMOS catalogue
    S19_cat = gen.S19_cat
    data_submm = Table.read(S19_cat, format='fits')
    #retrieve the deboosted flux densities and uncertainties, and RMS noise
    S850, eS850_lo, eS850_hi, RMS, *_ = gen.get_relevant_cols_S19(data_submm, main_only=gen.main_only)

    #see if a file exists with the reconstructed S2COSMOS completeness grid (should have been created
    #in a previous step of the pipeline)
    compgrid_file = PATH_DATA + 'Completeness_at_S850_and_rms.fits'
    if os.path.exists(compgrid_file):
        print(gen.colour_string('Loading and interpolating completeness grid...', 'purple'))
        CG_HDU = fits.open(compgrid_file)[0]
        zgrid = CG_HDU.data.T
        hdr = CG_HDU.header
        #get the min, max and step along the S850 and RMS axes
        xmin = hdr['CRVAL1']
        xstep = hdr['CDELT1']
        xmax = xmin + (hdr['NAXIS1'] - 1) * xstep
        ymin = hdr['CRVAL2']
        ystep = hdr['CDELT2']
        ymax = ymin + (hdr['NAXIS2'] - 1) * ystep
        #create a grid in flux density-RMS space
        xgrid, ygrid = np.mgrid[xmin:xmax+xstep:xstep, ymin:ymax+ystep:ystep]
        #interpolate the completeness values from the file w.r.t. S850 and RMS
        points = np.array([xgrid.flatten(), ygrid.flatten()])
        values = zgrid.flatten()
        comp_interp = LinearNDInterpolator(points.T, values)
    #otherwise, print an error message (the script will proceed, but only flux density values will be generated)
    else:
        gen.error_message(
            sys.argv[0],
            'Completeness grid does not exist. Run step 1 of the pipeline to create it.\n'
            'Script will proceed, but only flux density values will be generated.'
        )
    #set up arrays for the randomly generated flux densities and completeness values
    S850_rand = np.zeros((nsim, len(S850)))
    comp_rand = np.zeros((nsim, len(S850)))    #only relevant if rand_comp = True
    #number of datasets generated so far
    N_done = 0
    #create a dictionary containing the complete array of datasets
    dict_rand = {'S850_rand': S850_rand}

    #see if a file containing randomised flux densities already exists
    npz_filename = PATH_SIMS + 'S2COSMOS_randomised_S850.npz'
    if os.path.exists(npz_filename):
        rand_data = np.load(npz_filename)
        if 'S850_rand' in rand_data.files:
            S850_rand_old = rand_data['S850_rand']
            N_done += len(S850_rand_old)
            S850_rand[:N_done] = S850_rand_old
            #update the dictionary
            dict_rand['S850_rand'] = S850_rand

        if 'comp_rand' in rand_data.files:
            comp_rand_old = rand_data['comp_rand']
        else:
            comp_rand_old = []
        N_comp_done = len(comp_rand_old)

        #if the length of comp_rand is not the same as S850_rand, recalculate the completeness values
        if N_comp_done != N_done:
            if 'comp_interp' in locals():
                print(gen.colour_string('Recalculating completeness for existing S850 values...', 'purple'))
                #set up a multiprocessing Pool using all but one CPU
                with Pool(cpu_count()-1) as pool:
                    #calculate the completeness for the randomly generated flux densities
                    comp_rand[:N_done] = np.array(pool.starmap(comp_interp, [[S850_rand_old[i], RMS] for i in range(N_done)]))
        else:
            comp_rand[:N_done] = comp_rand_old
        dict_rand['comp_rand'] = comp_rand
    #if the parent directory doesn't even exist, make it
    elif not os.path.exists(PATH_SIMS):
        os.system(f'mkdir -p {PATH_SIMS}')
    #calculate how many more datasets need to be generated to reach the required number
    N_todo = nsim - N_done

    #if required, generate more datasets
    if N_todo > 0:
        print(gen.colour_string('Generating new random S850 values...', 'purple'))
        S850_rand[N_done:] = np.array([stats.random_asymmetric_gaussian(S850[i], eS850_lo[i], eS850_hi[i], N_todo) for i in range(len(S850))]).T
        #update the dictionary containing the complete array of datasets
        dict_rand['S850_rand'] = S850_rand

        if 'comp_interp' in locals():
            if rand_comp:
                #calculate completeness for the new flux density datasets
                print(gen.colour_string('Calculating completeness for new S850 values...', 'purple'))
                #set up a multiprocessing Pool using all but one CPU
                with Pool(cpu_count()-1) as pool:
                    #calculate the completeness for the randomly generated flux densities
                    comp_rand[N_done:] = np.array(pool.starmap(comp_interp, [[S850_rand[N_done+i], RMS] for i in range(N_todo)]))
                #add the result to the dictionary
                dict_rand['comp_rand'] = comp_rand

    #save the randomised flux densities (and completenesses if generated) to a compressed numpy archive
    np.savez_compressed(npz_filename, **dict_rand)
if __name__ == '__main__':
    main()