"""
Description:
    This code generates proto-halo positions for each halo in an AbacusSummit box at a given redshift. By default, it uses only the
    particle A subsample (3%); to use the particle B subsample (7%), one needs to fetch that from tape (and slightly modify the code below).
Usage:
    - Example with a base-resolution simulation:
    `python match_halos.py --sim_dir /global/cfs/cdirs/desi/cosmosim/Abacus/ --sim_name AbacusSummit_base_c000_ph006 --redshift 0.8`
    - Example with a small-box simulation:
    `python match_halos.py --sim_dir /global/cfs/cdirs/desi/cosmosim/Abacus/small/ --sim_name AbacusSummit_small_c000_ph3000 --redshift 0.5`
"""
import os
import argparse
from pathlib import Path

import numpy as np
import asdf
from numba import njit

from abacusnbody.data.compaso_halo_catalog import CompaSOHaloCatalog
from abacusnbody.data.bitpacked import unpack_pids

DEFAULTS = {}
DEFAULTS['sim_name'] = "AbacusSummit_base_c000_ph006"
DEFAULTS['sim_dir'] = "/global/cfs/cdirs/desi/cosmosim/Abacus/"
DEFAULTS['save_dir'] = "/global/cfs/cdirs/desi/users/boryanah/halo_IC/"
DEFAULTS['subsample_AB'] = "A"
DEFAULTS['redshift'] = 0.8

@njit
def dist(pos1, pos2, L=None):
    '''
    Calculate the L2-norm distance between a set of points
    and either a reference point or another set of points.
    Optionally includes periodicity.

    Parameters
    ----------
    pos1: ndarray of shape (N, m)
        A set of points.
    pos2: ndarray of shape (N, m), (m,) or (1, m)
        A single point or a set of points.
    L: float, optional
        The box size. Will do a periodic wrap if given.

    Returns
    -------
    dist: ndarray of shape (N,)
        The distances between pos1 and pos2.
    '''
    # read the dimensions of the data
    N, nd = pos1.shape

    # allow pos2 to be a single point
    pos2 = np.atleast_2d(pos2)
    assert pos2.shape[-1] == nd
    broadcast = len(pos2) == 1

    dist = np.empty(N, dtype=pos1.dtype)

    # loop over all objects
    i2 = 0
    for i in range(N):
        delta = 0.
        for j in range(nd):
            dx = pos1[i][j] - pos2[i2][j]
            if L is not None:
                if dx >= L/2:
                    dx -= L
                elif dx < -L/2:
                    dx += L
            delta += dx*dx
        dist[i] = np.sqrt(delta)
        if not broadcast:
            i2 += 1
    return dist

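# Example usage (hypothetical inputs): per-row periodic separations in a
# 100 Mpc/h box; the point at (99, 99, 99) wraps to ~sqrt(3) from the origin.
#
#   pos = np.array([[1., 1., 1.], [99., 99., 99.]], dtype=np.float32)
#   dist(pos, np.zeros(3, dtype=np.float32), L=100.)  # -> [1.732..., 1.732...]
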
@njit
def calc_avg_pos(pos, Lbox, dtype=np.float32):
    """
    Compute the average position of a halo, accounting for
    periodic wrapping of the box.
    """
    # read off the parameters
    N = pos.shape[0]
    D = pos.shape[1]
    half_box = dtype(Lbox/2.)
    Lbox = dtype(Lbox)

    # initialize the average position array
    avg_pos = np.zeros(D, dtype=dtype)

    # loop over each dimension
    for j in range(D):
        # loop over each particle
        for i in range(N):
            # compute the difference in this dimension relative to the first particle
            dx = pos[i, j] - pos[0, j]
            # wrap around if on the opposite side of the box
            if dx >= half_box:
                avg_pos[j] += (pos[i, j] - Lbox)
            elif dx < -half_box:
                avg_pos[j] += (pos[i, j] + Lbox)
            else:
                avg_pos[j] += pos[i, j]
        # take the average over all particles in the halo
        avg_pos[j] /= N
        # wrap around, since positions go from -L/2 to L/2
        if avg_pos[j] < -half_box:
            avg_pos[j] += Lbox
        elif avg_pos[j] >= half_box:
            avg_pos[j] -= Lbox
    return avg_pos

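# Example (hypothetical inputs): two particles straddling the x-boundary of a
# 100 Mpc/h box average to the boundary itself, not to the box center, because
# the second particle is first unwrapped relative to the first one; the result
# is then wrapped back into [-L/2, L/2).
#
#   pos = np.array([[49., 0., 0.], [-49., 0., 0.]], dtype=np.float32)
#   calc_avg_pos(pos, 100.)  # -> [-50., 0., 0.], not [0., 0., 0.]
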
@njit
def calc_halos_avg_pos(part_pos, npstart, npout, Lbox, dtype=np.float32):
    """
    Compute the average positions of all halos, accounting for
    periodic wrapping of the box.
    """
    # initialize the array of averaged halo positions
    halo_avg_pos = np.zeros((len(npstart), 3), dtype=dtype)

    # loop over all halos
    for i in range(len(npstart)):
        # skip halos with no particle subsamples
        if npout[i] == 0:
            continue

        # compute the average position for this halo
        nst = npstart[i]
        nout = npout[i]
        halo_avg_pos[i] = calc_avg_pos(part_pos[nst:nst+nout], Lbox, dtype=dtype)
    return halo_avg_pos

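# Note on the layout (as used by CompaSO catalogs): the subsample particles of
# halo i occupy the contiguous slice part_pos[npstart[i] : npstart[i] + npout[i]],
# so a halo with npout[i] == 0 keeps the placeholder position (0, 0, 0).
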
def compress_asdf(asdf_fn, table, header):
    """
    Given the output file name, the data table, and the header,
    compress the table and save it as `asdf_fn`.
    """
    # cram the table into a dictionary
    data_dict = {}
    for field in table.keys():
        data_dict[field] = table[field]

    # create the data tree structure
    data_tree = {
        "data": data_dict,
        "header": header,
    }

    # set the compression options here
    compression_kwargs = dict(typesize="auto", shuffle="shuffle", compression_block_size=12*1024**2, blosc_block_size=3*1024**2, nthreads=4)
    with asdf.AsdfFile(data_tree) as af, open(asdf_fn, 'wb') as fp:
        af.write_to(fp, all_array_compression='blsc', compression_kwargs=compression_kwargs)

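# A minimal sketch of reading one of the output files back (the file name is
# hypothetical; `lazy_load=False` pulls the arrays into memory). Note that the
# 'blsc' compression used above is an abacusutils extension, so abacusutils
# may need to be installed for the decompressor to be registered:
#
#   with asdf.open("halo_lagr_pos_A_000.asdf", lazy_load=False) as af:
#       lagr_pos_avg = np.array(af["data"]["lagr_pos_avg"])
#       print(af["header"]["sim_name"], lagr_pos_avg.shape)
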
def main(sim_name, redshift, subsample_AB, sim_dir, save_dir, want_sanity=False):
    # path to the halos of interest
    halo_path = Path(sim_dir) / sim_name / "halos" / f"z{redshift:.3f}" / "halo_info"
    print(str(halo_path))

    # directory where the output is saved
    save_path = Path(save_dir) / sim_name / f"z{redshift:.3f}"
    os.makedirs(save_path, exist_ok=True)

    # fields that need to be loaded
    fields = ["npstartA", "npoutA"]
    if want_sanity:
        fields.append("x_L2com")

    # if subsample B is needed, the simulation data needs to be copied from tape
    assert "B" not in subsample_AB, "Not yet implemented"

    # find all halo info files
    halo_info_fns = sorted(halo_path.glob("halo_info_*.asdf"))

    # loop over each superslab
    for i in range(len(halo_info_fns)):
        print(f"file {i} of {len(halo_info_fns)}")
        print("loading", str(halo_info_fns[i]))

        # define the name of the final file
        save_fn = save_path / f"halo_lagr_pos_{subsample_AB}_{i:03d}.asdf"

        # particle info that needs to be loaded
        subsamples = {}
        subsamples['A'] = "A" in subsample_AB
        subsamples['B'] = "B" in subsample_AB
        subsamples['rv'] = True
        subsamples['pid'] = True

        # load the halo catalog
        cat = CompaSOHaloCatalog(halo_info_fns[i], subsamples=subsamples, fields=fields)
        print("loaded catalog")

        # load the header information
        Lbox = cat.header['BoxSizeHMpc']
        ppd = cat.header['ppd']
        Mpart = cat.header['ParticleMassHMsun']  # not used below, kept for reference

        # load the particle information
        npstart = np.asarray(cat.halos['npstartA'])
        npout = np.asarray(cat.halos['npoutA'])
        if want_sanity:
            x_L2com = np.asarray(cat.halos['x_L2com'])
        pos = np.asarray(cat.subsamples['pos'])
        pid = np.asarray(cat.subsamples['pid'])

        # unpack the Lagrangian (IC) positions encoded in the particle IDs
        lagr_pos = unpack_pids(pid, box=Lbox, ppd=ppd, pid=False, lagr_pos=True, tagged=False, density=False, lagr_idx=False)['lagr_pos']

        # compute the distance between the current and IC particle positions
        if want_sanity:
            part_dist = dist(pos, lagr_pos, L=Lbox)
            print("part IC dist stats min, max, mean, median", np.min(part_dist), np.max(part_dist), np.mean(part_dist), np.median(part_dist))
        print("number of halos", len(npstart))

        # compute the average Lagrangian positions
        halo_avg_lagr_pos = calc_halos_avg_pos(lagr_pos, npstart, npout, Lbox)
        print("calculated avg lagr pos")

        if want_sanity:
            # compute the distance between the current and IC halo positions
            halo_avg_pos = calc_halos_avg_pos(pos, npstart, npout, Lbox)
            halo_dist = dist(halo_avg_pos, halo_avg_lagr_pos, L=Lbox)
            print("halo IC dist stats min, max, mean, median", np.min(halo_dist[halo_dist > 0.]), np.max(halo_dist), np.mean(halo_dist[halo_dist > 0.]), np.median(halo_dist[halo_dist > 0.]))

            # distance between the subsample-averaged and L2com halo positions
            halo_dist = dist(halo_avg_pos, x_L2com, L=Lbox)
            # mask out halos with no subsample particles (placeholder position of zeros)
            mask = ~np.isclose(np.prod(halo_avg_pos, axis=1), 0.)
            assert len(halo_dist) == len(mask)
            print("halo dist stats min, max, mean, median", np.min(halo_dist[mask]), np.max(halo_dist[mask]), np.mean(halo_dist[mask]), np.median(halo_dist[mask]))

        # record the header
        header = {}
        header['sim_name'] = sim_name
        header['subsample_AB'] = subsample_AB
        header['redshift'] = redshift

        # record the table
        data = {}
        data['lagr_pos_avg'] = halo_avg_lagr_pos

        # save as an asdf file
        print("compressing")
        compress_asdf(save_fn, data, header)

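# A minimal sketch of calling main() programmatically instead of via the CLI
# (the paths are hypothetical placeholders):
#
#   main(sim_name="AbacusSummit_base_c000_ph006", redshift=0.8,
#        subsample_AB="A", sim_dir="/path/to/Abacus/", save_dir="/path/to/out/",
#        want_sanity=True)
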
class ArgParseFormatter(argparse.RawDescriptionHelpFormatter, argparse.ArgumentDefaultsHelpFormatter):
    pass

if __name__ == "__main__":
    # parse the command-line arguments
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=ArgParseFormatter)
    parser.add_argument('--sim_dir', help='Path to simulations', default=DEFAULTS['sim_dir'])
    parser.add_argument('--save_dir', help='Path to save output', default=DEFAULTS['save_dir'])
    parser.add_argument('--sim_name', help='Simulation name', default=DEFAULTS['sim_name'])
    parser.add_argument('--subsample_AB', help='Subsample type', default=DEFAULTS['subsample_AB'])
    parser.add_argument('--redshift', help='Redshift', default=DEFAULTS['redshift'], type=float)
    parser.add_argument('--want_sanity', help='Want a sanity check?', action='store_true')
    args = vars(parser.parse_args())
    main(**args)