Skip to content

Commit

Permalink
improve memory usage by not storing various matrices
Browse files Browse the repository at this point in the history
  • Loading branch information
jsdillon committed Oct 24, 2024
1 parent 24929d7 commit 8827a62
Showing 1 changed file with 78 additions and 87 deletions.
165 changes: 78 additions & 87 deletions notebooks/single_baseline_postprocessing_and_pspec.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"source": [
"# Single Baseline Filtering and Power Spectrum Estimation\n",
"\n",
"**by Josh Dillon, Bobby Pascua, and Mike Wilensky**, last updated October 23, 2024\n",
"**by Josh Dillon, Bobby Pascua, and Mike Wilensky**, last updated October 24, 2024\n",
"\n",
"This notebook is designed to take a single redundantly-averaged unique baseline (typically after LST-binning) and push it through all the way to the power spectrum. It operates on single files that contain a single baseline for all LSTs and both `'ee'` and `'nn'` polarizations. It then can:\n",
"* Throw out highly flagged times and/or channels\n",
Expand Down Expand Up @@ -372,7 +372,6 @@
"\n",
" plt.ylim([-1, np.max(to_plot) * 1.1]) \n",
"\n",
"\n",
" for freq in [117.19, 133.11, 152.25, 167.97]:\n",
" plt.axvline(freq, ls='--', color='k')\n",
"\n",
Expand Down Expand Up @@ -1316,6 +1315,14 @@
"target_lsts = [np.mean([np.unwrap(d.lsts)[i * Navg:(i+1) * Navg] for d in deint_filt_data]) for i in range(n_avg_int)]"
]
},
{
"cell_type": "markdown",
"id": "2fe88381-3623-4af1-9c4a-a95d720dd028",
"metadata": {},
"source": [
"## Run time filtering, time averaging, and form pseudo-Stokes"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -1363,7 +1370,43 @@
"id": "a560e664",
"metadata": {},
"source": [
"## Calculate FRF noise covariances (set USE_CORR_MATRIX=True in globals cell to run)"
"## Compute correction factor for coherent averaging, either approximately with mode counting or more exactly with the FRF noise covariances (set USE_CORR_MATRIX=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72ddbb6c-2c9f-48db-9176-4426b190cbcc",
"metadata": {},
"outputs": [],
"source": [
"# TODO: this function should probably be graduated into hera_pspec or hera_cal\n",
"\n",
"def dpss_coherent_avg_correction(spw):\n",
" '''This function computes an approximate correction to the noise calculation after fringe-rate filtering. It assumes the that number of integrations\n",
" that are coherently averaged together is equal the ratio of the number of integrations per interleave divided by the number of FR modes kept. This\n",
" is then used to correct the calculation done in hera_pspec, which doesn't know about the FRF. The actual noise power spectrum is reduced by this factor\n",
" compared to what naively comes out of hera_pspec. However, when performing incoherent averaging of power spectra, one needs to raise noise power spectrum\n",
" by the square-root of this factor to account for the correlations between coherently-averaged power spectrum bins.'''\n",
" if SKIP_XTALK_AND_FRF:\n",
" coherent_avg_correction_factor = 1.0\n",
" else: \n",
" band = bands[spw]\n",
" time_in_seconds = (deint_filt_data[0].times - deint_filt_data[0].times.min()) * 60 * 60 * 24 # time array in seconds\n",
" time_filters = dspec.dpss_operator(time_in_seconds, [np.mean(fr_ranges[band]) / 1000], \n",
" [np.diff(fr_ranges[band]) / 2 / 1000], eigenval_cutoff=[FR_EIGENVAL_CUTOFF])[0].real\n",
"\n",
" # count the effective number of integrations that go into each coherent average, accounting for overlap with the xtalk filter\n",
" if xtalk_overlaps[band] is None:\n",
" actual_integrations_per_coherent_avg = time_filters.shape[0] / time_filters.shape[1] # ratio of total number of DPSS FR modes to modes kept after filtering\n",
" else:\n",
" overlap_filters = dspec.dpss_operator(time_in_seconds, [np.mean(xtalk_overlaps[band]) / 1000], \n",
" [np.diff(xtalk_overlaps[band]) / 2 / 1000], eigenval_cutoff=[FR_EIGENVAL_CUTOFF])[0].real\n",
" actual_integrations_per_coherent_avg = time_filters.shape[0] / (time_filters.shape[1] - overlap_filters.shape[1])\n",
" \n",
" integrations_per_coherent_avg = int(AVERAGING_TIME // (dt * NINTERLEAVE))\n",
" coherent_avg_correction_factor = actual_integrations_per_coherent_avg / integrations_per_coherent_avg\n",
" return coherent_avg_correction_factor"
]
},
{
Expand Down Expand Up @@ -1418,42 +1461,41 @@
"metadata": {},
"outputs": [],
"source": [
"if USE_CORR_MATRIX:\n",
" deint_vars = []\n",
" deint_frops = []\n",
" deint_covs = []\n",
"\n",
" for stream_ind in range(NINTERLEAVE):\n",
" var_dict = {}\n",
" frop_dict = {}\n",
" cov_dict = {}\n",
" times=deint_filt_data[stream_ind].times * 24 * 3600\n",
"\n",
" for band_ind, band in enumerate(bands):\n",
" var_dict[band] = {}\n",
" frop_dict[band] = {}\n",
" cov_dict[band] = {}\n",
"coherent_avg_correction_factors = []\n",
"for spw, band in enumerate(bands):\n",
" if USE_CORR_MATRIX:\n",
" corr_factors = []\n",
" for stream_ind in range(NINTERLEAVE):\n",
" times=deint_filt_data[stream_ind].times * 24 * 3600 \n",
" cov_here = 0\n",
" for pol in (\"ee\", \"nn\"):\n",
" cross_antpairpol = ANTPAIR + (pol,)\n",
" freq_slice = band_slices[band_ind]\n",
" var_dict[band][pol] = frf.prep_var_for_frop(deint_filt_data[stream_ind],\n",
" deint_nsamples[stream_ind],\n",
" deint_wgts[stream_ind],\n",
" cross_antpairpol,\n",
" freq_slice,\n",
" auto_ant=0)\n",
" frop_dict[band][pol] = get_frop_wrapper(pol=pol, stream_ind=stream_ind, band_ind=band_ind,\n",
" nsamples=deint_nsamples[stream_ind][cross_antpairpol][:, freq_slice],\n",
" dlst=dlst, bl_vec=bl_vec[cross_antpairpol],\n",
" times=times)\n",
"\n",
" cov_dict[band][pol] = frf.get_FRF_cov(frop_dict[band][pol], var_dict[band][pol])\n",
" cov_dict[band][\"pI\"] = cov_dict[band][\"ee\"] + cov_dict[band][\"nn\"]\n",
" freq_slice = band_slices[spw]\n",
" var = frf.prep_var_for_frop(deint_filt_data[stream_ind],\n",
" deint_nsamples[stream_ind],\n",
" deint_wgts[stream_ind],\n",
" cross_antpairpol,\n",
" freq_slice,\n",
" auto_ant=0)\n",
" frop = get_frop_wrapper(pol=pol, stream_ind=stream_ind, band_ind=spw,\n",
" nsamples=deint_nsamples[stream_ind][cross_antpairpol][:, freq_slice],\n",
" dlst=dlst, bl_vec=bl_vec[cross_antpairpol],\n",
" times=times)\n",
"\n",
" cov_here += frf.get_FRF_cov(frop, var) # computes covariance for pI by adding ee and nn\n",
" if hd.pol_convention == \"avg\":\n",
" cov_dict[band][\"pI\"] *= 0.25\n",
" deint_vars.append(var_dict)\n",
" deint_frops.append(frop_dict)\n",
" deint_covs.append(cov_dict)"
" cov_here /= 4\n",
"\n",
" ALLed_flags = np.all(deint_avg_flags[stream_ind][ANTPAIR + ('pI',)], axis=1)\n",
" tslc = slice(true_stretches(~ALLed_flags)[0].start, true_stretches(~ALLed_flags)[-1].stop)\n",
" corr_factors.append(frf.get_correction_factor_from_cov(cov_here, tslc=tslc))\n",
"\n",
" assert np.all(np.isfinite(corr_factors)), f'For band {band}, corr_factors={corr_factors}'\n",
" coherent_avg_correction_factors.append(np.mean(corr_factors))\n",
"\n",
" else:\n",
" # use the mode-counting method, which is simpler but less accurate\n",
" coherent_avg_correction_factors.append(dpss_coherent_avg_correction(spw)) "
]
},
{
Expand Down Expand Up @@ -1687,42 +1729,6 @@
"time_filters = dspec.dpss_operator(time_in_seconds, [np.mean(fr_ranges[band]) / 1000], [np.diff(fr_ranges[band]) / 2 / 1000], eigenval_cutoff=[FR_EIGENVAL_CUTOFF])[0].real"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd45cc89",
"metadata": {},
"outputs": [],
"source": [
"# TODO: this function should probably be graduated into hera_pspec or hera_cal\n",
"\n",
"def dpss_coherent_avg_correction(spw):\n",
" '''This function computes an approximate correction to the noise calculation after fringe-rate filtering. It assumes the that number of integrations\n",
" that are coherently averaged together is equal the ratio of the number of integrations per interleave divided by the number of FR modes kept. This\n",
" is then used to correct the calculation done in hera_pspec, which doesn't know about the FRF. The actual noise power spectrum is reduced by this factor\n",
" compared to what naively comes out of hera_pspec. However, when performing incoherent averaging of power spectra, one needs to raise noise power spectrum\n",
" by the square-root of this factor to account for the correlations between coherently-averaged power spectrum bins.'''\n",
" if SKIP_XTALK_AND_FRF:\n",
" coherent_avg_correction_factor = 1.0\n",
" else: \n",
" band = bands[spw]\n",
" time_in_seconds = (deint_filt_data[0].times - deint_filt_data[0].times.min()) * 60 * 60 * 24 # time array in seconds\n",
" time_filters = dspec.dpss_operator(time_in_seconds, [np.mean(fr_ranges[band]) / 1000], \n",
" [np.diff(fr_ranges[band]) / 2 / 1000], eigenval_cutoff=[FR_EIGENVAL_CUTOFF])[0].real\n",
"\n",
" # count the effective number of integrations that go into each coherent average, accounting for overlap with the xtalk filter\n",
" if xtalk_overlaps[band] is None:\n",
" actual_integrations_per_coherent_avg = time_filters.shape[0] / time_filters.shape[1] # ratio of total number of DPSS FR modes to modes kept after filtering\n",
" else:\n",
" overlap_filters = dspec.dpss_operator(time_in_seconds, [np.mean(xtalk_overlaps[band]) / 1000], \n",
" [np.diff(xtalk_overlaps[band]) / 2 / 1000], eigenval_cutoff=[FR_EIGENVAL_CUTOFF])[0].real\n",
" actual_integrations_per_coherent_avg = time_filters.shape[0] / (time_filters.shape[1] - overlap_filters.shape[1])\n",
" \n",
" integrations_per_coherent_avg = int(AVERAGING_TIME // (dt * NINTERLEAVE))\n",
" coherent_avg_correction_factor = actual_integrations_per_coherent_avg / integrations_per_coherent_avg\n",
" return coherent_avg_correction_factor"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -1760,22 +1766,7 @@
"# TODO: graduate this code into hera_pspec\n",
"\n",
"# Apply coherent_avg_correction_factor and compute P_SN\n",
"coherent_avg_correction_factors = []\n",
"for spw, band in enumerate(bands):\n",
" # calculate correction factors associated with the FRF, since times are not actually independent\n",
" if USE_CORR_MATRIX:\n",
" corr_factor_streams = []\n",
" for stream_ind in range(NINTERLEAVE):\n",
" # exclude any fully-flagged integrations at the edges of the dataset\n",
" ALLed_flags = np.all(deint_avg_flags[stream_ind][ANTPAIR + ('pI',)], axis=1)\n",
" tslc = slice(true_stretches(~ALLed_flags)[0].start, true_stretches(~ALLed_flags)[-1].stop)\n",
" corr_factor_streams.append(frf.get_correction_factor_from_cov(deint_covs[stream_ind][band][\"pI\"], tslc=tslc))\n",
" assert np.all(np.isfinite(corr_factor_streams)), f'For band {band}, corr_factor_streams={corr_factor_streams}'\n",
" # average over the streams\n",
" coherent_avg_correction_factors.append(np.mean(corr_factor_streams))\n",
" else:\n",
" coherent_avg_correction_factors.append(dpss_coherent_avg_correction(spw))\n",
" \n",
" for i, uvp in enumerate(uvps):\n",
" # loop over all pols, but only this spw\n",
" for key in uvp.get_all_keys():\n",
Expand Down

0 comments on commit 8827a62

Please sign in to comment.