improve memory usage by not storing various matrices

HERA-Team · Oct 24, 2024 · 8827a62 · 8827a62
1 parent 24929d7
commit 8827a62
Showing 1 changed file with 78 additions and 87 deletions.
diff --git a/notebooks/single_baseline_postprocessing_and_pspec.ipynb b/notebooks/single_baseline_postprocessing_and_pspec.ipynb
@@ -7,7 +7,7 @@
    "source": [
     "# Single Baseline Filtering and Power Spectrum Estimation\n",
     "\n",
-    "**by Josh Dillon, Bobby Pascua, and Mike Wilensky**, last updated October 23, 2024\n",
+    "**by Josh Dillon, Bobby Pascua, and Mike Wilensky**, last updated October 24, 2024\n",
     "\n",
     "This notebook is designed to take a single redundantly-averaged unique baseline (typically after LST-binning) and push it through all the way to the power spectrum. It operates on single files that contain a single baseline for all LSTs and both `'ee'` and `'nn'` polarizations. It then can:\n",
     "* Throw out highly flagged times and/or channels\n",
@@ -372,7 +372,6 @@
     "\n",
     "    plt.ylim([-1, np.max(to_plot) * 1.1])    \n",
     "\n",
-    "\n",
     "    for freq in [117.19, 133.11, 152.25, 167.97]:\n",
     "        plt.axvline(freq, ls='--', color='k')\n",
     "\n",
@@ -1316,6 +1315,14 @@
     "target_lsts = [np.mean([np.unwrap(d.lsts)[i * Navg:(i+1) * Navg] for d in deint_filt_data]) for i in range(n_avg_int)]"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "2fe88381-3623-4af1-9c4a-a95d720dd028",
+   "metadata": {},
+   "source": [
+    "## Run time filtering, time averaging, and form pseudo-Stokes"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -1363,7 +1370,43 @@
    "id": "a560e664",
    "metadata": {},
    "source": [
-    "## Calculate FRF noise covariances (set USE_CORR_MATRIX=True in globals cell to run)"
+    "## Compute correction factor for coherent averaging, either approximately with mode counting or more exactly with the FRF noise covariances (set USE_CORR_MATRIX=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "72ddbb6c-2c9f-48db-9176-4426b190cbcc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO: this function should probably be graduated into hera_pspec or hera_cal\n",
+    "\n",
+    "def dpss_coherent_avg_correction(spw):\n",
+    "    '''This function computes an approximate correction to the noise calculation after fringe-rate filtering. It assumes the that number of integrations\n",
+    "    that are coherently averaged together is equal the ratio of the number of integrations per interleave divided by the number of FR modes kept. This\n",
+    "    is then used to correct the calculation done in hera_pspec, which doesn't know about the FRF. The actual noise power spectrum is reduced by this factor\n",
+    "    compared to what naively comes out of hera_pspec. However, when performing incoherent averaging of power spectra, one needs to raise noise power spectrum\n",
+    "    by the square-root of this factor to account for the correlations between coherently-averaged power spectrum bins.'''\n",
+    "    if SKIP_XTALK_AND_FRF:\n",
+    "        coherent_avg_correction_factor = 1.0\n",
+    "    else: \n",
+    "        band = bands[spw]\n",
+    "        time_in_seconds = (deint_filt_data[0].times - deint_filt_data[0].times.min()) * 60 * 60 * 24  # time array in seconds\n",
+    "        time_filters = dspec.dpss_operator(time_in_seconds, [np.mean(fr_ranges[band]) / 1000], \n",
+    "                                           [np.diff(fr_ranges[band]) / 2 / 1000], eigenval_cutoff=[FR_EIGENVAL_CUTOFF])[0].real\n",
+    "\n",
+    "        # count the effective number of integrations that go into each coherent average, accounting for overlap with the xtalk filter\n",
+    "        if xtalk_overlaps[band] is None:\n",
+    "            actual_integrations_per_coherent_avg = time_filters.shape[0] / time_filters.shape[1]  # ratio of total number of DPSS FR modes to modes kept after filtering\n",
+    "        else:\n",
+    "            overlap_filters = dspec.dpss_operator(time_in_seconds, [np.mean(xtalk_overlaps[band]) / 1000], \n",
+    "                                                  [np.diff(xtalk_overlaps[band]) / 2 / 1000], eigenval_cutoff=[FR_EIGENVAL_CUTOFF])[0].real\n",
+    "            actual_integrations_per_coherent_avg = time_filters.shape[0] / (time_filters.shape[1]  - overlap_filters.shape[1])\n",
+    "        \n",
+    "        integrations_per_coherent_avg = int(AVERAGING_TIME // (dt * NINTERLEAVE))\n",
+    "        coherent_avg_correction_factor = actual_integrations_per_coherent_avg / integrations_per_coherent_avg\n",
+    "    return coherent_avg_correction_factor"
    ]
   },
   {
@@ -1418,42 +1461,41 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "if USE_CORR_MATRIX:\n",
-    "    deint_vars = []\n",
-    "    deint_frops = []\n",
-    "    deint_covs = []\n",
-    "\n",
-    "    for stream_ind in range(NINTERLEAVE):\n",
-    "        var_dict = {}\n",
-    "        frop_dict = {}\n",
-    "        cov_dict = {}\n",
-    "        times=deint_filt_data[stream_ind].times * 24 * 3600\n",
-    "\n",
-    "        for band_ind, band in enumerate(bands):\n",
-    "            var_dict[band] = {}\n",
-    "            frop_dict[band] = {}\n",
-    "            cov_dict[band] = {}\n",
+    "coherent_avg_correction_factors = []\n",
+    "for spw, band in enumerate(bands):\n",
+    "    if USE_CORR_MATRIX:\n",
+    "        corr_factors = []\n",
+    "        for stream_ind in range(NINTERLEAVE):\n",
+    "            times=deint_filt_data[stream_ind].times * 24 * 3600    \n",
+    "            cov_here = 0\n",
     "            for pol in (\"ee\", \"nn\"):\n",
     "                cross_antpairpol = ANTPAIR + (pol,)\n",
-    "                freq_slice = band_slices[band_ind]\n",
-    "                var_dict[band][pol] = frf.prep_var_for_frop(deint_filt_data[stream_ind],\n",
-    "                                                            deint_nsamples[stream_ind],\n",
-    "                                                            deint_wgts[stream_ind],\n",
-    "                                                            cross_antpairpol,\n",
-    "                                                            freq_slice,\n",
-    "                                                            auto_ant=0)\n",
-    "                frop_dict[band][pol] = get_frop_wrapper(pol=pol, stream_ind=stream_ind, band_ind=band_ind,\n",
-    "                                                        nsamples=deint_nsamples[stream_ind][cross_antpairpol][:, freq_slice],\n",
-    "                                                        dlst=dlst, bl_vec=bl_vec[cross_antpairpol],\n",
-    "                                                        times=times)\n",
-    "\n",
-    "                cov_dict[band][pol] = frf.get_FRF_cov(frop_dict[band][pol], var_dict[band][pol])\n",
-    "            cov_dict[band][\"pI\"] = cov_dict[band][\"ee\"] + cov_dict[band][\"nn\"]\n",
+    "                freq_slice = band_slices[spw]\n",
+    "                var = frf.prep_var_for_frop(deint_filt_data[stream_ind],\n",
+    "                                            deint_nsamples[stream_ind],\n",
+    "                                            deint_wgts[stream_ind],\n",
+    "                                            cross_antpairpol,\n",
+    "                                            freq_slice,\n",
+    "                                            auto_ant=0)\n",
+    "                frop = get_frop_wrapper(pol=pol, stream_ind=stream_ind, band_ind=spw,\n",
+    "                                        nsamples=deint_nsamples[stream_ind][cross_antpairpol][:, freq_slice],\n",
+    "                                        dlst=dlst, bl_vec=bl_vec[cross_antpairpol],\n",
+    "                                        times=times)\n",
+    "\n",
+    "                cov_here += frf.get_FRF_cov(frop, var)  # computes covariance for pI by adding ee and nn\n",
     "            if hd.pol_convention == \"avg\":\n",
-    "                cov_dict[band][\"pI\"] *= 0.25\n",
-    "        deint_vars.append(var_dict)\n",
-    "        deint_frops.append(frop_dict)\n",
-    "        deint_covs.append(cov_dict)"
+    "                cov_here /= 4\n",
+    "\n",
+    "            ALLed_flags = np.all(deint_avg_flags[stream_ind][ANTPAIR + ('pI',)], axis=1)\n",
+    "            tslc = slice(true_stretches(~ALLed_flags)[0].start, true_stretches(~ALLed_flags)[-1].stop)\n",
+    "            corr_factors.append(frf.get_correction_factor_from_cov(cov_here, tslc=tslc))\n",
+    "\n",
+    "        assert np.all(np.isfinite(corr_factors)), f'For band {band}, corr_factors={corr_factors}'\n",
+    "        coherent_avg_correction_factors.append(np.mean(corr_factors))\n",
+    "\n",
+    "    else:\n",
+    "        # use the mode-counting method, which is simpler but less accurate\n",
+    "        coherent_avg_correction_factors.append(dpss_coherent_avg_correction(spw)) "
    ]
   },
   {
@@ -1687,42 +1729,6 @@
     "time_filters = dspec.dpss_operator(time_in_seconds, [np.mean(fr_ranges[band]) / 1000], [np.diff(fr_ranges[band]) / 2 / 1000], eigenval_cutoff=[FR_EIGENVAL_CUTOFF])[0].real"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "dd45cc89",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# TODO: this function should probably be graduated into hera_pspec or hera_cal\n",
-    "\n",
-    "def dpss_coherent_avg_correction(spw):\n",
-    "    '''This function computes an approximate correction to the noise calculation after fringe-rate filtering. It assumes the that number of integrations\n",
-    "    that are coherently averaged together is equal the ratio of the number of integrations per interleave divided by the number of FR modes kept. This\n",
-    "    is then used to correct the calculation done in hera_pspec, which doesn't know about the FRF. The actual noise power spectrum is reduced by this factor\n",
-    "    compared to what naively comes out of hera_pspec. However, when performing incoherent averaging of power spectra, one needs to raise noise power spectrum\n",
-    "    by the square-root of this factor to account for the correlations between coherently-averaged power spectrum bins.'''\n",
-    "    if SKIP_XTALK_AND_FRF:\n",
-    "        coherent_avg_correction_factor = 1.0\n",
-    "    else: \n",
-    "        band = bands[spw]\n",
-    "        time_in_seconds = (deint_filt_data[0].times - deint_filt_data[0].times.min()) * 60 * 60 * 24  # time array in seconds\n",
-    "        time_filters = dspec.dpss_operator(time_in_seconds, [np.mean(fr_ranges[band]) / 1000], \n",
-    "                                           [np.diff(fr_ranges[band]) / 2 / 1000], eigenval_cutoff=[FR_EIGENVAL_CUTOFF])[0].real\n",
-    "\n",
-    "        # count the effective number of integrations that go into each coherent average, accounting for overlap with the xtalk filter\n",
-    "        if xtalk_overlaps[band] is None:\n",
-    "            actual_integrations_per_coherent_avg = time_filters.shape[0] / time_filters.shape[1]  # ratio of total number of DPSS FR modes to modes kept after filtering\n",
-    "        else:\n",
-    "            overlap_filters = dspec.dpss_operator(time_in_seconds, [np.mean(xtalk_overlaps[band]) / 1000], \n",
-    "                                                  [np.diff(xtalk_overlaps[band]) / 2 / 1000], eigenval_cutoff=[FR_EIGENVAL_CUTOFF])[0].real\n",
-    "            actual_integrations_per_coherent_avg = time_filters.shape[0] / (time_filters.shape[1]  - overlap_filters.shape[1])\n",
-    "        \n",
-    "        integrations_per_coherent_avg = int(AVERAGING_TIME // (dt * NINTERLEAVE))\n",
-    "        coherent_avg_correction_factor = actual_integrations_per_coherent_avg / integrations_per_coherent_avg\n",
-    "    return coherent_avg_correction_factor"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -1760,22 +1766,7 @@
     "# TODO: graduate this code into hera_pspec\n",
     "\n",
     "# Apply coherent_avg_correction_factor and compute P_SN\n",
-    "coherent_avg_correction_factors = []\n",
     "for spw, band in enumerate(bands):\n",
-    "    # calculate correction factors associated with the FRF, since times are not actually independent\n",
-    "    if USE_CORR_MATRIX:\n",
-    "        corr_factor_streams = []\n",
-    "        for stream_ind in range(NINTERLEAVE):\n",
-    "            # exclude any fully-flagged integrations at the edges of the dataset\n",
-    "            ALLed_flags = np.all(deint_avg_flags[stream_ind][ANTPAIR + ('pI',)], axis=1)\n",
-    "            tslc = slice(true_stretches(~ALLed_flags)[0].start, true_stretches(~ALLed_flags)[-1].stop)\n",
-    "            corr_factor_streams.append(frf.get_correction_factor_from_cov(deint_covs[stream_ind][band][\"pI\"], tslc=tslc))\n",
-    "        assert np.all(np.isfinite(corr_factor_streams)), f'For band {band}, corr_factor_streams={corr_factor_streams}'\n",
-    "        # average over the streams\n",
-    "        coherent_avg_correction_factors.append(np.mean(corr_factor_streams))\n",
-    "    else:\n",
-    "        coherent_avg_correction_factors.append(dpss_coherent_avg_correction(spw))\n",
-    "    \n",
     "    for i, uvp in enumerate(uvps):\n",
     "        # loop over all pols, but only this spw\n",
     "        for key in uvp.get_all_keys():\n",