diff --git a/_toc.yml b/_toc.yml
index 805e2699d..8ce76a466 100644
--- a/_toc.yml
+++ b/_toc.yml
@@ -131,3 +131,9 @@ parts:
title: BOTS Time Series Observations
- file: notebooks/galaxy_redshift/redshift_fitting.ipynb
title: Redshift and Template Fitting
+ - file: notebooks/NIRSpec_NSClean/FS_NSClean_example.ipynb
+ title: FS Products with NSClean
+ - file: notebooks/NIRSpec_NSClean/IFU_NSClean_example.ipynb
+ title: IFU Products with NSClean
+ - file: notebooks/NIRSpec_NSClean/MOS_NSClean_example.ipynb
+ title: MOS Products with NSClean
diff --git a/notebooks/NIRSpec_NSClean/FS_NSClean_example.ipynb b/notebooks/NIRSpec_NSClean/FS_NSClean_example.ipynb
new file mode 100644
index 000000000..63d7ca8c4
--- /dev/null
+++ b/notebooks/NIRSpec_NSClean/FS_NSClean_example.ipynb
@@ -0,0 +1,1435 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "4ee4766c-2f11-4783-9d84-cbbbd1382f53",
+ "metadata": {},
+ "source": [
+ "\n",
+ "# Cleaning Residual 1/f Noise in NIRSpec FS Products with NSClean \n",
+ "
\n",
+ "\n",
+ "
Note: Again, cleaning with this modified mask, the bright spectrum in primary slit S200A1 still shows little difference after cleaning (at some wavelengths, the cleaned spectrum has a subtle increase in flux according to the ratio). The mask applied to the faint background spectra in the secondary slits (S200A2, S400A1, S1600A1, S200B1) remains identical to the default mask provided by the pipeline. Consequently, the results obtained for these slits using this mask remain unchanged when compared to the default mask.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b3dff8f4-b74a-4f82-b454-fb2a135db5f6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Subarray SUBS200A1 1D Extracted Spectra\n",
+ "\n",
+ "# Plot each FS in the Full-frame\n",
+ "for original, cleaned, rate_type in zip(\n",
+ " x1d_nsclean_skipped, x1d_nsclean_modified, rate_types\n",
+ "):\n",
+ " if rate_type != \"subarray\":\n",
+ " continue\n",
+ " for slit in range(5): # 5 FS in full-frame\n",
+ " plot_spectra([original, cleaned], ext_num=slit, scale_percent=4)\n",
+ "\n",
+ "# Subarray SUBS200A1 1D Extracted Spectra -- smaller wavelength region of interest\n",
+ "\n",
+ "# Wavelength region of interest\n",
+ "wavelength_range = {\"nrs1\": [4.3, 4.7]}\n",
+ "flux_range = {\"nrs1\": [0.004, 0.007]}\n",
+ "\n",
+ "\n",
+ "for original, cleaned, rate_type in zip(\n",
+ " x1d_nsclean_skipped, x1d_nsclean_modified, rate_types\n",
+ "):\n",
+ " if rate_type != \"subarray\":\n",
+ " continue\n",
+ " for slit in range(5): # 1 slit S200A1\n",
+ " plot_spectra(\n",
+ " [original, cleaned],\n",
+ " ext_num=slit,\n",
+ " scale_percent=5,\n",
+ " wavelength_range=wavelength_range,\n",
+ " flux_range=flux_range,\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5e5b9e21-22e9-4dcd-bfbf-4da9a114be36",
+ "metadata": {},
+ "source": [
+ "
\n",
+ " \n",
+ "Note: For the fainter spectrum in the subarray data, we can see small changes to the continuum level from the cleaning process. Cleaning with this mask still removes the wavelength-dependent variation near 4.55 um.\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3ff38306-8096-4334-b9b8-b55387d406ca",
+ "metadata": {},
+ "source": [
+ "## 8. Conclusion
\n",
+ "
\n",
+ "\n",
+ "The final plots below show the countrate images and the resulting 1D extracted spectra side-by-side to compare the different cleaning methods: the original (no NSClean applied), the cleaned countrate image (with the default pipeline mask), the cleaned countrate image (with an alternate pipeline mask), and finally, the cleaned countrate image (with the hand-modified mask).\n",
+ "\n",
+ "Please note that the results presented in this notebook may vary for different datasets (e.g., targets of different brightness, spatial extent, etc.). Users are encouraged to explore NSClean using different masking methods to determine the optimal results.\n",
+ "\n",
+ "The output from the cleaning algorithm is now ready for further processing. The (*_cal.fits*) files produced by the above `Spec2Pipeline` run may be used as input to the `Spec3Pipeline`, for generating final combined spectra.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "03a20e8f-35da-4210-9075-3143f30aa487",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load rate data and cleaned masks\n",
+ "original_rate_data = [\n",
+ " fits.open(mast_products_dir + rate_name)[1].data for rate_name in rate_names\n",
+ "]\n",
+ "cleaned_rate_default_data = [\n",
+ " fits.open(cleaned_default_mask)[1].data\n",
+ " for cleaned_default_mask in cleaned_default_masks\n",
+ "]\n",
+ "cleaned_rate_alternate_data = [\n",
+ " fits.open(cleaned_alternate_mask)[1].data\n",
+ " for cleaned_alternate_mask in cleaned_alternate_masks\n",
+ "]\n",
+ "cleaned_rate_modified_data = [\n",
+ " fits.open(cleaned_modified_mask)[1].data\n",
+ " for cleaned_modified_mask in cleaned_modified_masks\n",
+ "]\n",
+ "\n",
+ "# For plotting visualization\n",
+ "for data_list in [\n",
+ " original_rate_data,\n",
+ " cleaned_rate_default_data,\n",
+ " cleaned_rate_alternate_data,\n",
+ " cleaned_rate_modified_data,\n",
+ "]:\n",
+ " for data in data_list:\n",
+ " data[np.isnan(data)] = 0\n",
+ "\n",
+ "# Set up clim values\n",
+ "clim_first_row = (-0.5, 2)\n",
+ "clim_subsequent_rows = (-1e-1, 1e-1)\n",
+ "\n",
+ "# Original vs. cleaned data (with default mask)\n",
+ "fig, axs = plt.subplots(3, 4, figsize=(25, 15), gridspec_kw={\"hspace\": 0.9})\n",
+ "\n",
+ "# Set titles and plot data\n",
+ "titles = [\n",
+ " \"Original Rate Data\",\n",
+ " \"Cleaned Rate Data (Default Mask)\",\n",
+ " \"Cleaned Rate Data (Alternate Mask)\",\n",
+ " \"Cleaned Rate Data (Hand-Modified Mask)\",\n",
+ "]\n",
+ "for i, title in enumerate(titles):\n",
+ " for j, data in enumerate(\n",
+ " [\n",
+ " original_rate_data,\n",
+ " cleaned_rate_default_data,\n",
+ " cleaned_rate_alternate_data,\n",
+ " cleaned_rate_modified_data,\n",
+ " ][i]\n",
+ " ):\n",
+ " ax = axs[j, i]\n",
+ " aspect = 10 if j == 0 else \"auto\"\n",
+ " clim = (\n",
+ " clim_first_row if j == 0 else clim_subsequent_rows\n",
+ " ) # Adjust clim based on row\n",
+ " rate_name = rate_names[0][:-10] if i == 1 and j == 0 else rate_names[1][:-10]\n",
+ " ax.set_title(f\"{rate_name} \\n {title}\", fontsize=12)\n",
+ " im = ax.imshow(data, origin=\"lower\", aspect=aspect, clim=clim)\n",
+ " fig.colorbar(im, ax=ax, pad=0.05, shrink=0.7, label=\"DN/s\")\n",
+ " ax.set_xlabel(\"Pixel Column\", fontsize=10)\n",
+ " ax.set_ylabel(\"Pixel Row\", fontsize=10)\n",
+ "\n",
+ "plt.tight_layout()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f3e9f088-8dcb-4a80-88ec-d74b51275a63",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def plot_spectra(\n",
+ " spec_list,\n",
+ " ext_num=1,\n",
+ " scale_percent=2.0,\n",
+ " wavelength_range=None,\n",
+ " flux_range=None,\n",
+ " xlim_low=None,\n",
+ "):\n",
+ " \"\"\"\n",
+ " Plot the cleaned extracted 1D spectrum and compare against\n",
+ " the original (not applying NSClean) extracted 1D spectrum.\n",
+ "\n",
+ " Parameters:\n",
+ " ----------\n",
+ " spec_list: list\n",
+ " List of paths to the FITS files containing original/cleaned 1D extracted data.\n",
+ " scale_percent: float\n",
+ " Scaling factor applied to determine the intensity range for visualization.\n",
+ " ext_num: int\n",
+ " Index/extension of the slice to be plotted.\n",
+ " The EXTVER header value. The default is 1 for 2D data.\n",
+ " wavelength_range: dict\n",
+ " Wavelength range (x-axis) {'nrs1': [3.6, 3.65], 'nrs2': [1.65, 1.75]}.\n",
+ " flux_range: dict\n",
+ " Flux range (y-axis) {'nrs1': [1, 2], 'nrs2': [1, 2]}.\n",
+ " xlim_low: int\n",
+ " Define a lower wavelength end for the 1D spectrum. Helpful for BOTS data.\n",
+ " \"\"\"\n",
+ "\n",
+ " if wavelength_range is None:\n",
+ " wavelength_range = {}\n",
+ "\n",
+ " # Open the FITS files.\n",
+ " original_hdul = fits.open(spec_list[0])\n",
+ " cleaned_hdul = fits.open(spec_list[1])\n",
+ " if len(spec_list) == 3:\n",
+ " alternate_hdul = fits.open(spec_list[2])\n",
+ " elif len(spec_list) == 4:\n",
+ " alternate_hdul = fits.open(spec_list[2])\n",
+ " handmod_hdul = fits.open(spec_list[3])\n",
+ " else:\n",
+ " alternate_hdul = None\n",
+ " handmod_hdul = None\n",
+ "\n",
+ " # Find the spectral extension (EXTRACT1D).\n",
+ " for extnum in range(len(original_hdul)):\n",
+ " hdu = original_hdul[extnum]\n",
+ " if hdu.name != \"EXTRACT1D\":\n",
+ " continue\n",
+ " slit_name = hdu.header[\"SLTNAME\"]\n",
+ "\n",
+ " if hdu.header[\"EXTVER\"] == ext_num:\n",
+ "\n",
+ " # Plot the original and cleaned spectra together.\n",
+ " fig, ax = plt.subplots(1, 2, figsize=(10, 5))\n",
+ " plt.suptitle(\n",
+ " f\"Compare 1D Spectra for {os.path.basename(spec_list[0])};\"\n",
+ " f\"EXP_TYPE/Slit = {slit_name}\",\n",
+ " fontsize=15,\n",
+ " )\n",
+ "\n",
+ " if \"nrs1\" in spec_list[0] and xlim_low is not None:\n",
+ " xlim_low = xlim_low\n",
+ " else:\n",
+ " xlim_low = 0\n",
+ "\n",
+ " ax[0].step(\n",
+ " hdu.data[\"WAVELENGTH\"][xlim_low:],\n",
+ " hdu.data[\"FLUX\"][xlim_low:],\n",
+ " linewidth=1,\n",
+ " label=\"Original\",\n",
+ " )\n",
+ " ax[0].step(\n",
+ " cleaned_hdul[extnum].data[\"WAVELENGTH\"][xlim_low:],\n",
+ " cleaned_hdul[extnum].data[\"FLUX\"][xlim_low:],\n",
+ " linewidth=1,\n",
+ " label=\"Cleaned\",\n",
+ " )\n",
+ " ax[0].set_xlabel(f\"Wavelength ({hdu.header['TUNIT1']})\", fontsize=12)\n",
+ " ax[0].set_ylabel(f\"Flux ({hdu.header['TUNIT2']})\", fontsize=12)\n",
+ " ax[0].set_title(\"1D Extracted Spectra\", fontsize=12)\n",
+ "\n",
+ " # Plot the difference between the spectra as a ratio.\n",
+ " diff = (\n",
+ " cleaned_hdul[extnum].data[\"FLUX\"][xlim_low:]\n",
+ " / hdu.data[\"FLUX\"][xlim_low:]\n",
+ " )\n",
+ " ax[1].step(\n",
+ " hdu.data[\"WAVELENGTH\"][xlim_low:],\n",
+ " diff,\n",
+ " linewidth=1,\n",
+ " label=\"Cleaned/Original\",\n",
+ " )\n",
+ " ax[1].set_xlabel(f\"Wavelength ({hdu.header['TUNIT1']})\", fontsize=12)\n",
+ " ax[1].set_ylabel(\"Cleaned/Original\", fontsize=12)\n",
+ " ax[1].set_title(\"Difference After Cleaning\", fontsize=12)\n",
+ " # print(\"Average Difference = {}\".format(np.nanmean(diff)))\n",
+ "\n",
+ " # If available, also plot the alternate spectra.\n",
+ " if alternate_hdul is not None:\n",
+ " ax[0].step(\n",
+ " alternate_hdul[extnum].data[\"WAVELENGTH\"][xlim_low:],\n",
+ " alternate_hdul[extnum].data[\"FLUX\"][xlim_low:],\n",
+ " linewidth=1,\n",
+ " label=\"Cleaned (alternate mask)\",\n",
+ " )\n",
+ " diff2 = (\n",
+ " alternate_hdul[extnum].data[\"FLUX\"][xlim_low:]\n",
+ " / hdu.data[\"FLUX\"][xlim_low:]\n",
+ " )\n",
+ " ax[1].step(\n",
+ " hdu.data[\"WAVELENGTH\"][xlim_low:],\n",
+ " diff2,\n",
+ " linewidth=1,\n",
+ " label=\"Cleaned (alternate mask)/Original\",\n",
+ " )\n",
+ " if handmod_hdul is not None:\n",
+ "\n",
+ " ax[0].step(\n",
+ " handmod_hdul[extnum].data[\"WAVELENGTH\"][xlim_low:],\n",
+ " handmod_hdul[extnum].data[\"FLUX\"][xlim_low:],\n",
+ " linewidth=1,\n",
+ " label=\"Cleaned (hand-modified mask)\",\n",
+ " color=\"Purple\"\n",
+ " )\n",
+ " diff3 = (\n",
+ " handmod_hdul[extnum].data[\"FLUX\"][xlim_low:]\n",
+ " / hdu.data[\"FLUX\"][xlim_low:]\n",
+ " )\n",
+ " ax[1].step(\n",
+ " hdu.data[\"WAVELENGTH\"][xlim_low:],\n",
+ " diff3,\n",
+ " linewidth=1,\n",
+ " label=\"Cleaned (hand-modified mask)/Original\",\n",
+ " )\n",
+ "\n",
+ " # Set the y-range of the plot if needed.\n",
+ " if flux_range is not None:\n",
+ " for key, y_range in flux_range.items():\n",
+ " if key.lower() in spec_list[0].lower() and y_range is not None:\n",
+ " if len(y_range) == 2:\n",
+ " ax[0].set_ylim(y_range)\n",
+ " ax[1].set_ylim(\n",
+ " [\n",
+ " np.nanpercentile(diff, scale_percent),\n",
+ " np.nanpercentile(diff, 100 - scale_percent),\n",
+ " ]\n",
+ " )\n",
+ " # ax[1].set_ylim(0.5, 1.5)\n",
+ "\n",
+ " else:\n",
+ " all_flux = [\n",
+ " hdu.data[\"FLUX\"],\n",
+ " cleaned_hdul[extnum].data[\"FLUX\"],\n",
+ " ]\n",
+ " y_range_ax0 = [\n",
+ " np.nanpercentile(all_flux[0], scale_percent),\n",
+ " np.nanpercentile(all_flux[0], 100 - scale_percent),\n",
+ " ]\n",
+ " ax[0].set_ylim(y_range_ax0)\n",
+ " # ax[1].set_ylim(0.5, 1.5)\n",
+ "\n",
+ " ax[1].set_ylim(\n",
+ " [\n",
+ " np.nanpercentile(diff, scale_percent),\n",
+ " np.nanpercentile(diff, 100 - scale_percent),\n",
+ " ]\n",
+ " )\n",
+ "\n",
+ " else:\n",
+ " all_flux = [hdu.data[\"FLUX\"], cleaned_hdul[extnum].data[\"FLUX\"]]\n",
+ " y_range_ax0 = [\n",
+ " np.nanpercentile(all_flux[0], scale_percent),\n",
+ " np.nanpercentile(all_flux[0], 100 - scale_percent),\n",
+ " ]\n",
+ " ax[0].set_ylim(y_range_ax0)\n",
+ " # ax[1].set_ylim(0.5, 1.5)\n",
+ "\n",
+ " ax[1].set_ylim(\n",
+ " [\n",
+ " np.nanpercentile(diff, scale_percent),\n",
+ " np.nanpercentile(diff, 100 - scale_percent),\n",
+ " ]\n",
+ " )\n",
+ "\n",
+ " # Set the x-range of the plot if needed.\n",
+ " for key in wavelength_range:\n",
+ " if key in spec_list[0] and wavelength_range[key] is not None:\n",
+ " ax[0].set_xlim(wavelength_range[key])\n",
+ " ax[1].set_xlim(wavelength_range[key])\n",
+ "\n",
+ " fig.tight_layout()\n",
+ "\n",
+ " ax[0].legend()\n",
+ " ax[1].legend()\n",
+ "\n",
+ " original_hdul.close()\n",
+ " cleaned_hdul.close()\n",
+ " if alternate_hdul is not None:\n",
+ " alternate_hdul.close()\n",
+ " handmod_hdul.close()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c7658cbc-d2ae-422f-8ad3-6c8261a91ad3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Final Comparison - Full Frame (all FS)\n",
+ "\n",
+ "# Plot each FS in the Full-frame\n",
+ "for original, cleaned_og, cleaned_alt, cleaned_mod, rate_type in zip(\n",
+ " x1d_nsclean_skipped,\n",
+ " x1d_nsclean_default,\n",
+ " x1d_nsclean_alternate,\n",
+ " x1d_nsclean_modified,\n",
+ " rate_types,\n",
+ "):\n",
+ " if rate_type == \"subarray\":\n",
+ " continue\n",
+ " for slit in range(5): # 5 FS in full-frame\n",
+ " plot_spectra(\n",
+ " [original, cleaned_og, cleaned_alt, cleaned_mod],\n",
+ " ext_num=slit,\n",
+ " scale_percent=4,\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "78fc33ce-9f5c-4b77-8153-bde9bf278ef1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Comparison -- Subarray SUBS200A1 1D Extracted Spectra\n",
+ "\n",
+ "# Wavelength region of interest\n",
+ "wavelength_range = {\"nrs1\": [4.3, 4.7]}\n",
+ "flux_range = {\"nrs1\": [0.004, 0.007]}\n",
+ "\n",
+ "for original, cleaned_og, cleaned_alt, cleaned_mod, rate_type in zip(\n",
+ " x1d_nsclean_skipped,\n",
+ " x1d_nsclean_default,\n",
+ " x1d_nsclean_alternate,\n",
+ " x1d_nsclean_modified,\n",
+ " rate_types,\n",
+ "):\n",
+ " if rate_type != \"subarray\":\n",
+ " continue\n",
+ " for slit in range(5): # 5 FS in full-frame\n",
+ " plot_spectra(\n",
+ " [original, cleaned_og, cleaned_alt, cleaned_mod],\n",
+ " ext_num=slit,\n",
+ " wavelength_range=wavelength_range,\n",
+ " flux_range=flux_range,\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ef1bf5ae-b156-47b0-8642-ce82ee5a43bc",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "\n",
+ " \n",
+ "Final Notes: \n",
+ "* Overall, after cleaning with each mask (default, alternate, hand-modified), the bright spectrum in S200A1 for the full-frame data shows minimal difference.\n",
+ "* The background spectra in the secondary slit of the full-frame data have been corrected for negative fluxes regardless of the mask used.\n",
+ "Regarding the faint source in the S200A1 subarray data, we observe slight changes to the continuum level from the cleaning process with any of the masks. Each mask effectively removes the wavelength-dependent variation near 4.55 um. However, the default masking introduces some high-frequency noise during cleaning, resulting in vertical striping over the spectral traces. In contrast, the hand-modified mask introduces less high-frequency noise, with the alternate (clip-based) masking introducing the least high-frequency noise.\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fbe656ab-ef2c-4efb-91e1-4c020a7206a0",
+ "metadata": {},
+ "source": [
+ "\n",
+ "## About the Notebook
\n",
+ "\n",
+ "**Authors:** Melanie Clarke, Kayli Glidic; NIRSpec Instrument Team\n",
+ "\n",
+ "**Updated On**: Feburary 29, 2024.\n",
+ "\n",
+ "
\n",
+ "\n",
+ "[Top of Page](#top)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/NIRSpec_NSClean/IFU_NSClean_example.ipynb b/notebooks/NIRSpec_NSClean/IFU_NSClean_example.ipynb
new file mode 100644
index 000000000..4fb66c2b2
--- /dev/null
+++ b/notebooks/NIRSpec_NSClean/IFU_NSClean_example.ipynb
@@ -0,0 +1,981 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "27997993-b40d-4857-94c1-2d88989b4d94",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "# Cleaning Residual 1/f Noise in NIRSpec IFU Products with NSClean \n",
+ "
\n",
+ "\n",
+ "## Notebook Goal\n",
+ "\n",
+ "The goal of this notebook is to generate cleaned IFU (*_rate.fits*) files by removing residual 1/f noise. These cleaned files will be used as input for the level 3 (`Spec3Pipeline`) pipeline.\n",
+ "\n",
+ "## Table of Contents\n",
+ "\n",
+ "* 1. [Introduction](#introduction)\n",
+ "* 2. [Import Library](#imports)\n",
+ "* 3. [Download the Data](#data)\n",
+ "* 4. [Running `Spec2Pipeline` without NSClean (Original Data)](#nsclean_skipped)\n",
+ "* 5. [Clean up 1/f Noise with NSClean (Default Pipeline Mask)](#nsclean_default)\n",
+ " * 5.1 [Verify the Mask (Default Pipeline Mask)](#verify_default_mask)\n",
+ " * 5.2 [Comparing Original vs. Cleaned Data (Default Pipeline Mask)](#nsclean_default_compare)\n",
+ "* 6. [Clean up 1/f Noise with NSClean (Alternate Mask)](#nsclean_alternate)\n",
+ " * 6.1 [Verify the Mask (Alternate Mask)](#verify_alternate_mask)\n",
+ " * 6.2 [Comparing Original vs. Cleaned Data (Alternate Mask)](#nsclean_alternate_compare)\n",
+ "* 7. [Clean up 1/f Noise with NSClean (Hand-Modified Mask)](#nsclean_modified)\n",
+ " * 7.1 [Verify the Mask (Hand-Modified Mask)](#verify_modified_mask)\n",
+ " * 7.2 [Comparing Original vs. Cleaned Data (Hand-Modified Mask)](#nsclean_modified_compare)\n",
+ "* 8. [Conclusion](#conclusion)\n",
+ "* [About the Notebook](#about)\n",
+ "\n",
+ "\n",
+ "## 1. Introduction
\n",
+ "
\n",
+ "\n",
+ "The JWST NIRSpec instrument has a number of features and characteristics that observers should be aware of when planning observations and interpreting data. One notable feature seen in NIRSpec pipeline products is negative and/or surplus flux in the extracted 1-D spectrum, typically with an irregular wavelength-dependent undulation. The cause of this artifact is correlated noise, known as [1/f noise](https://jwst-docs.stsci.edu/jwst-near-infrared-spectrograph/nirspec-instrument-features-and-caveats#NIRSpecInstrumentFeaturesandCaveats-1/fnoise), from low-level detector thermal instabilities, seen as vertical banding in 2-D count rate images, particularly in exposures of the NRS2 detector. While the IRS2 readout mode reduces this effect, it is not completely eliminated.\n",
+ "\n",
+ "To address this issue, the JWST Science Calibration Pipeline has integrated an external package developed by Bernard Rauscher, known as [NSClean](https://webb.nasa.gov/content/forScientists/publications.html#NSClean), within the `Spec2Pipeline` under [NSCleanStep](https://jwst-pipeline.readthedocs.io/en/latest/jwst/nsclean/main.html). This algorithm uses dark areas of the detector to fit a background model to the data in Fourier space. It requires an input mask to identify all dark areas of the detector. The more thorough and complete this mask is, the better the background fit.\n",
+ "\n",
+ "In this notebook, we will use the NSClean algorithm integrated into the pipeline, utilizing a mask generated on-the-fly with default parameters to remove 1/f noise. In some cases, this mask may not be complete enough/too restrictive for the best possible noise removal. To address this, we demonstrate how one can manually modify the default mask, as well as how to create an alternative mask by adjusting the [NSCleanStep parameters](https://jwst-pipeline.readthedocs.io/en/latest/jwst/nsclean/arguments.html). If needed, see the [NSClean documentation](https://iopscience.iop.org/article/10.1088/1538-3873/ad1b36/pdf) for some suggestions on manually creating a custom mask.\n",
+ "\n",
+ "This notebook utilizes IFU observation of quasar XID2028 with grating/filter G140H/F100LP as part of [JWST Early Release Science program ERS-1335](https://www.stsci.edu/jwst/science-execution/approved-programs/dd-ers/program-1335) observation 4, as an example. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "634f12e2-2811-4e05-a883-fd7d56acb230",
+ "metadata": {},
+ "source": [
+ "## 2. Import Library
\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "16683be1-13c7-4849-9722-a73bbf86000f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ---------- Set CRDS environment variables ----------\n",
+ "import os\n",
+ "import jwst\n",
+ "os.environ['CRDS_CONTEXT'] = 'jwst_1210.pmap'\n",
+ "os.environ['CRDS_PATH'] = os.environ['HOME']+'/crds_cache'\n",
+ "os.environ['CRDS_SERVER_URL'] = 'https://jwst-crds.stsci.edu'\n",
+ "print(f'CRDS cache location: {os.environ[\"CRDS_PATH\"]}')\n",
+ "\n",
+ "print(\"JWST Calibration Pipeline Version={}\".format(jwst.__version__))\n",
+ "# print(\"Current Operational CRDS Context = {}\".format(crds.get_default_context()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c44ae42f-8af5-407f-8ad3-248cb36b451e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ------ General Imports ------\n",
+ "import numpy as np\n",
+ "import time as tt\n",
+ "import logging\n",
+ "import warnings\n",
+ "\n",
+ "# ------ JWST Calibration Pipeline Imports ------\n",
+ "from jwst.pipeline.calwebb_spec2 import Spec2Pipeline\n",
+ "\n",
+ "# ------ Plotting/Stats Imports ------\n",
+ "from matplotlib import pyplot as plt\n",
+ "from astropy.io import fits\n",
+ "\n",
+ "from utils import get_jwst_file, plot_dark_data, plot_cleaned_data, plot_spectra\n",
+ "\n",
+ "# Hide all log and warning messages.\n",
+ "logging.disable(logging.ERROR)\n",
+ "warnings.simplefilter(\"ignore\", RuntimeWarning)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "58bafbdb-d55b-4e31-bc4e-97e0fb8f67b1",
+ "metadata": {},
+ "source": [
+ "## 3. Download the Data
\n",
+ "
\n",
+ " \n",
+ "The input data for this notebook features an IFU observation of quasar XID2028 with grating/filter G140H/F100LP. The dataset is part of the [JWST Early Release Science program ERS-1335](https://www.stsci.edu/jwst/science-execution/approved-programs/dd-ers/program-1335), specifically observation 4. It consists of 9 integrations (9 dither points) with 16 groups each. This notebook focuses on the second dithered exposure (00002) as an example. However, it's important to note that before proceeding to the `Spec3Pipeline`, all exposures must first be processed through the `Spec2Pipeline`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ba9776be-c6e8-4588-8712-e0516bc6bdd1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define a downloads directory.\n",
+ "mast_products_dir = \"./mast_products/\"\n",
+ "# Check if the directory exists.\n",
+ "if not os.path.exists(mast_products_dir):\n",
+ " # Create the directory if it doesn't exist.\n",
+ " os.makedirs(mast_products_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "da6601f0-45d7-4bd0-ba57-a6401ab529f3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This notebook focuses on the second dithered exposure.\n",
+ "obs_ids = [\"jw01335004001_03101_00002\"]\n",
+ "detectors = [1, 2] # Both Detectors NRS1 and NRS2.\n",
+ "\n",
+ "# Specify the countrate products.\n",
+ "rate_names = []\n",
+ "\n",
+ "for obs_id in obs_ids:\n",
+ " for detector in detectors:\n",
+ " rate_names.append(f\"{obs_id}_nrs{detector}_rate.fits\")\n",
+ "\n",
+ "# Download all the FITS files.\n",
+ "for name in rate_names:\n",
+ " print(f\"Downloading {name}\")\n",
+ " get_jwst_file(name, mast_api_token=None, save_directory=mast_products_dir)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "302c90c6-3c42-4710-becc-49533b69de70",
+ "metadata": {},
+ "source": [
+ "## 4. Running `Spec2Pipeline` without NSClean (Original Data)
\n",
+ "
\n",
+ "\n",
+ "The cell below executes the `Spec2Pipeline`, explicitly skipping the NSClean step during processing. The level 2 products generated will serve as a reference point to illustrate how the countrate images and final extracted spectra appear without the removal of 1/f noise."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e9d87880-8591-43a3-80b7-a6a09147a88c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set up directory for running the pipeline without NSClean.\n",
+ "stage2_nsclean_skipped_dir = \"./stage2_nsclean_skipped/\"\n",
+ "if not os.path.exists(stage2_nsclean_skipped_dir):\n",
+ " os.makedirs(stage2_nsclean_skipped_dir) # Create the directory if it doesn't exist."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b9917bd4-6e79-4e43-9864-d7fb6d970b4e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Original data (no NSClean Applied).\n",
+ "# Estimated run-time: 132 minutes (may vary).\n",
+ "start = tt.time()\n",
+ "\n",
+ "for i in rate_names:\n",
+ " print(f\"Processing {i}...\")\n",
+ "\n",
+ " Spec2Pipeline.call(\n",
+ " mast_products_dir + i,\n",
+ " save_results=True,\n",
+ " steps={\n",
+ " \"nsclean\": {\n",
+ " \"skip\": True\n",
+ " }, # Removes correlated read noise (1/f noise) from NIRSpec images.\n",
+ " },\n",
+ " output_dir=stage2_nsclean_skipped_dir,\n",
+ " )\n",
+ "\n",
+ " print(f\"Saved {i[:-9]}\" + \"cal.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"x1d.fits\")\n",
+ "\n",
+ "\n",
+ "end = tt.time()\n",
+ "print(\"Run time: \", round(end - start, 1) / 60.0, \" min\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2b99dc71-32ca-4599-b598-e64ea206f9d2",
+ "metadata": {},
+ "source": [
+ "## 5. Clean up 1/f Noise with NSClean (Default Pipeline Mask)
\n",
+ "
\n",
+ "\n",
+ "If a user-supplied mask file is not provided to the [NSClean step](https://jwst-pipeline.readthedocs.io/en/latest/jwst/nsclean/index.html) in the `Spec2Pipeline`, the pipeline will generate a mask based on default parameters. This mask will identify any pixel that is unilluminated. That is, the mask must contain True and False values, where True indicates that the pixel is dark, and False indicates that the pixel is illuminated (not dark).\n",
+ "\n",
+ "By default, the pipeline marks the following detector areas as illuminated, non-dark areas (False):\n",
+ "\n",
+ "* Pixels designated as science areas for IFU data.\n",
+ "* Traces from failed-open MSA shutters.\n",
+ "* 5-sigma outliers (default value). \n",
+ "* Any pixel set to NaN in the rate data.\n",
+ "\n",
+ "To tune the outlier detection in the mask, try modifying the `n_sigma` parameter (explored in the next section). A higher value will identify fewer outliers. A lower value will identify more. \n",
+ "\n",
+ "The default generated mask is saved and analyzed below. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "22165874-0081-47d5-8373-916829abad8e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set up directory for running NSClean with default parameters.\n",
+ "stage2_nsclean_default_dir = \"./stage2_nsclean_default/\"\n",
+ "if not os.path.exists(stage2_nsclean_default_dir):\n",
+ " os.makedirs(stage2_nsclean_default_dir) # Create the directory if it doesn't exist."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "80a7ebdd-6f5d-491f-8dce-645070115165",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1/f noise cleaned data (default NSClean pipeline mask).\n",
+ "# Estimated run time: 105 minutes (may vary).\n",
+ "start = tt.time()\n",
+ "\n",
+ "for i in rate_names:\n",
+ " print(f\"Processing {i}...\")\n",
+ "\n",
+ " Spec2Pipeline.call(\n",
+ " mast_products_dir + i,\n",
+ " save_results=True,\n",
+ " steps={\n",
+ " \"nsclean\": {\n",
+ " \"skip\": False,\n",
+ " \"save_mask\": True,\n",
+ " \"save_results\": True,\n",
+ " }, # Removes correlated read noise (1/f noise) from NIRSpec images.\n",
+ " },\n",
+ " output_dir=stage2_nsclean_default_dir,\n",
+ " )\n",
+ "\n",
+ " print(f\"Saved {i[:-9]}\" + \"mask.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"nsclean.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"cal.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"x1d.fits\")\n",
+ "\n",
+ "\n",
+ "end = tt.time()\n",
+ "print(\"Run time: \", round(end - start, 1) / 60.0, \" min\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ffc87c93-9e8a-4041-9e57-32d80f17d91a",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "Warning: \n",
+ " \n",
+ "In some situations, the NSClean step may fail to find a fit to the background noise. This failure may occur if the mask does not contain enough dark data (marked True). In particular, every column in the mask except for the first and last 4 columns must contain some pixels marked True. The background fitting procedure considers each column, one at a time, so it will crash if there is no data in a column to fit. If failure occurs, check that your mask in the image below has at least some True values in every column.\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "50c5f2cf-d7a3-4606-bc92-46b3b73cfb29",
+ "metadata": {},
+ "source": [
+ "### 5.1 Verify the Mask (Default Pipeline Mask)
\n",
+ "
\n",
+ "\n",
+ "Check the mask against the rate data to make sure it keeps only dark areas of the detector.\n",
+ "\n",
+ "Note that there are still some remaining illuminated areas, primarily due to transient artifacts like cosmic rays and snowballs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4ff98268-379a-4712-9b22-b05289eb7edc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot the rate data with masked areas blocked.\n",
+ "\n",
+ "# List of on-the-fly built masks from the pipeline.\n",
+ "nsclean_default_masks = [\n",
+ " stage2_nsclean_default_dir + \"jw01335004001_03101_00002_nrs1_mask.fits\",\n",
+ " stage2_nsclean_default_dir + \"jw01335004001_03101_00002_nrs2_mask.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and mask file.\n",
+ "for rate_file, mask_file in zip(rate_names, nsclean_default_masks):\n",
+ " plot_dark_data(mast_products_dir + rate_file, mask_file, layout=\"columns\", scale=9)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9b4c5b7b-4dd6-4d6c-9ae3-5590ab9131b9",
+ "metadata": {},
+ "source": [
+ "### 5.2 Comparing Original vs. Cleaned Data (Default Pipeline Mask)
\n",
+ "
\n",
+ "\n",
+ "We can now compare the cleaned data (with the default pipeline mask) to the original rate file and verify that the 1/f noise has been reduced.\n",
+ "\n",
+ "In many cases, the cleaning process introduces new artifacts to the rate file. These should be carefully examined and weighed against the benefits of noise reduction. If transient artifacts, like snowballs, are interfering with the cleaning process, it may be beneficial to manually edit the mask to remove these areas from consideration in the background fit. To do so, try varying the outlier detection threshold or editing specific pixels in the mask array directly (explored in the next few sections). Otherwise, refer to the [NSClean documentation](https://iopscience.iop.org/article/10.1088/1538-3873/ad1b36/pdf) for additional suggestions on manual editing.\n",
+ "\n",
+ "Note that in the images below, there are scattered values with large relative differences from the original rate file (shown in the relative difference image below). These are artifacts of the cleaning process.\n",
+ "\n",
+ "There are also broader low-level residual background effects (shown in the relative difference image on the right, below, with scattered outliers, identified with sigma clipping, hidden by masking). These include the background patterns we are trying to remove: the 1/f noise variations in the dispersion direction and the picture frame effect at the top and bottom of the frame (for full-frame data). However, there may also be low-level artifacts introduced by over-fitting the dark data in the cleaning process.\n",
+ "\n",
+ "Check both residual images carefully to understand the impact of the cleaning process on your data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bc0bb29c-b1a1-4bf7-9b22-62696cd6f9e5",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Plot the original and cleaned data, as well as a residual map.\n",
+ "\n",
+ "cleaned_default_masks = [\n",
+ " stage2_nsclean_default_dir + \"jw01335004001_03101_00002_nrs1_nsclean.fits\",\n",
+ " stage2_nsclean_default_dir + \"jw01335004001_03101_00002_nrs2_nsclean.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and cleaned file.\n",
+ "for rate_file, cleaned_file in zip(rate_names, cleaned_default_masks):\n",
+ " plot_cleaned_data(\n",
+ " mast_products_dir + rate_file, cleaned_file, layout=\"columns\", scale=9\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "79e2ed06-5604-4feb-826b-6fd6bcf72122",
+ "metadata": {},
+ "source": [
+ "Compare the extracted spectrum from the cleaned data to the spectrum extracted from the original rate file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cbecfa71-ff48-4715-b690-18776b84aa62",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1D extracted spectra.\n",
+ "x1d_nsclean_skipped = [\n",
+ " stage2_nsclean_skipped_dir + \"jw01335004001_03101_00002_nrs1_x1d.fits\",\n",
+ " stage2_nsclean_skipped_dir + \"jw01335004001_03101_00002_nrs2_x1d.fits\",\n",
+ "]\n",
+ "x1d_nsclean_default = [\n",
+ " stage2_nsclean_default_dir + \"jw01335004001_03101_00002_nrs1_x1d.fits\",\n",
+ " stage2_nsclean_default_dir + \"jw01335004001_03101_00002_nrs2_x1d.fits\",\n",
+ "]\n",
+ "\n",
+ "# Wavelength region of interest.\n",
+ "wavelength_range = {\"nrs1\": [1.15, 1.25], \"nrs2\": [1.65, 1.75]}\n",
+ "for original, cleaned in zip(x1d_nsclean_skipped, x1d_nsclean_default):\n",
+ " plot_spectra(\n",
+ " [original, cleaned], scale_percent=4, wavelength_range=wavelength_range\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "07074551-7ce7-4144-90f5-29f682922205",
+ "metadata": {},
+ "source": [
+ "
\n",
+ " \n",
+ "Notes: \n",
+ "* The portion of the spectrum near 1.2um for NRS1 and 1.7um for NRS2, the excess flux due to 1/f noise is reduced.\n",
+ "* There are several spikes in the difference between the cleaned and original spectra. These correspond to the scattered artifacts introduced by the cleaning process, above.\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a1f85a17-cb1c-4320-a3bd-1800fa02bab9",
+ "metadata": {},
+ "source": [
+ "## 6. Clean up 1/f Noise with NSClean (Alternate Mask)
\n",
+ "
\n",
+ "\n",
+ "For some data sets, masking the entire science region may excessively mask dark areas of the detector that could be used to improve the background fit. Excessive masking can introduce some high frequency noise in the cleaning process that appears as vertical striping over the spectral traces. Also, for some data sets, there may be several illuminated regions of the detector that are not masked by the IFU slice bounding boxes. This may include transient artifacts like cosmic rays or glow from saturated sources.\n",
+ "\n",
+ "In some cases, it may be beneficial to build the mask with an alternate algorithm. Here, we do not use the bounding boxes and instead iteratively mask any data more than 1 sigma above the background. For bright sources, this might leave more dark data between the spectral traces and may improve the background fit. \n",
+ "\n",
+ "Note, however, that excessive cleaning may impact the continuum level for the spectrum, if too much or too little illuminated data is included in the mask. Again, the generated mask and output spectra should be carefully examined to weigh the benefits of cleaning against the impact on the spectra.\n",
+ "\n",
+ "To tune the illumination detection in this mask, try modifying the `n_sigma` parameter below. A higher value will identify less illumination. A lower value will identify more."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c9b47885-df6d-4737-b934-b16652e657dc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set up directory for running NSClean with user-supplied mask.\n",
+ "stage2_nsclean_alternate_dir = \"./stage2_nsclean_alternate/\"\n",
+ "if not os.path.exists(stage2_nsclean_alternate_dir):\n",
+ " os.makedirs(\n",
+ " stage2_nsclean_alternate_dir\n",
+ " ) # Create the directory if it doesn't exist."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "02336a18-5741-4571-9b38-ecb50f7ced08",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1/f noise cleaned data (alternate NSClean pipeline mask).\n",
+ "# Estimated run time: 87 minutes (may vary).\n",
+ "start = tt.time()\n",
+ "\n",
+ "for indx, i in enumerate(rate_names):\n",
+ " print(f\"Processing {i}... \")\n",
+ "\n",
+ " Spec2Pipeline.call(\n",
+ " mast_products_dir + i,\n",
+ " save_results=True,\n",
+ " steps={\n",
+ " \"nsclean\": {\n",
+ " \"skip\": False,\n",
+ " \"save_mask\": True,\n",
+ " \"n_sigma\": 1,\n",
+ " \"mask_spectral_regions\": False,\n",
+ " \"save_results\": True,\n",
+ " }, # Removes correlated read noise (1/f noise) from NIRSpec images.\n",
+ " },\n",
+ " output_dir=stage2_nsclean_alternate_dir,\n",
+ " )\n",
+ "\n",
+ " print(f\"Saved {i[:-9]}\" + \"mask.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"nsclean.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"cal.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"x1d.fits\")\n",
+ "\n",
+ "end = tt.time()\n",
+ "print(\"Run time: \", round(end - start, 1) / 60.0, \" min\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ef33421e-bd16-41d0-883d-4a68202897b6",
+ "metadata": {},
+ "source": [
+ "### 6.1 Verify the Mask (Alternate Mask)
\n",
+ "
\n",
+ "\n",
+ "Check the mask against the rate data to make sure it keeps only dark areas of the detector.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ff0d7dd8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot the rate data with masked areas blocked.\n",
+ "\n",
+ "# List of on-the-fly built masks from the pipeline.\n",
+ "nsclean_alternate_masks = [\n",
+ " stage2_nsclean_alternate_dir + \"jw01335004001_03101_00002_nrs1_mask.fits\",\n",
+ " stage2_nsclean_alternate_dir + \"jw01335004001_03101_00002_nrs2_mask.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and mask file.\n",
+ "for rate_file, mask_file in zip(rate_names, nsclean_alternate_masks):\n",
+ " plot_dark_data(mast_products_dir + rate_file, mask_file, layout=\"columns\", scale=9)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b84cca59-2781-437d-a169-fa5c9e012b3a",
+ "metadata": {},
+ "source": [
+ "### 6.2 Comparing Original vs. Cleaned Data (Alternate Mask)
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "115e8929-ef97-4673-b985-dbcf5eba6be5",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Plot the original and cleaned data, as well as a residual map.\n",
+ "\n",
+ "cleaned_alternate_masks = [\n",
+ " stage2_nsclean_alternate_dir + \"jw01335004001_03101_00002_nrs1_nsclean.fits\",\n",
+ " stage2_nsclean_alternate_dir + \"jw01335004001_03101_00002_nrs2_nsclean.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and cleaned file.\n",
+ "for rate_file, cleaned_file in zip(rate_names, cleaned_alternate_masks):\n",
+ " plot_cleaned_data(\n",
+ " mast_products_dir + rate_file, cleaned_file, layout=\"columns\", scale=9\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6376544c-12fc-45dc-9dce-0ac2b4a8ec77",
+ "metadata": {},
+ "source": [
+ "Compare the extracted spectrum from the cleaned data to the spectrum extracted from the original rate file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8ee33a68-ca76-4a2c-b38e-2e0fa444b4d4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x1d_nsclean_alternate = [\n",
+ " stage2_nsclean_alternate_dir + \"jw01335004001_03101_00002_nrs1_x1d.fits\",\n",
+ " stage2_nsclean_alternate_dir + \"jw01335004001_03101_00002_nrs2_x1d.fits\",\n",
+ "]\n",
+ "\n",
+ "# Wavelength region of interest.\n",
+ "wavelength_range = {\"nrs1\": [1.15, 1.25], \"nrs2\": [1.65, 1.75]}\n",
+ "for original, cleaned in zip(x1d_nsclean_skipped, x1d_nsclean_alternate):\n",
+ " plot_spectra(\n",
+ " [original, cleaned], scale_percent=4, wavelength_range=wavelength_range\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d2f88a25-3262-42dc-89f8-986df2f8e2e1",
+ "metadata": {},
+ "source": [
+ "
\n",
+ " \n",
+ "Notes: \n",
+ "* The overall continuum level for the spectrum on NRS1 has changed after cleaning. This is because sigma-clipping, unlike the default method, only masks the bright outliers, rather than the entire science region. Consequently, some sky regions are included in the background model that gets subtracted from the data, which can result in changes to the continuum level.\n",
+ "* The portion of the spectrum near 1.7um for NRS2, the excess flux due to 1/f noise is reduced.\n",
+ "* There are several spikes in the difference between the cleaned and original spectra. These correspond to the scattered artifacts introduced by the cleaning process, above.\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fec75731",
+ "metadata": {},
+ "source": [
+ "## 7. Clean up 1/f Noise with NSClean (Hand-Modified Mask)
\n",
+ "
\n",
+ "\n",
+ "In certain scenarios, manual generation of a mask may be required. Here, we present **one** approach to manually modify the mask (excluding some large snowballs in NRS1/NRS2) starting with the default mask output from the pipeline. It is worth noting that the mask modified using this method may not necessarily outperform the two previous options."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "688909d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set up directory for running NSClean with user-supplied mask.\n",
+ "stage2_nsclean_modified_dir = \"./stage2_nsclean_modified/\"\n",
+ "if not os.path.exists(stage2_nsclean_modified_dir):\n",
+ " # Create the directory if it doesn't exist.\n",
+ " os.makedirs(stage2_nsclean_modified_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cee4a04a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hand-modify certain mask regions\n",
+ "# Snowballs in NRS1/2.\n",
+ "\n",
+ "# Define the list to store paths of modified masks.\n",
+ "nsclean_modified_masks = []\n",
+ "\n",
+ "# Iterate through the list of original masks.\n",
+ "for mask in nsclean_default_masks:\n",
+ " # New mask file name.\n",
+ " output_file = os.path.basename(mask)[:-5] + \"_modified.fits\"\n",
+ "\n",
+ " # Open the FITS file.\n",
+ " with fits.open(mask) as hdul:\n",
+ " # Extract the mask data from the science extension.\n",
+ " mask_data = hdul[\"SCI\"].data.copy() # Make a copy.\n",
+ "\n",
+ " if \"nrs1\" in mask:\n",
+ " # Mask Snowballs.\n",
+ " mask_data[10:50, 180:215] = False\n",
+ " mask_data[560:580, 450:465] = False\n",
+ " mask_data[1720:1810, 1150:1240] = False\n",
+ " mask_data[1540:1580, 535:580] = False\n",
+ " mask_data[1865:1910, 725:770] = False\n",
+ " else:\n",
+ " mask_data[200:250, 130:190] = False\n",
+ " mask_data[1400:1420, 40:80] = False\n",
+ " mask_data[1700:1730, 150:180] = False\n",
+ " mask_data[630:710, 1100:1170] = False\n",
+ " mask_data[520:570, 1770:1820] = False\n",
+ "\n",
+ " # Update the data within the science extension.\n",
+ " hdul[\"SCI\"].data = mask_data\n",
+ " # Save the modified FITS file.\n",
+ " output_path = os.path.join(stage2_nsclean_modified_dir, output_file)\n",
+ " hdul_modified = hdul.copy() # Make a copy.\n",
+ " hdul_modified.writeto(output_path, overwrite=True)\n",
+ " nsclean_modified_masks.append(output_path)\n",
+ " print(f\"Saved modified mask as: {output_path}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "463ae320-2f8e-4c5a-9a47-d1698c33704e",
+ "metadata": {},
+ "source": [
+ "### 7.1 Verify the Mask (Hand-Modified Mask)
\n",
+ "
\n",
+ "\n",
+ "Check the mask against the rate data to make sure it keeps only dark areas of the detector.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e46cf4cc-a75b-4c0d-97b1-ff62bc0985d6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot the rate data with masked areas blocked.\n",
+ "\n",
+ "# List of modified masks for the pipeline.\n",
+ "nsclean_modified_masks = [\n",
+ " stage2_nsclean_modified_dir + \"jw01335004001_03101_00002_nrs1_mask_modified.fits\",\n",
+ " stage2_nsclean_modified_dir + \"jw01335004001_03101_00002_nrs2_mask_modified.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and mask file.\n",
+ "for rate_file, mask_file in zip(rate_names, nsclean_modified_masks):\n",
+ " plot_dark_data(mast_products_dir + rate_file, mask_file, layout=\"columns\", scale=9)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "061f850b-404f-4476-84a0-5e88a80e0fc0",
+ "metadata": {},
+ "source": [
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d4a70729-8ffe-4cc1-a7e6-46e8d7b08038",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1/f noise cleaned data (user-supplied mask).\n",
+ "# Estimated run time: 87 minutes (may vary).\n",
+ "start = tt.time()\n",
+ "\n",
+ "for indx, i in enumerate(rate_names):\n",
+ " print(f\"Processing {i}... \")\n",
+ "\n",
+ " Spec2Pipeline.call(\n",
+ " mast_products_dir + i,\n",
+ " save_results=True,\n",
+ " steps={\n",
+ " \"nsclean\": {\n",
+ " \"skip\": False,\n",
+ " \"save_mask\": True,\n",
+ " \"save_results\": True,\n",
+ " \"user_mask\": nsclean_modified_masks[indx],\n",
+ " }, # Removes correlated read noise (1/f noise) from NIRSpec images.\n",
+ " },\n",
+ " output_dir=stage2_nsclean_modified_dir,\n",
+ " )\n",
+ "\n",
+ " print(f\"Saved {i[:-9]}\" + \"mask.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"nsclean.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"cal.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"x1d.fits\")\n",
+ "\n",
+ "\n",
+ "end = tt.time()\n",
+ "print(\"Run time: \", round(end - start, 1) / 60.0, \" min\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "36753340-5509-4fec-91a4-3b1dcfe63075",
+ "metadata": {},
+ "source": [
+ "### 7.2 Comparing Original vs. Cleaned Data (Hand-Modified Mask)
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "80b12c21-2dce-4065-a951-b79b22f79b54",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Plot the original and cleaned data, as well as a residual map.\n",
+ "\n",
+ "cleaned_modified_masks = [\n",
+ " stage2_nsclean_modified_dir + \"jw01335004001_03101_00002_nrs1_nsclean.fits\",\n",
+ " stage2_nsclean_modified_dir + \"jw01335004001_03101_00002_nrs2_nsclean.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and cleaned file.\n",
+ "for rate_file, cleaned_file in zip(rate_names, cleaned_modified_masks):\n",
+ " plot_cleaned_data(\n",
+ " mast_products_dir + rate_file, cleaned_file, layout=\"columns\", scale=9\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1b37f102-5db8-475b-98ec-740fb6086fe4",
+ "metadata": {},
+ "source": [
+ "Compare the extracted spectrum from the cleaned data to the spectrum extracted from the original rate file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7aeb631e-6e0e-4d4d-a9b0-c8b6093ff1f6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x1d_nsclean_modified = [\n",
+ " stage2_nsclean_modified_dir + \"jw01335004001_03101_00002_nrs1_x1d.fits\",\n",
+ " stage2_nsclean_modified_dir + \"jw01335004001_03101_00002_nrs2_x1d.fits\",\n",
+ "]\n",
+ "\n",
+ "# Wavelength region of interest\n",
+ "wavelength_range = {\"nrs1\": [1.15, 1.25], \"nrs2\": [1.65, 1.75]}\n",
+ "for original, cleaned in zip(x1d_nsclean_skipped, x1d_nsclean_modified):\n",
+ " plot_spectra(\n",
+ " [original, cleaned], scale_percent=4, wavelength_range=wavelength_range\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5072ecbe-6b80-4b84-a104-b5477afa0cc2",
+ "metadata": {},
+ "source": [
+ "
\n",
+ " \n",
+ "Notes: \n",
+ "* Even when masking the handful of snowballs we encountered, the spectra remain similar to those cleaned with the default mask.\n",
+ "* The portion of the spectrum near 1.2um for NRS1 and 1.7um for NRS2, the excess flux due to 1/f noise is reduced.\n",
+ "* There are several spikes in the difference between the cleaned and original spectra. These correspond to the scattered artifacts introduced by the cleaning process, above.\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f86daa01-29bd-4ce4-9a63-0091e0dbcf2b",
+ "metadata": {},
+ "source": [
+ "## 8. Conclusion
\n",
+ "
\n",
+ "\n",
+ "The final plots below show the countrate images and the resulting 1D extracted spectra side-by-side to compare the different cleaning methods: the original (no NSClean applied), the cleaned countrate image (with the default pipeline mask), the cleaned countrate image (with an alternate pipeline mask), and finally, the cleaned countrate image (with the hand-modified mask).\n",
+ "\n",
+ "Please note that the results presented in this notebook may vary for different datasets (e.g., targets of different brightness, spatial extent, etc.). Users are encouraged to explore NSClean using different masking methods to determine the optimal results.\n",
+ "\n",
+ "The output from the cleaning algorithm is now ready for further processing. The (*_cal.fits*) files produced by the above `Spec2Pipeline` run may be used as input to the `Spec3Pipeline`, for generating final combined spectral cubes and extracted spectra."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f55ab935-6da2-45ef-8ada-1175d63a8272",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Not cleaned vs. cleaned (default mask) vs. cleaned (alternate mask) rate data\n",
+ "original_rate_data = [\n",
+ " fits.open(mast_products_dir + rate_name)[1].data for rate_name in rate_names\n",
+ "]\n",
+ "cleaned_rate_default_data = [\n",
+ " fits.open(cleaned_default_mask)[1].data\n",
+ " for cleaned_default_mask in cleaned_default_masks\n",
+ "]\n",
+ "cleaned_rate_alternate_data = [\n",
+ " fits.open(cleaned_alternate_mask)[1].data\n",
+ " for cleaned_alternate_mask in cleaned_alternate_masks\n",
+ "]\n",
+ "cleaned_rate_modified_data = [\n",
+ " fits.open(cleaned_modified_mask)[1].data\n",
+ " for cleaned_modified_mask in cleaned_modified_masks\n",
+ "]\n",
+ "\n",
+ "# For plotting visualization\n",
+ "for data_list in [\n",
+ " original_rate_data,\n",
+ " cleaned_rate_default_data,\n",
+ " cleaned_rate_alternate_data,\n",
+ " cleaned_rate_modified_data,\n",
+ "]:\n",
+ " for data in data_list:\n",
+ " data[np.isnan(data)] = 0\n",
+ "\n",
+ "# Original vs. cleaned data (with default mask)\n",
+ "fig, axs = plt.subplots(2, 4, figsize=(25, 12))\n",
+ "\n",
+ "# Set y-axis titles and plot the data\n",
+ "titles = [\n",
+ " \"Original Rate Data\",\n",
+ " \"Cleaned Rate Data (Default Mask)\",\n",
+ " \"Cleaned Rate Data (Alternate Mask)\",\n",
+ " \"Cleaned Rate Data (Hand-Modified Mask)\",\n",
+ "]\n",
+ "for i, (data_list, title) in enumerate(\n",
+ " zip(\n",
+ " [\n",
+ " original_rate_data,\n",
+ " cleaned_rate_default_data,\n",
+ " cleaned_rate_alternate_data,\n",
+ " cleaned_rate_modified_data,\n",
+ " ],\n",
+ " titles,\n",
+ " )\n",
+ "):\n",
+ " for j, data in enumerate(data_list):\n",
+ " ax = axs[j, i]\n",
+ " ax.set_title(f'{title} \\n {\"NRS1\" if j == 0 else \"NRS2\"}', fontsize=12)\n",
+ " im = ax.imshow(data, origin=\"lower\", clim=(-1e-3, 1e-2))\n",
+ " fig.colorbar(im, ax=ax, pad=0.05, shrink=0.7, label=\"DN/s\")\n",
+ " ax.set_xlabel(\"Pixel Column\", fontsize=10)\n",
+ " ax.set_ylabel(\"Pixel Row\", fontsize=10)\n",
+ "\n",
+ "plt.tight_layout()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ae8622f2-0b6d-41eb-b38e-ac09f60a9155",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Final Comparison\n",
+ "\n",
+ "# Wavelength region of interest\n",
+ "wavelength_range = {\"nrs1\": [1.15, 1.25], \"nrs2\": [1.65, 1.75]}\n",
+ "\n",
+ "plot_spectra(\n",
+ " [\n",
+ " x1d_nsclean_skipped[0],\n",
+ " x1d_nsclean_default[0],\n",
+ " x1d_nsclean_alternate[0],\n",
+ " x1d_nsclean_modified[0],\n",
+ " ],\n",
+ " wavelength_range=wavelength_range,\n",
+ " scale_percent=4,\n",
+ ")\n",
+ "plot_spectra(\n",
+ " [\n",
+ " x1d_nsclean_skipped[1],\n",
+ " x1d_nsclean_default[1],\n",
+ " x1d_nsclean_alternate[1],\n",
+ " x1d_nsclean_modified[1],\n",
+ " ],\n",
+ " wavelength_range=wavelength_range,\n",
+ " scale_percent=4,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4ac02101-ef88-40a1-a557-19355e967466",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
\n",
+ " \n",
+ "Note: Cleaning with the alternate mask still removes some of the wavelength-dependent variations but leaves residual background variation in NRS2. Also note the overall continuum level for the spectrum has changed, especially for NRS1. Again, this is because sigma-clipping, unlike the default method, only masks the bright outliers, rather than the entire science region. Consequently, some sky regions are included in the background model that gets subtracted from the data, which can result in changes to the continuum level. In this case, the original algorithm, which blocks the entire science region for each IFU slice (or the hand-modified method to exclude snowballs), is preferable to creating the mask via sigma-clipping.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fdcb054d-3fd0-4ea3-9fcf-3cdfefbb38a7",
+ "metadata": {},
+ "source": [
+ "## About the Notebook
\n",
+ "\n",
+ "**Authors:** Melanie Clarke, Kayli Glidic; NIRSpec Instrument Team\n",
+ "\n",
+ "**Updated On**: Feburary 29, 2024.\n",
+ "\n",
+ "
\n",
+ "\n",
+ "[Top of Page](#top)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/NIRSpec_NSClean/MOS_NSClean_example.ipynb b/notebooks/NIRSpec_NSClean/MOS_NSClean_example.ipynb
new file mode 100644
index 000000000..93b676262
--- /dev/null
+++ b/notebooks/NIRSpec_NSClean/MOS_NSClean_example.ipynb
@@ -0,0 +1,1072 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "95dcb8aa-37e4-4707-a73b-2d8e047a3c66",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "# Cleaning Residual 1/f Noise in NIRSpec MOS Products with NSClean \n",
+ "
\n",
+ "\n",
+ "## Notebook Goal\n",
+ "\n",
+ "The goal of this notebook is to generate cleaned MOS (*_rate.fits*) files by removing residual 1/f noise. These cleaned files will be used as input for the level 3 (`Spec3Pipeline`) pipeline.\n",
+ "\n",
+ "## Table of Contents\n",
+ "\n",
+ "* 1. [Introduction](#introduction)\n",
+ "* 2. [Import Library](#imports)\n",
+ "* 3. [Download the Data](#data)\n",
+ "* 4. [Running `Spec2Pipeline` without NSClean (Original Data)](#nsclean_skipped)\n",
+ "* 5. [Clean up 1/f Noise with NSClean (Default Pipeline Mask)](#nsclean_default)\n",
+ " * 5.1 [Verify the Mask (Default Pipeline Mask)](#verify_default_mask)\n",
+ " * 5.2 [Comparing Original vs. Cleaned Data (Default Pipeline Mask)](#nsclean_default_compare)\n",
+ "* 6. [Clean up 1/f Noise with NSClean (Alternate Mask)](#nsclean_alternate)\n",
+ " * 6.1 [Verify the Mask (Alternate Mask)](#verify_alternate_mask)\n",
+ " * 6.2 [Comparing Original vs. Cleaned Data (Alternate Mask)](#nsclean_alternate_compare)\n",
+ "* 7. [Clean up 1/f Noise with NSClean (Hand-Modified Mask)](#nsclean_modified)\n",
+ " * 7.1 [Verify the Mask (Hand-Modified Mask)](#verify_modified_mask)\n",
+ " * 7.2 [Comparing Original vs. Cleaned Data (Hand-Modified Mask)](#nsclean_modified_compare)\n",
+ "* 8. [Conclusion](#conclusion)\n",
+ "* [About the Notebook](#about)\n",
+ "\n",
+ "\n",
+ "## 1. Introduction
\n",
+ "
\n",
+ "\n",
+ "The JWST NIRSpec instrument has a number of features and characteristics that observers should be aware of when planning observations and interpreting data. One notable feature seen in NIRSpec pipeline products is negative and/or surplus flux in the extracted 1-D spectrum, typically with an irregular wavelength-dependent undulation. The cause of this artifact is correlated noise, known as [1/f noise](https://jwst-docs.stsci.edu/jwst-near-infrared-spectrograph/nirspec-instrument-features-and-caveats#NIRSpecInstrumentFeaturesandCaveats-1/fnoise), from low-level detector thermal instabilities, seen as vertical banding in 2-D count rate images, particularly in exposures of the NRS2 detector. While the IRS2 readout mode reduces this effect, it is not completely eliminated.\n",
+ "\n",
+ "To address this issue, the JWST Science Calibration Pipeline has integrated an external package developed by Bernard Rauscher, known as [NSClean](https://webb.nasa.gov/content/forScientists/publications.html#NSClean), within the `Spec2Pipeline` under [NSCleanStep](https://jwst-pipeline.readthedocs.io/en/latest/jwst/nsclean/main.html). This algorithm uses dark areas of the detector to fit a background model to the data in Fourier space. It requires an input mask to identify all dark areas of the detector. The more thorough and complete this mask is, the better the background fit.\n",
+ "\n",
+ "In this notebook, we will use the NSClean algorithm integrated into the pipeline, utilizing a mask generated on-the-fly with default parameters to remove 1/f noise. In some cases, this mask may not be complete enough/too restrictive for the best possible noise removal. To address this, we demonstrate how one can manually modify the default mask, as well as how to create an alternative mask by adjusting the [NSCleanStep parameters](https://jwst-pipeline.readthedocs.io/en/latest/jwst/nsclean/arguments.html). If needed, see the [NSClean documentation](https://iopscience.iop.org/article/10.1088/1538-3873/ad1b36/pdf) for some suggestions on manually creating a custom mask.\n",
+ "\n",
+ "This notebook utilizes a MOS observation from the CEERS program with grating/filter G395M/F290LP, which is part of the [JWST Early Release Science program ERS-1345](https://www.stsci.edu/jwst/science-execution/approved-programs/dd-ers/program-1345) observation 61, as an example."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7c07213b-bbfb-462a-a65a-a3bc886f2a2f",
+ "metadata": {},
+ "source": [
+ "## 2. Import Library
\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6735deb1-3e9c-4020-9367-47d4c8538d97",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ---------- Set CRDS environment variables ----------\n",
+ "import os\n",
+ "import jwst\n",
+ "os.environ['CRDS_CONTEXT'] = 'jwst_1210.pmap'\n",
+ "os.environ['CRDS_PATH'] = os.environ['HOME']+'/crds_cache'\n",
+ "os.environ['CRDS_SERVER_URL'] = 'https://jwst-crds.stsci.edu'\n",
+ "print(f'CRDS cache location: {os.environ[\"CRDS_PATH\"]}')\n",
+ "\n",
+ "print(\"JWST Calibration Pipeline Version={}\".format(jwst.__version__))\n",
+ "# print(\"Current Operational CRDS Context = {}\".format(crds.get_default_context()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "92d138be-9ce1-479c-8291-21455515cf16",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ------ General Imports ------\n",
+ "import numpy as np\n",
+ "import time as tt\n",
+ "import logging\n",
+ "import warnings\n",
+ "\n",
+ "# ------ JWST Calibration Pipeline Imports ------\n",
+ "from jwst.pipeline.calwebb_spec2 import Spec2Pipeline\n",
+ "\n",
+ "# ------ Plotting/Stats Imports ------\n",
+ "from matplotlib import pyplot as plt\n",
+ "from astropy.io import fits\n",
+ "\n",
+ "from utils import get_jwst_file, plot_dark_data, plot_cleaned_data, plot_spectra\n",
+ "\n",
+ "# Hide all log and warning messages.\n",
+ "logging.disable(logging.ERROR)\n",
+ "warnings.simplefilter(\"ignore\", RuntimeWarning)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b557c042-77fb-4c76-99bc-6921a24a9a0c",
+ "metadata": {},
+ "source": [
+ "## 3. Download the Data
\n",
+ "
\n",
+ " \n",
+ "The input data for this notebook features a MOS observation from the CEERS program with grating/filter G395H/F290LP. The dataset is part of the [JWST Early Release Science program ERS-1345](https://www.stsci.edu/jwst/science-execution/approved-programs/dd-ers/program-1345), specifically observation 61. It consists of 3 integrations (3 dither points; 3-SHUTTER-SLITLET) with 9 groups each. This notebook focuses on the third dithered exposure (00003) as an example. However, it's important to note that before proceeding to the `Spec3Pipeline`, all exposures must first be processed through the `Spec2Pipeline`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "be6cdfdd-efa8-4590-ac06-1bb067a88daf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define a downloads directory.\n",
+ "mast_products_dir = \"./mast_products/\"\n",
+ "# Check if the directory exists.\n",
+ "if not os.path.exists(mast_products_dir):\n",
+ " # Create the directory if it doesn't exist.\n",
+ " os.makedirs(mast_products_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "490da514-4c98-4918-8fee-3cc3aa116810",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This notebook focuses on the third dithered exposure.\n",
+ "obs_ids = [\"jw01345061001_07101_00003\"]\n",
+ "detectors = [1, 2] # Both Detectors NRS1 and NRS2.\n",
+ "\n",
+ "# Specify countrate products\n",
+ "rate_names = []\n",
+ "for obs_id in obs_ids:\n",
+ " for detector in detectors:\n",
+ " rate_names.append(f\"{obs_id}_nrs{detector}_rate.fits\")\n",
+ "\n",
+ "# Download all the FITS files and associated MSA files.\n",
+ "msa_names = []\n",
+ "for name in rate_names:\n",
+ " print(f\"Downloading {name}\")\n",
+ " get_jwst_file(\n",
+ " name,\n",
+ " mast_api_token=\"75f915f251544013bb127b7f6f6edc80\",\n",
+ " save_directory=mast_products_dir,\n",
+ " )\n",
+ "\n",
+ " # Retrieve the MSA file from the header in the downloaded file.\n",
+ " msa_name = fits.getval(mast_products_dir + name, \"MSAMETFL\")\n",
+ " if not os.path.isfile(msa_name):\n",
+ " print(f\"Downloading {msa_name}\")\n",
+ " get_jwst_file(\n",
+ " msa_name,\n",
+ " mast_api_token=None,\n",
+ " save_directory=mast_products_dir,\n",
+ " )\n",
+ " msa_names.append(msa_name)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a7af0dc0-4c21-41df-902d-8c4f179a7719",
+ "metadata": {},
+ "source": [
+ "## 4. Running `Spec2Pipeline` without NSClean (Original Data)
\n",
+ "
\n",
+ "\n",
+ "The cell below executes the `Spec2Pipeline`, explicitly skipping the NSClean step during processing. The level 2 products generated will serve as a reference point to illustrate how the countrate images and final extracted spectra appear without the removal of 1/f noise."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "00262cac-db95-4fb5-92e5-b7108f63023b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set up directory for running the pipeline without NSClean.\n",
+ "stage2_nsclean_skipped_dir = \"./stage2_nsclean_skipped/\"\n",
+ "if not os.path.exists(stage2_nsclean_skipped_dir):\n",
+ " os.makedirs(stage2_nsclean_skipped_dir) # Create the directory if it doesn't exist."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cbef0675-7aad-449a-8b69-4d0616bed8a3",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Original data (no NSClean Applied).\n",
+ "# Estimated run-time: 1-2 minutes.\n",
+ "start = tt.time()\n",
+ "\n",
+ "for i in rate_names:\n",
+ " print(f\"Processing {i}...\")\n",
+ "\n",
+ " if \"nrs1\" in i:\n",
+ " slit_name = \"80\"\n",
+ " else:\n",
+ " slit_name = \"11\"\n",
+ "\n",
+ " Spec2Pipeline.call(\n",
+ " mast_products_dir + i,\n",
+ " save_results=True,\n",
+ " steps={\"nsclean\": {\"skip\": True}, \"extract_2d\": {\"slit_name\": slit_name}},\n",
+ " output_dir=stage2_nsclean_skipped_dir,\n",
+ " )\n",
+ "\n",
+ " print(f\"Saved {i[:-9]}\" + \"cal.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"x1d.fits\")\n",
+ "\n",
+ "end = tt.time()\n",
+ "print(\"Run time: \", round(end - start, 1) / 60.0, \" min\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5a42d053-0288-4ed6-a962-a265b27ec7d6",
+ "metadata": {},
+ "source": [
+ "## 5. Clean up 1/f Noise with NSClean (Default Pipeline Mask)
\n",
+ "
\n",
+ "\n",
+ "If a user-supplied mask file is not provided to the [NSClean step](https://jwst-pipeline.readthedocs.io/en/latest/jwst/nsclean/index.html) in the `Spec2Pipeline`, the pipeline will generate a mask based on default parameters. This mask will identify any pixel that is unilluminated. That is, the mask must contain True and False values, where True indicates that the pixel is dark, and False indicates that the pixel is illuminated (not dark).\n",
+ "\n",
+ "By default, the pipeline marks the following detector areas as illuminated, non-dark areas (False):\n",
+ "\n",
+ "* Pixels designated as open slits for this MOS observation.\n",
+ "* Traces from failed-open MSA shutters.\n",
+ "* 5-sigma outliers (default value).\n",
+ "* Any pixel set to NaN in the rate data.\n",
+ " \n",
+ "To tune the outlier detection in the mask, try modifying the `n_sigma` parameter (explored in the next section). A higher value will identify fewer outliers. A lower value will identify more. \n",
+ "\n",
+ "The default generated mask is saved and analyzed below. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "673e76ce-8583-4799-a1e7-51d20283966f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set up directory for running NSClean with default parameters.\n",
+ "stage2_nsclean_default_dir = \"./stage2_nsclean_default/\"\n",
+ "if not os.path.exists(stage2_nsclean_default_dir):\n",
+ " os.makedirs(stage2_nsclean_default_dir) # Create the directory if it doesn't exist."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8c3c6857-2029-4a4a-8cf3-18792d48dd59",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1/f noise cleaned data (default NSClean pipeline mask).\n",
+ "# Estimated run time: 6 minutes.\n",
+ "start = tt.time()\n",
+ "\n",
+ "for i in rate_names:\n",
+ " print(f\"Processing {i}...\")\n",
+ "\n",
+ " if \"nrs1\" in i:\n",
+ " slit_name = \"80\"\n",
+ " else:\n",
+ " slit_name = \"11\"\n",
+ "\n",
+ " Spec2Pipeline.call(\n",
+ " mast_products_dir + i,\n",
+ " save_results=True,\n",
+ " steps={\n",
+ " \"nsclean\": {\"skip\": False, \"save_mask\": True, \"save_results\": True},\n",
+ " \"extract_2d\": {\"slit_name\": slit_name},\n",
+ " },\n",
+ " output_dir=stage2_nsclean_default_dir,\n",
+ " )\n",
+ "\n",
+ " print(f\"Saved {i[:-9]}\" + \"mask.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"nsclean.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"cal.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"x1d.fits\")\n",
+ "\n",
+ "end = tt.time()\n",
+ "print(\"Run time: \", round(end - start, 1) / 60.0, \" min\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c5dce560-cd71-4bf3-a4a5-0828b6b2830c",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "Warning: \n",
+ " \n",
+ "In some situations, the NSClean step may fail to find a fit to the background noise. This failure may occur if the mask does not contain enough dark data (marked True). In particular, every column in the mask except for the first and last 4 columns must contain some pixels marked True. The background fitting procedure considers each column, one at a time, so it will crash if there is no data in a column to fit. If failure occurs, check that your mask in the image below has at least some True values in every column.\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3d9867fb-d83a-4cad-b95d-078286dce27a",
+ "metadata": {},
+ "source": [
+ "### 5.1 Verify the Mask (Default Pipeline Mask)
\n",
+ "
\n",
+ "\n",
+ "Check the mask against the rate data to make sure it keeps only dark areas of the detector.\n",
+ "\n",
+ "Note that there are still some remaining illuminated areas, primarily due to transient artifacts like cosmic rays and snowballs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "93bd3ba0-df01-409e-9da3-cc5fb160425f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot the rate data with masked areas blocked.\n",
+ "\n",
+ "# List of on-the-fly built masks from the pipeline.\n",
+ "nsclean_default_masks = [\n",
+ " stage2_nsclean_default_dir + \"jw01345061001_07101_00003_nrs1_mask.fits\",\n",
+ " stage2_nsclean_default_dir + \"jw01345061001_07101_00003_nrs2_mask.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and mask file.\n",
+ "for rate_file, mask_file in zip(rate_names, nsclean_default_masks):\n",
+ " plot_dark_data(mast_products_dir + rate_file, mask_file, layout=\"columns\", scale=9)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "58d0ce26-dbfb-4391-af9b-9eb206a76daa",
+ "metadata": {},
+ "source": [
+ "### 5.2 Comparing Original vs. Cleaned Data (Default Pipeline Mask)
\n",
+ "
\n",
+ "\n",
+ "We can now compare the cleaned data (with the default pipeline mask) to the original rate file and verify that the 1/f noise has been reduced.\n",
+ "\n",
+ "In many cases, the cleaning process introduces new artifacts to the rate file. These should be carefully examined and weighed against the benefits of noise reduction. If transient artifacts, like snowballs, are interfering with the cleaning process, it may be beneficial to manually edit the mask to remove these areas from consideration in the background fit. To do so, try varying the outlier detection threshold or editing specific pixels in the mask array directly (explored in the next few sections). Otherwise, refer to the [NSClean documentation](https://iopscience.iop.org/article/10.1088/1538-3873/ad1b36/pdf) for additional suggestions on manual editing.\n",
+ "\n",
+ "Note that in the images below, there are scattered values with large relative differences from the original rate file (shown in the relative difference image below). These are artifacts of the cleaning process.\n",
+ "\n",
+ "There are also broader low-level residual background effects (shown in the relative difference image on the right, below, with scattered outliers, identified with sigma clipping, hidden by masking). These include the background patterns we are trying to remove: the 1/f noise variations in the dispersion direction and the picture frame effect at the top and bottom of the frame (for full-frame data). However, there may also be low-level artifacts introduced by over-fitting the dark data in the cleaning process.\n",
+ "\n",
+ "Check both residual images carefully to understand the impact of the cleaning process on your data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "71610d00-75bc-4b09-a7e0-0950abae4372",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Plot the original and cleaned data, as well as a residual map.\n",
+ "\n",
+ "cleaned_default_masks = [\n",
+ " stage2_nsclean_default_dir + \"jw01345061001_07101_00003_nrs1_nsclean.fits\",\n",
+ " stage2_nsclean_default_dir + \"jw01345061001_07101_00003_nrs2_nsclean.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and cleaned file.\n",
+ "for rate_file, cleaned_file in zip(rate_names, cleaned_default_masks):\n",
+ " plot_cleaned_data(\n",
+ " mast_products_dir + rate_file, cleaned_file, layout=\"columns\", scale=9\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e9a4c199-cc30-45ae-9805-d46b38c7156e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot heavily masked region around x=1000, y=600 NRS2\n",
+ "# Masking can introduce some high frequency noise in the cleaning process that\n",
+ "# appears as vertical striping over the spectral traces\n",
+ "\n",
+ "rate_file = mast_products_dir + rate_names[1] # NRS2\n",
+ "original_rate_data = fits.open(rate_file)[1].data[550:651, 800:1401]\n",
+ "cleaned_rate_data = fits.open(cleaned_default_masks[1])[1].data[550:651, 800:1401]\n",
+ "\n",
+ "# For plotting vizualization\n",
+ "original_rate_data[np.isnan(original_rate_data)] = 0\n",
+ "cleaned_rate_data[np.isnan(cleaned_rate_data)] = 0\n",
+ "\n",
+ "vmin = np.nanpercentile(cleaned_rate_data, 5)\n",
+ "vmax = np.nanpercentile(cleaned_rate_data, 100-9)\n",
+ "\n",
+ "# Original vs. cleaned data (with default mask)\n",
+ "fig, axs = plt.subplots(1, 2, figsize=(15, 4))\n",
+ "fig.colorbar(\n",
+ " axs[0].imshow(\n",
+ " original_rate_data,\n",
+ " cmap=\"viridis\",\n",
+ " aspect=4,\n",
+ " origin=\"lower\",\n",
+ " clim=(vmin, vmax),\n",
+ " ),\n",
+ " ax=axs[0],\n",
+ " pad=0.05,\n",
+ " shrink=0.7,\n",
+ " label=\"DN/s\",\n",
+ ")\n",
+ "fig.colorbar(\n",
+ " axs[1].imshow(\n",
+ " cleaned_rate_data,\n",
+ " cmap=\"viridis\",\n",
+ " aspect=4,\n",
+ " origin=\"lower\",\n",
+ " clim=(vmin, vmax),\n",
+ " ),\n",
+ " ax=axs[1],\n",
+ " pad=0.05,\n",
+ " shrink=0.7,\n",
+ " label=\"DN/s\",\n",
+ ")\n",
+ "\n",
+ "# Set titles, tick values, xlabel, and ylabel for subplots\n",
+ "for ax, title in zip(axs, [\"Original Rate Data (NRS2)\", \"Cleaned Rate Data (NRS2)\"]):\n",
+ " ax.set(title=title, xlabel=\"Pixel Column\", ylabel=\"Pixel Row\")\n",
+ " ax.set_xticklabels([700, 800, 900, 1000, 1100, 1200, 1300, 1400])\n",
+ " ax.set_yticklabels([475, 500, 525, 575, 600, 625, 650])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f3d1fa78-3c17-45c4-afd8-9470128d94a8",
+ "metadata": {},
+ "source": [
+ "Compare the extracted spectrum from the cleaned data to the spectrum extracted from the original rate file. "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9e77330d-19d7-412e-bae8-7f6c3159c9b0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1D extracted spectra.\n",
+ "x1d_nsclean_skipped = [\n",
+ " stage2_nsclean_skipped_dir + \"jw01345061001_07101_00003_nrs1_x1d.fits\",\n",
+ " stage2_nsclean_skipped_dir + \"jw01345061001_07101_00003_nrs2_x1d.fits\",\n",
+ "]\n",
+ "x1d_nsclean_default = [\n",
+ " stage2_nsclean_default_dir + \"jw01345061001_07101_00003_nrs1_x1d.fits\",\n",
+ " stage2_nsclean_default_dir + \"jw01345061001_07101_00003_nrs2_x1d.fits\",\n",
+ "]\n",
+ "\n",
+ "# Wavelength region of interest.\n",
+ "for original, cleaned in zip(x1d_nsclean_skipped, x1d_nsclean_default):\n",
+ " plot_spectra([original, cleaned], scale_percent=9)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32ca222d-daf9-4742-9e8c-fab2a6156b3b",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "\n",
+ "Notes: \n",
+ "* In slit 80 on NRS1 and in slit 11 on NRS2, the overall continuum level has been slightly altered by the cleaning process.\n",
+ "\n",
+ "* In slit 11 on NRS2, some of the negative flux in the original spectrum has been corrected. However, excessive masking in the default mask has introduced high-frequency noise, particularly affecting slit 11 between 4.5 - 5.0 um. This results in a noticeable dip in the spectrum extracted from the cleaned data compared to the original raw data (above).\n",
+ "\n",
+ "\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dae7529b-587e-490d-8c1b-69568e472501",
+ "metadata": {},
+ "source": [
+ "## 6. Clean up 1/f Noise with NSClean (Alternate Mask)
\n",
+ "
\n",
+ "\n",
+ "For this data set, note that some regions of the detector are heavily masked, due to overlapping slit regions. For example, see the cleaned rate data for NRS2 above, around x=1000, y=600. \n",
+ "\n",
+ "In this region, the cleaning process introduces some high frequency noise that appears as vertical striping over the spectral traces. Slit 11 is extracted from this region, and shows a noticeable dip in the spectrum extracted from the cleaned data, compared to the original rate data (above).\n",
+ "\n",
+ "Also note that for MOS data, there may be several illuminated regions of the detector that are not masked by the slitlet bounding boxes. For the M gratings, zeroth-order spectra may appear on the detector, and are not easily located. For the long-pass filters, there is still some light past the red cutoff of the slitlet bounding box.\n",
+ "\n",
+ "In this case, it may be beneficial to build the mask with an alternate algorithm. Here, we do not use slitlet bounding boxes and instead iteratively mask any data more than 1 sigma above the background. This leaves more dark data between the spectral traces and improves the background fit in the problematic area.\n",
+ "\n",
+ "Note, however, that excessive cleaning may impact the continuum level for the spectra, if too much or too little illuminated data is included in the mask. Again, the generated mask and output spectra should be carefully examined to weigh the benefits of cleaning against the impact on the spectra.\n",
+ "\n",
+ "To tune the illumination detection in this mask, try modifying the `n_sigma` parameter below. A higher value will identify less illumination. A lower value will identify more."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bea676bf-2e8e-407d-ae87-e16c50d96c8b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set up directory for running NSClean with alternate parameters.\n",
+ "stage2_nsclean_alternate_dir = \"./stage2_nsclean_alternate/\"\n",
+ "if not os.path.exists(stage2_nsclean_alternate_dir):\n",
+ " os.makedirs(\n",
+ " stage2_nsclean_alternate_dir\n",
+ " ) # Create the directory if it doesn't exist."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "13eeab2e-4667-463c-bf1b-78d4c27189cc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1/f noise cleaned data (alternate NSClean pipeline mask).\n",
+ "# Estimated run time: 7 minutes.\n",
+ "\n",
+ "start = tt.time()\n",
+ "\n",
+ "for indx, i in enumerate(rate_names):\n",
+ " print(f\"Processing {i}...\")\n",
+ "\n",
+ " if \"nrs1\" in i:\n",
+ " slit_name = \"80\"\n",
+ " else:\n",
+ " slit_name = \"11\"\n",
+ "\n",
+ " Spec2Pipeline.call(\n",
+ " mast_products_dir + i,\n",
+ " save_results=True,\n",
+ " steps={\n",
+ " \"nsclean\": {\n",
+ " \"skip\": False,\n",
+ " \"save_mask\": True,\n",
+ " \"save_results\": True,\n",
+ " \"n_sigma\": 1,\n",
+ " \"mask_spectral_regions\": False,\n",
+ " },\n",
+ " \"extract_2d\": {\"slit_name\": slit_name},\n",
+ " },\n",
+ " output_dir=stage2_nsclean_alternate_dir,\n",
+ " )\n",
+ "\n",
+ " print(f\"Saved {i[:-9]}\" + \"mask.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"nsclean.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"cal.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"x1d.fits\")\n",
+ "\n",
+ "end = tt.time()\n",
+ "print(\"Run time: \", round(end - start, 1) / 60.0, \" min\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dadf08d6-e425-4a79-be60-e6d7c94fb780",
+ "metadata": {},
+ "source": [
+ "### 6.1 Verify the Mask (Alternate Mask)
\n",
+ "
\n",
+ "\n",
+ "Check the mask against the rate data to make sure it keeps only dark areas of the detector.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6291536f-58f5-44e8-9106-355c9dbb12b6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot the rate data with masked areas blocked.\n",
+ "\n",
+ "# List of on-the-fly built masks from the pipeline.\n",
+ "nsclean_alternate_masks = [\n",
+ " stage2_nsclean_alternate_dir + \"jw01345061001_07101_00003_nrs1_mask.fits\",\n",
+ " stage2_nsclean_alternate_dir + \"jw01345061001_07101_00003_nrs2_mask.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and mask file.\n",
+ "for rate_file, mask_file in zip(rate_names, nsclean_alternate_masks):\n",
+ " plot_dark_data(mast_products_dir + rate_file, mask_file, layout=\"columns\", scale=9)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "35e3f088-f1d9-484c-8d8d-3ec056c52152",
+ "metadata": {},
+ "source": [
+ "### 6.2 Comparing Original vs. Cleaned Data (Alternate Mask)
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b786f7b-cafc-45b8-af24-37a1757f8231",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Plot the original and cleaned data, as well as a residual map.\n",
+ "\n",
+ "cleaned_alternate_masks = [\n",
+ " stage2_nsclean_alternate_dir + \"jw01345061001_07101_00003_nrs1_nsclean.fits\",\n",
+ " stage2_nsclean_alternate_dir + \"jw01345061001_07101_00003_nrs2_nsclean.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and cleaned file.\n",
+ "for rate_file, cleaned_file in zip(rate_names, cleaned_alternate_masks):\n",
+ " plot_cleaned_data(\n",
+ " mast_products_dir + rate_file, cleaned_file, layout=\"columns\", scale=9\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bbb1e179-810f-4cfa-b513-1aebdd8e07a3",
+ "metadata": {},
+ "source": [
+ "Compare the extracted spectrum from the cleaned data to the spectrum extracted from the original rate file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ba9d163c-6813-42be-b1c5-8eec4fd06b33",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "x1d_nsclean_alternate = [\n",
+ " stage2_nsclean_alternate_dir + \"jw01345061001_07101_00003_nrs1_x1d.fits\",\n",
+ " stage2_nsclean_alternate_dir + \"jw01345061001_07101_00003_nrs2_x1d.fits\",\n",
+ "]\n",
+ "\n",
+ "for original, cleaned in zip(x1d_nsclean_skipped, x1d_nsclean_alternate):\n",
+ " plot_spectra([original, cleaned], scale_percent=9)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "228d10fc-9950-4883-b760-8930d48434e9",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "\n",
+ "Notes: \n",
+ "* In slit 80 on NRS1 and in slit 11 on NRS2, the overall continuum level has been slightly altered by the cleaning process (similar to the default masking).\n",
+ "\n",
+ "* In slit 11 on NRS2, some of the negative flux in the original spectrum has been corrected. The high-frequency noise introduced by the default masking, which affected slit 11 between 4.5 - 5.0 um (resulting in a negative dip in the spectrum), is improved with the alternate masking.\n",
+ "\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f727eada",
+ "metadata": {},
+ "source": [
+ "## 7. Clean up 1/f Noise with NSClean (Hand-Modified Mask)
\n",
+ "
\n",
+ "In certain scenarios, manual generation of a mask may be required. Here, we present **one** approach to manually modify the mask (editing an overly masked region in NRS2 around x=1000, y=600 to include more background; excluding some large snowballs in NRS1), starting with the default mask output from the pipeline. It is worth noting that the mask modified using this method may not necessarily outperform the two previous options.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "317eb23f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set up directory for running NSClean with user-supplied mask.\n",
+ "stage2_nsclean_modified_dir = \"./stage2_nsclean_modified/\"\n",
+ "if not os.path.exists(stage2_nsclean_modified_dir):\n",
+ " # Create the directory if it doesn't exist.\n",
+ " os.makedirs(stage2_nsclean_modified_dir)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a68b4321",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Hand-modify certain mask regions.\n",
+ "# Specifically modifying the region in NRS2 around x=1000, y=600.\n",
+ "\n",
+ "# Define the list to store paths of modified masks.\n",
+ "nsclean_modified_masks = []\n",
+ "\n",
+ "# Iterate through the list of original masks.\n",
+ "for mask in nsclean_default_masks:\n",
+ " # New mask file name.\n",
+ " output_file = os.path.basename(mask)[:-5] + \"_modified.fits\"\n",
+ "\n",
+ " # Open the FITS file.\n",
+ " with fits.open(mask) as hdul:\n",
+ " # Extract the mask data from the science extension.\n",
+ " mask_data = hdul[\"SCI\"].data.copy() # Make a copy.\n",
+ "\n",
+ " if \"nrs2\" in mask:\n",
+ " # Step 1: Set the default masked regions back to True.\n",
+ " mask_data[550:651, :1300] = True\n",
+ "\n",
+ " # Step 2: Re-define masked regions by hand.\n",
+ " mask_data[550:575, :1300] = False # Crowded region NRS2.\n",
+ " mask_data[590:615, 150:1720] = False\n",
+ " mask_data[622:647, 100:1620] = False\n",
+ " else:\n",
+ " mask_data[50:130, 780:850] = False # Snowballs\n",
+ " mask_data[110:140, 920:945] = False\n",
+ " mask_data[820:940, 2000:2040] = False\n",
+ " mask_data[1650:1700, 1900:1960] = False\n",
+ " mask_data[1800:1900, 330:410] = False\n",
+ "\n",
+ " # Update the data within the science extension.\n",
+ " hdul[\"SCI\"].data = mask_data\n",
+ " # Save the modified FITS file\n",
+ " output_path = os.path.join(stage2_nsclean_modified_dir, output_file)\n",
+ " hdul_modified = hdul.copy() # Make a copy.\n",
+ " hdul_modified.writeto(output_path, overwrite=True)\n",
+ " nsclean_modified_masks.append(output_path)\n",
+ " print(f\"Saved modified mask as: {output_path}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "91f5cf29",
+ "metadata": {},
+ "source": [
+ "### 7.1 Verify the Mask (Hand-Modified Mask)
\n",
+ "
\n",
+ "\n",
+ "Check the mask against the rate data to make sure it keeps only dark areas of the detector.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "50907f03",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot the rate data with masked areas blocked.\n",
+ "\n",
+ "# List of modified masks for the pipeline.\n",
+ "nsclean_modified_masks = [\n",
+ " stage2_nsclean_modified_dir + \"jw01345061001_07101_00003_nrs1_mask_modified.fits\",\n",
+ " stage2_nsclean_modified_dir + \"jw01345061001_07101_00003_nrs2_mask_modified.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and mask file.\n",
+ "for rate_file, mask_file in zip(rate_names, nsclean_modified_masks):\n",
+ " plot_dark_data(mast_products_dir + rate_file, mask_file, layout=\"columns\", scale=9)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "407bb4ec-f053-4bfa-be0c-345fb67141b7",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "Note: When modifying the default mask for NRS2, we selected a region to unmask and then re-masked differently. However, this process inadvertently led to the unmasking of some previously masked NaN values (white pixels seen in the dark data plot for NRS2). Therefore, caution should be exercised when modifying the mask. Additionally, even though we masked various snowballs, we may not observe a difference in the 1D extracted spectra depending on the slit we extract.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ec1389d7",
+ "metadata": {},
+ "source": [
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "57973778",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# 1/f noise cleaned data (user-supplied mask).\n",
+ "# Estimated run time: 5 minutes.\n",
+ "\n",
+ "start = tt.time()\n",
+ "\n",
+ "for indx, i in enumerate(rate_names):\n",
+ " print(f\"Processing {i}...\")\n",
+ "\n",
+ " if \"nrs1\" in i:\n",
+ " slit_name = \"80\"\n",
+ " else:\n",
+ " slit_name = \"11\"\n",
+ "\n",
+ " Spec2Pipeline.call(\n",
+ " mast_products_dir + i,\n",
+ " save_results=True,\n",
+ " steps={\n",
+ " \"nsclean\": {\n",
+ " \"skip\": False,\n",
+ " \"save_mask\": True,\n",
+ " \"save_results\": True,\n",
+ " \"user_mask\": nsclean_modified_masks[indx],\n",
+ " },\n",
+ " \"extract_2d\": {\"slit_name\": slit_name},\n",
+ " },\n",
+ " output_dir=stage2_nsclean_modified_dir,\n",
+ " )\n",
+ "\n",
+ " print(f\"Saved {i[:-9]}\" + \"mask.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"nsclean.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"cal.fits\")\n",
+ " print(f\"Saved {i[:-9]}\" + \"x1d.fits\")\n",
+ "\n",
+ "end = tt.time()\n",
+ "print(\"Run time: \", round(end - start, 1) / 60.0, \" min\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6ba19f8a",
+ "metadata": {},
+ "source": [
+ "### 7.2 Comparing Original vs. Cleaned Data (Hand-Modified Mask)
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cc95ef4f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Plot the original and cleaned data, as well as a residual map.\n",
+ "\n",
+ "cleaned_modified_masks = [\n",
+ " stage2_nsclean_modified_dir + \"jw01345061001_07101_00003_nrs1_nsclean.fits\",\n",
+ " stage2_nsclean_modified_dir + \"jw01345061001_07101_00003_nrs2_nsclean.fits\",\n",
+ "]\n",
+ "\n",
+ "# Plot each associated set of rateint data and cleaned file.\n",
+ "for rate_file, cleaned_file in zip(rate_names, cleaned_modified_masks):\n",
+ " plot_cleaned_data(\n",
+ " mast_products_dir + rate_file, cleaned_file, layout=\"columns\", scale=9\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9077f84f",
+ "metadata": {},
+ "source": [
+ "Compare the extracted spectrum from the cleaned data to the spectrum extracted from the original rate file."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1e2bbfe1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x1d_nsclean_modified = [\n",
+ " stage2_nsclean_modified_dir + \"jw01345061001_07101_00003_nrs1_x1d.fits\",\n",
+ " stage2_nsclean_modified_dir + \"jw01345061001_07101_00003_nrs2_x1d.fits\",\n",
+ "]\n",
+ "\n",
+ "# Wavelength Region of interest.\n",
+ "for original, cleaned in zip(x1d_nsclean_skipped, x1d_nsclean_modified):\n",
+ " plot_spectra([original, cleaned], scale_percent=9)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9c342881-014d-490f-9e30-9a229140f9d2",
+ "metadata": {},
+ "source": [
+ "
\n",
+ "\n",
+ "Notes: \n",
+ "* In slit 80 on NRS1, the overall continuum level has been slightly altered by the cleaning process (similar to the default masking and alternate masking).\n",
+ "\n",
+ "* In slit 11 on NRS2, the flux between 4-4.55 um has decreased due to the cleaning process with the hand-modified mask. Some of the negative flux in the original spectrum (at shorter and longer wavelengths) has been corrected.\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "797f0c0d-e694-4103-8c4d-7ad3a3d84eaf",
+ "metadata": {},
+ "source": [
+ "## 8. Conclusion
\n",
+ "
\n",
+ "\n",
+ "The final plots below show the countrate images and the resulting 1D extracted spectra side-by-side to compare the different cleaning methods: the original (no NSClean applied), the cleaned countrate image (with the default pipeline mask), the cleaned countrate image (with an alternate pipeline mask), and finally, the cleaned countrate image (with the hand-modified mask).\n",
+ "\n",
+ "Please note that the results presented in this notebook may vary for different datasets (e.g., targets of different brightness, spatial extent, etc.). Users are encouraged to explore NSClean using different masking methods to determine the optimal results.\n",
+ "\n",
+ "The output from the cleaning algorithm is now ready for further processing. The (*_cal.fits*) files produced by the above `Spec2Pipeline` run may be used as input to the `Spec3Pipeline`, for generating final combined spectra."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a2ab2321-2e13-4188-82db-35b6c339d2ae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Not cleaned vs. cleaned (default mask) vs. cleaned (alternate mask) rate data\n",
+ "original_rate_data = [\n",
+ " fits.open(mast_products_dir + rate_name)[1].data for rate_name in rate_names\n",
+ "]\n",
+ "cleaned_rate_default_data = [\n",
+ " fits.open(cleaned_default_mask)[1].data\n",
+ " for cleaned_default_mask in cleaned_default_masks\n",
+ "]\n",
+ "cleaned_rate_alternate_data = [\n",
+ " fits.open(cleaned_alternate_mask)[1].data\n",
+ " for cleaned_alternate_mask in cleaned_alternate_masks\n",
+ "]\n",
+ "cleaned_rate_modified_data = [\n",
+ " fits.open(cleaned_modified_mask)[1].data\n",
+ " for cleaned_modified_mask in cleaned_modified_masks\n",
+ "]\n",
+ "\n",
+ "# For plotting visualization\n",
+ "for data_list in [\n",
+ " original_rate_data,\n",
+ " cleaned_rate_default_data,\n",
+ " cleaned_rate_alternate_data,\n",
+ " cleaned_rate_modified_data,\n",
+ "]:\n",
+ " for data in data_list:\n",
+ " data[np.isnan(data)] = 0\n",
+ "\n",
+ "# Original vs. cleaned data (with default mask)\n",
+ "fig, axs = plt.subplots(2, 4, figsize=(25, 12))\n",
+ "\n",
+ "# Set y-axis titles and plot the data\n",
+ "titles = [\n",
+ " \"Original Rate Data\",\n",
+ " \"Cleaned Rate Data (Default Mask)\",\n",
+ " \"Cleaned Rate Data (Alternate Mask)\",\n",
+ " \"Cleaned Rate Data (Hand-Modified Mask)\",\n",
+ "]\n",
+ "for i, (data_list, title) in enumerate(\n",
+ " zip(\n",
+ " [\n",
+ " original_rate_data,\n",
+ " cleaned_rate_default_data,\n",
+ " cleaned_rate_alternate_data,\n",
+ " cleaned_rate_modified_data,\n",
+ " ],\n",
+ " titles,\n",
+ " )\n",
+ "):\n",
+ " for j, data in enumerate(data_list):\n",
+ " ax = axs[j, i]\n",
+ " ax.set_title(f'{title} \\n {\"NRS1\" if j == 0 else \"NRS2\"}', fontsize=12)\n",
+ " im = ax.imshow(data, origin=\"lower\", clim=(-1e-2, 1e-2))\n",
+ " fig.colorbar(im, ax=ax, pad=0.05, shrink=0.7, label=\"DN/s\")\n",
+ " ax.set_xlabel(\"Pixel Column\", fontsize=10)\n",
+ " ax.set_ylabel(\"Pixel Row\", fontsize=10)\n",
+ "\n",
+ "plt.tight_layout()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f579db06-1587-4354-ba8e-d1939a7942ec",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Final Comparison\n",
+ "plot_spectra(\n",
+ " [\n",
+ " x1d_nsclean_skipped[0],\n",
+ " x1d_nsclean_default[0],\n",
+ " x1d_nsclean_alternate[0],\n",
+ " x1d_nsclean_modified[0],\n",
+ " ],\n",
+ " scale_percent=4,\n",
+ ")\n",
+ "plot_spectra(\n",
+ " [\n",
+ " x1d_nsclean_skipped[1],\n",
+ " x1d_nsclean_default[1],\n",
+ " x1d_nsclean_alternate[1],\n",
+ " x1d_nsclean_modified[1],\n",
+ " ],\n",
+ " scale_percent=4,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f446267d-ad10-4e9b-8ad7-0f70cb76ec07",
+ "metadata": {},
+ "source": [
+ "\n",
+ "
\n",
+ " \n",
+ "Final Notes: \n",
+ "* The high-frequency noise in NRS2 introduced by the default masking, which was affecting the 1D extracted spectrum for slit 11 around 4.55um, no longer appears when using the alternate mask (slight improvement with the hand-modified mask), significantly improving the spectrum for slit 11.\n",
+ "* Negative flux in slit 11 (at shorter and longer wavelengths) has been corrected with either of the masks. However, the hand-modified mask appears to decrease the continuum level for the spectrum of slit 11 between 4-4.55um. In this case, the alternate masking (clip-based) algorithm is preferable to blocking the entire science region for each MSA shutter.\n",
+ "\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "63478af5-10fc-4f8b-be62-b0fcd9f65423",
+ "metadata": {},
+ "source": [
+ "## About the Notebook
\n",
+ "\n",
+ "**Authors:** Melanie Clarke, Kayli Glidic; NIRSpec Instrument Team\n",
+ "\n",
+ "**Updated On**: Feburary 29, 2024.\n",
+ "\n",
+ "
\n",
+ "\n",
+ "[Top of Page](#top)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/notebooks/NIRSpec_NSClean/requirements.txt b/notebooks/NIRSpec_NSClean/requirements.txt
new file mode 100644
index 000000000..c037a19b3
--- /dev/null
+++ b/notebooks/NIRSpec_NSClean/requirements.txt
@@ -0,0 +1,7 @@
+numpy == 1.26.4
+requests == 2.31.0
+jwst == 1.13.4
+crds == 11.17.19
+stpipe == 0.5.1
+matplotlib == 3.8.3
+astropy == 6.0.0
\ No newline at end of file
diff --git a/notebooks/NIRSpec_NSClean/utils.py b/notebooks/NIRSpec_NSClean/utils.py
new file mode 100644
index 000000000..9075f47f2
--- /dev/null
+++ b/notebooks/NIRSpec_NSClean/utils.py
@@ -0,0 +1,525 @@
+import os
+import numpy as np
+import requests
+
+# ------ Plotting/Stats Imports ------
+from matplotlib import pyplot as plt
+from astropy.stats import sigma_clip
+from astropy.io import fits
+
+
+def get_jwst_file(name, mast_api_token=None, save_directory=".", redownload=False):
+ """
+ Retrieve a JWST data file from MAST archive and save it to a specified directory.
+
+ Parameters:
+ ----------
+ name: str
+ File name.
+ mast_api_token: str
+ MAST authorization token. Get your MAST Token Here: https://auth.mast.stsci.edu/token.
+ save_directory: str
+ Save directory path.
+ redownload: bool
+ Redownload the data even if it exsits already?
+ """
+ mast_url = "https://mast.stsci.edu/api/v0.1/Download/file"
+ params = dict(uri=f"mast:JWST/product/{name}")
+
+ if mast_api_token:
+ headers = dict(Authorization=f"token {mast_api_token}")
+ else:
+ headers = {}
+
+ file_path = os.path.join(save_directory, name)
+
+ # Check if the file already exists in the save directory.
+ if os.path.exists(file_path) and not redownload:
+ print(f"The file {name} already exists in the directory. Skipping download.")
+ return
+
+ r = requests.get(mast_url, params=params, headers=headers, stream=True)
+ r.raise_for_status()
+
+ with open(file_path, "wb") as fobj:
+ for chunk in r.iter_content(chunk_size=1024000):
+ fobj.write(chunk)
+
+
+# Define a helper function for plotting the dark areas (the mask).
+def plot_dark_data(
+ rate_file,
+ mask_file,
+ cmap="viridis",
+ scale=0.2,
+ slice_index=1,
+ axis=0,
+ aspect="auto",
+ layout="rows",
+):
+ """
+ Plot the dark areas on the detector, masking out the illuminated areas.
+ This function can take 2D (_rate.fits) and 3D (_rateints.fits) files as input.
+
+ Parameters:
+ ----------
+ rate_file: str
+ File path to the FITS file containing countrate data.
+ mask_file: str
+ File path to the FITS file containing mask data.
+ slice_index: int
+ Index of the slice to be plotted, 2D data default is 1.
+ axis: int (0-2)
+ Axis along which the slice will be taken
+ (0 for x-axis, 1 for y-axis, 2 for z-axis).
+ aspect: int or 'auto'
+ Plot's aspect ratio.
+ cmap: str
+ Color map.
+ layout: str
+ Layout subplots in rows or columns?
+ scale: float
+ Scaling factor applied to determine the intensity range for visualization.
+ """
+
+ # Make a matplotlib figure.
+ if layout == "rows":
+ fig, ax = plt.subplots(3, 1, figsize=(10, 5))
+ else:
+ fig, ax = plt.subplots(1, 3, figsize=(20, 5))
+ plt.suptitle(f"Dark Areas for {os.path.basename(rate_file)}", fontsize=15)
+
+ # Open the mask and science data.
+ with fits.open(mask_file) as hdulist:
+ mask = hdulist[1].data
+
+ with fits.open(rate_file) as hdulist:
+ sci = hdulist["SCI"].data
+
+ # Determine if countrate data is 2D or 3D.
+ if len(sci.shape) == 3:
+ dim = 3
+ else:
+ dim = 2
+
+ # Get data limits from the dark data.
+ masked_sci = sci.copy()
+ masked_sci[mask == 0] = 0
+ vmin = np.nanpercentile(masked_sci, scale)
+ vmax = np.nanpercentile(masked_sci, 100 - scale)
+
+ # Plot the science image with limits from the dark data.
+ sci[np.isnan(sci)] = 0
+
+ # If the countrate data is 3D, determine the integration (slice) to plot.
+ if dim == 3:
+ sci = np.take(sci, slice_index, axis=axis)
+ mask = np.take(mask, slice_index, axis=axis)
+ masked_sci = np.take(masked_sci, slice_index, axis=axis)
+
+ ax[0].set_title("Original Rate Data: Integration [{},:,:]".format(slice_index))
+ ax[1].set_title(
+ "Illuminated Pixel Mask: Integration [{},:,:]".format(slice_index)
+ )
+ ax[2].set_title(
+ "Dark Data (Illuminated Pixels Masked): Integration [{},:,:]".format(
+ slice_index
+ )
+ )
+
+ else:
+ sci = sci
+ ax[0].set_title("Original Rate Data")
+ ax[1].set_title("Illuminated Pixel Mask")
+ ax[2].set_title("Dark Data (Illuminated Pixels Masked)")
+
+ # Plot the science image with limits from the dark data.
+ ax[0].set_ylabel("Pixel Row", fontsize=12)
+ ax[0].set_xlabel("Pixel Column", fontsize=12)
+ im1 = ax[0].imshow(
+ sci, cmap=cmap, origin="lower", aspect=aspect, vmin=vmin, vmax=vmax
+ )
+ cbar1 = fig.colorbar(im1, ax=ax[0], pad=0.05, shrink=0.7, label="DN/s")
+
+ # Plot the mask: values are 1 or 0.
+ ax[1].set_ylabel("Pixel Row", fontsize=12)
+ ax[1].set_xlabel("Pixel Column", fontsize=12)
+ im2 = ax[1].imshow(mask, cmap=cmap, aspect=aspect, origin="lower", vmin=0, vmax=1)
+ cbar2 = fig.colorbar(im2, ax=ax[1], pad=0.05, shrink=0.7, ticks=[0, 1])
+ cbar2.set_ticklabels(["Illuminated Pixel Masked", "Un-Illuminated Pixel"])
+
+ # Plot the dark data with the same limits as the science data.
+ ax[2].set_ylabel("Pixel Row", fontsize=12)
+ ax[2].set_xlabel("Pixel Column", fontsize=12)
+ im3 = ax[2].imshow(
+ masked_sci, cmap=cmap, aspect=aspect, origin="lower", vmin=vmin, vmax=vmax
+ )
+ cbar3 = fig.colorbar(im3, ax=ax[2], pad=0.05, shrink=0.7, label="DN/s")
+
+ if layout == "rows": # Adjust the multiplier as needed.
+ cbar1.ax.figure.set_size_inches(cbar1.ax.figure.get_size_inches() * 1.2)
+ cbar2.ax.figure.set_size_inches(cbar2.ax.figure.get_size_inches() * 1.2)
+ cbar3.ax.figure.set_size_inches(cbar3.ax.figure.get_size_inches() * 1.2)
+
+ fig.tight_layout() # Adjusted tight_layout for better suptitle spacing.
+
+
+# Define a helper function for plotting the cleaned data.
+def plot_cleaned_data(
+ rate_file,
+ cleaned_file,
+ slice_index=1,
+ aspect="auto",
+ cmap="viridis",
+ scale=0.2,
+ layout="rows",
+):
+ """
+ Plot the 2D cleaned rate data (or 2D slice from a 3D cleaned data cube)
+ and compare against the original data (not applying NSClean).
+
+ Parameters:
+ ----------
+ rate_file: str
+ File path to the FITS file containing original countrate data.
+ cleaned_file: str
+ File path to the FITS file containing cleaned countrate data.
+ slice_index: int
+ Index of the slice to be plotted, 2D data default is 1.
+ aspect: int or 'auto'
+ Plot's aspect ratio.
+ cmap: str
+ Color map.
+ layout: str
+ Layout subplots in rows or columns?
+ scale: float
+ Scaling factor applied to determine the intensity range for visualization.
+ """
+
+ # Make a matplotlib figure.
+ if layout == "rows":
+ fig, ax = plt.subplots(4, 1, figsize=(10, 12))
+ else:
+ fig, ax = plt.subplots(2, 2, figsize=(12, 10))
+ ax = ax.flatten()
+ plt.suptitle(
+ f"Cleaned Data (1/f noise removed) for {os.path.basename(rate_file)}",
+ fontsize=15,
+ y=1,
+ )
+
+ # Open the original and cleaned data.
+ with fits.open(rate_file) as hdulist:
+ original = hdulist["SCI"].data
+
+ with fits.open(cleaned_file) as hdulist:
+ cleaned = hdulist["SCI"].data
+ # Determine if rate data is 2D or 3D.
+ if len(cleaned.shape) == 3:
+ dim = 3
+ else:
+ dim = 2
+
+ # Define image limits from the original data.
+ vmin = np.nanpercentile(original, scale)
+ vmax = np.nanpercentile(original, 100 - scale)
+
+ original[np.isnan(original)] = 0
+ cleaned[np.isnan(cleaned)] = 0
+
+ # If the rate data is 3D, determine the integration (slice) to plot.
+ if dim == 3:
+ original = original[slice_index, :, :]
+ cleaned = cleaned[slice_index, :, :]
+ ax[0].set_title(
+ "Original Rate Data: Integration [{},:,:]".format(slice_index), fontsize=15
+ )
+ ax[1].set_title(
+ "Cleaned Rate Data: Integration [{},:,:]".format(slice_index), fontsize=15
+ )
+
+ else:
+ ax[0].set_title("Original Rate Data", fontsize=12)
+ ax[1].set_title("Cleaned Rate Data", fontsize=12)
+
+ # Plot the original rate data.
+ ax[0].set_xlabel("Pixel Column", fontsize=10)
+ ax[0].set_ylabel("Pixel Row", fontsize=10)
+ fig.colorbar(
+ ax[0].imshow(
+ original, cmap=cmap, origin="lower", aspect=aspect, vmin=vmin, vmax=vmax
+ ),
+ ax=ax[0],
+ pad=0.05,
+ shrink=0.7,
+ label="DN/s",
+ )
+
+ # Plot the cleaned data with the same image limits.
+ ax[1].set_xlabel("Pixel Column", fontsize=10)
+ ax[1].set_ylabel("Pixel Row", fontsize=10)
+ fig.colorbar(
+ ax[1].imshow(
+ cleaned, cmap=cmap, origin="lower", aspect=aspect, vmin=vmin, vmax=vmax
+ ),
+ ax=ax[1],
+ pad=0.05,
+ shrink=0.7,
+ label="DN/s",
+ )
+
+ # Plot the relative difference between the original and cleaned data.
+ diff = (original - cleaned) / original
+ diff[~np.isfinite(diff)] = 0
+ vmin_diff = np.nanpercentile(diff, scale)
+ vmax_diff = np.nanpercentile(diff, 100 - scale)
+ ax[2].set_title("Relative Difference (Original - Cleaned Rate Data)", fontsize=12)
+ ax[2].set_xlabel("Pixel Column", fontsize=10)
+ ax[2].set_ylabel("Pixel Row", fontsize=10)
+ fig.colorbar(
+ ax[2].imshow(
+ diff,
+ cmap=cmap,
+ origin="lower",
+ aspect=aspect,
+ vmin=vmin_diff,
+ vmax=vmax_diff,
+ ),
+ ax=ax[2],
+ pad=0.05,
+ shrink=0.7,
+ label="DN/s",
+ )
+
+ # Plot the relative difference again,
+ # this time hiding the outliers so that low-level
+ # background changes can be seen.
+ hide_outliers = np.ma.filled(sigma_clip(diff, masked=True), fill_value=0)
+ vmin_outliers = np.nanpercentile(hide_outliers, scale)
+ vmax_outliers = np.nanpercentile(hide_outliers, 100 - scale)
+ ax[3].set_title(
+ "Relative Difference (Original - Cleaned Rate Data) \n with Outliers Hidden",
+ fontsize=12,
+ )
+ ax[3].set_xlabel("Pixel Column", fontsize=10)
+ ax[3].set_ylabel("Pixel Row", fontsize=10)
+ fig.colorbar(
+ ax[3].imshow(
+ hide_outliers,
+ cmap=cmap,
+ origin="lower",
+ aspect=aspect,
+ vmin=vmin_outliers,
+ vmax=vmax_outliers,
+ ),
+ ax=ax[3],
+ pad=0.05,
+ shrink=0.7,
+ label="DN/s",
+ )
+
+ fig.tight_layout()
+
+
+# Define a helper function for plotting the 1D spectra.
+def plot_spectra(
+ spec_list,
+ ext_num=1,
+ scale_percent=2.0,
+ wavelength_range=None,
+ flux_range=None,
+ xlim_low=None,
+):
+ """
+ Plot the cleaned extracted 1D spectrum and compare against
+ the original (not applying NSClean) extracted 1D spectrum.
+
+ Parameters:
+ ----------
+ spec_list: list
+ List of paths to the FITS files containing original/cleaned 1D extracted data.
+ scale_percent: float
+ Scaling factor applied to determine the intensity range for visualization.
+ ext_num: int
+ Index/extension of the slice to be plotted.
+ The EXTVER header value. The default is 1 for 2D data.
+ wavelength_range: dict
+ Wavelength range (x-axis) {'nrs1': [3.6, 3.65], 'nrs2': [1.65, 1.75]}.
+ flux_range: dict
+ Flux range (y-axis) {'nrs1': [1, 2], 'nrs2': [1, 2]}.
+ xlim_low: int
+ Define a lower wavelength end for the 1D spectrum. Helpful for BOTS data.
+ """
+
+ if wavelength_range is None:
+ wavelength_range = {}
+
+ # Open the FITS files.
+ original_hdul = fits.open(spec_list[0])
+ cleaned_hdul = fits.open(spec_list[1])
+ if len(spec_list) == 3:
+ alternate_hdul = fits.open(spec_list[2])
+ elif len(spec_list) == 4:
+ alternate_hdul = fits.open(spec_list[2])
+ handmod_hdul = fits.open(spec_list[3])
+ else:
+ alternate_hdul = None
+ handmod_hdul = None
+
+ # Find the spectral extension (EXTRACT1D).
+ for extnum in range(len(original_hdul)):
+ hdu = original_hdul[extnum]
+ if hdu.name != "EXTRACT1D":
+ continue
+ slit_name = hdu.header["SLTNAME"]
+
+ if hdu.header["EXTVER"] == ext_num:
+
+ # Plot the original and cleaned spectra together.
+ fig, ax = plt.subplots(1, 2, figsize=(10, 5))
+ plt.suptitle(
+ f"Compare 1D Spectra for {os.path.basename(spec_list[0])};"
+ f"EXP_TYPE/Slit = {slit_name}",
+ fontsize=15,
+ )
+
+ if "nrs1" in spec_list[0] and xlim_low is not None:
+ xlim_low = xlim_low
+ else:
+ xlim_low = 0
+
+ ax[0].step(
+ hdu.data["WAVELENGTH"][xlim_low:],
+ hdu.data["FLUX"][xlim_low:],
+ linewidth=1,
+ label="Original",
+ )
+ ax[0].step(
+ cleaned_hdul[extnum].data["WAVELENGTH"][xlim_low:],
+ cleaned_hdul[extnum].data["FLUX"][xlim_low:],
+ linewidth=1,
+ label="Cleaned",
+ )
+ ax[0].set_xlabel(f"Wavelength ({hdu.header['TUNIT1']})", fontsize=12)
+ ax[0].set_ylabel(f"Flux ({hdu.header['TUNIT2']})", fontsize=12)
+ ax[0].set_title("1D Extracted Spectra", fontsize=12)
+
+ # Plot the difference between the spectra as a ratio.
+ diff = (
+ cleaned_hdul[extnum].data["FLUX"][xlim_low:]
+ / hdu.data["FLUX"][xlim_low:]
+ )
+ ax[1].step(
+ hdu.data["WAVELENGTH"][xlim_low:],
+ diff,
+ linewidth=1,
+ label="Cleaned/Original",
+ )
+ ax[1].set_xlabel(f"Wavelength ({hdu.header['TUNIT1']})", fontsize=12)
+ ax[1].set_ylabel("Cleaned/Original", fontsize=12)
+ ax[1].set_title("Difference After Cleaning", fontsize=12)
+ # print("Average Difference = {}".format(np.nanmean(diff)))
+
+ # If available, also plot the alternate spectra.
+ if alternate_hdul is not None:
+ ax[0].step(
+ alternate_hdul[extnum].data["WAVELENGTH"][xlim_low:],
+ alternate_hdul[extnum].data["FLUX"][xlim_low:],
+ linewidth=1,
+ label="Cleaned (alternate mask)",
+ )
+ diff2 = (
+ alternate_hdul[extnum].data["FLUX"][xlim_low:]
+ / hdu.data["FLUX"][xlim_low:]
+ )
+ ax[1].step(
+ hdu.data["WAVELENGTH"][xlim_low:],
+ diff2,
+ linewidth=1,
+ label="Cleaned (alternate mask)/Original",
+ )
+ if handmod_hdul is not None:
+
+ ax[0].step(
+ handmod_hdul[extnum].data["WAVELENGTH"][xlim_low:],
+ handmod_hdul[extnum].data["FLUX"][xlim_low:],
+ linewidth=1,
+ label="Cleaned (hand-modified mask)",
+ color="Purple"
+ )
+ diff3 = (
+ handmod_hdul[extnum].data["FLUX"][xlim_low:]
+ / hdu.data["FLUX"][xlim_low:]
+ )
+ ax[1].step(
+ hdu.data["WAVELENGTH"][xlim_low:],
+ diff3,
+ linewidth=1,
+ label="Cleaned (hand-modified mask)/Original",
+ )
+
+ # Set the y-range of the plot if needed.
+ if flux_range is not None:
+ for key, y_range in flux_range.items():
+ if key.lower() in spec_list[0].lower() and y_range is not None:
+ if len(y_range) == 2:
+ ax[0].set_ylim(y_range)
+ ax[1].set_ylim(
+ [
+ np.nanpercentile(diff, scale_percent),
+ np.nanpercentile(diff, 100 - scale_percent),
+ ]
+ )
+ # ax[1].set_ylim(0.5, 1.5)
+
+ else:
+ all_flux = [
+ hdu.data["FLUX"],
+ cleaned_hdul[extnum].data["FLUX"],
+ ]
+ y_range_ax0 = [
+ np.nanpercentile(all_flux[0], scale_percent),
+ np.nanpercentile(all_flux[0], 100 - scale_percent),
+ ]
+ ax[0].set_ylim(y_range_ax0)
+ # ax[1].set_ylim(0.5, 1.5)
+
+ ax[1].set_ylim(
+ [
+ np.nanpercentile(diff, scale_percent),
+ np.nanpercentile(diff, 100 - scale_percent),
+ ]
+ )
+
+ else:
+ all_flux = [hdu.data["FLUX"], cleaned_hdul[extnum].data["FLUX"]]
+ y_range_ax0 = [
+ np.nanpercentile(all_flux[0], scale_percent),
+ np.nanpercentile(all_flux[0], 100 - scale_percent),
+ ]
+ ax[0].set_ylim(y_range_ax0)
+ # ax[1].set_ylim(0.5, 1.5)
+
+ ax[1].set_ylim(
+ [
+ np.nanpercentile(diff, scale_percent),
+ np.nanpercentile(diff, 100 - scale_percent),
+ ]
+ )
+
+ # Set the x-range of the plot if needed.
+ for key in wavelength_range:
+ if key in spec_list[0] and wavelength_range[key] is not None:
+ ax[0].set_xlim(wavelength_range[key])
+ ax[1].set_xlim(wavelength_range[key])
+
+ fig.tight_layout()
+
+ ax[0].legend()
+ ax[1].legend()
+
+ original_hdul.close()
+ cleaned_hdul.close()
+ if alternate_hdul is not None:
+ alternate_hdul.close()
+ handmod_hdul.close()