From ca3777780e676951cd0d454c9743549198649e0a Mon Sep 17 00:00:00 2001
From: Rachel Plesha <rplesha@stsci.edu>
Date: Thu, 21 Mar 2024 22:02:56 -0400
Subject: [PATCH] Adding box link files to run notebooks independently

---
 .../01_niriss_wfss_image2_image3.ipynb        | 34 ++++++++-
 .../02_niriss_wfss_spec2.ipynb                | 73 ++++++++++++++-----
 2 files changed, 86 insertions(+), 21 deletions(-)
diff --git a/notebooks/NIRISS_WFSS_advanced/01_niriss_wfss_image2_image3.ipynb b/notebooks/NIRISS_WFSS_advanced/01_niriss_wfss_image2_image3.ipynb
index 953087769..5674bbb62 100644
--- a/notebooks/NIRISS_WFSS_advanced/01_niriss_wfss_image2_image3.ipynb
+++ b/notebooks/NIRISS_WFSS_advanced/01_niriss_wfss_image2_image3.ipynb
@@ -11,7 +11,7 @@
     "\n",
     "**Use case**: The default parameters for the pipeline do not extract the expected sources, so custom parameters need to be set to obtain new combined image and source catalog.<br>\n",
     "**Data**: JWST/NIRISS images and spectra from program 2079 observation 004. This should be stored in a single directory `data`, and can be downloaded from the previous notebook, 00_niriss_mast_query_data_setup.ipynb.<br>\n",
-    "**Tools**: astropy, crds, glob, jdaviz, json, jwst, matplotlib, numpy, os, pandas, warnings<br>\n",
+    "**Tools**: astropy, crds, glob, jdaviz, json, jwst, matplotlib, numpy, os, pandas, urllib, warnings, zipfile<br>\n",
     "**Cross-instrument**: NIRISS<br>\n",
     "\n",
     "**Content**\n",
@@ -73,6 +73,8 @@
     "import glob\n",
     "import json\n",
     "import warnings\n",
+    "import urllib\n",
+    "import zipfile\n",
     "import numpy as np\n",
     "import pandas as pd\n",
     "\n",
@@ -138,6 +140,36 @@
     "The association files expect that 1) all of the data are in the same directory and 2) that you are performing the pipeline call also in that directory. Because of that, we need to change into the data directory to run the imaging pipelines."
    ]
   },
+  {
+   "cell_type": "raw",
+   "id": "e34b0097-47df-468e-ad12-a7ca1d5a9565",
+   "metadata": {},
+   "source": [
+    "# if you have not downloaded the data from notebook 00, run this cell in \"code\" mode rather than \"raw\"\n",
+    "\n",
+    "# Download uncalibrated data from Box into the data directory:\n",
+    "boxlink = 'https://data.science.stsci.edu/redirect/JWST/jwst-data_analysis_tools/niriss_wfss_advanced/niriss_wfss_advanced_01_input.zip'\n",
+    "boxfile = os.path.basename(boxlink)\n",
+    "urllib.request.urlretrieve(boxlink, boxfile)\n",
+    "\n",
+    "zf = zipfile.ZipFile(boxfile, 'r')\n",
+    "zf.extractall(path=data_dir)\n",
+    "\n",
+    "# move the files downloaded from the box file into the top level data directory\n",
+    "box_download_dir = os.path.join(data_dir, boxfile.split('.zip')[0])\n",
+    "for filename in glob.glob(os.path.join(box_download_dir, '*')):\n",
+    "    if '.csv' in filename:\n",
+    "        # move to the current directory\n",
+    "        os.rename(filename, os.path.basename(filename))\n",
+    "    else:\n",
+    "        # move to the data directory \n",
+    "        os.rename(filename, os.path.join(data_dir, os.path.basename(filename)))\n",
+    "\n",
+    "# remove unnecessary files now\n",
+    "os.remove(boxfile)\n",
+    "os.rmdir(box_download_dir)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/notebooks/NIRISS_WFSS_advanced/02_niriss_wfss_spec2.ipynb b/notebooks/NIRISS_WFSS_advanced/02_niriss_wfss_spec2.ipynb
index 85965dc61..f3e052f1e 100644
--- a/notebooks/NIRISS_WFSS_advanced/02_niriss_wfss_spec2.ipynb
+++ b/notebooks/NIRISS_WFSS_advanced/02_niriss_wfss_spec2.ipynb
@@ -11,7 +11,7 @@
     "\n",
     "**Use case**: After creating a custom source catalog, spec2 should be run on the dispersed WFSS images.<br>\n",
     "**Data**: JWST/NIRISS images and spectra from program 2079 obs 004. This should be stored in a single directory `data`, and can be downloaded from the notebook 00_niriss_mast_query_data_setup.ipynb.<br>\n",
-    "**Tools**: astropy, crds, glob, jdaviz, json, jwst, matplotlib, numpy, os, pandas, shutil<br>\n",
+    "**Tools**: astropy, crds, glob, jdaviz, json, jwst, matplotlib, numpy, os, pandas, shutil, urllib, zipfile<br>\n",
     "**Cross-instrument**: NIRISS<br>\n",
     "\n",
     "**Content**\n",
@@ -71,6 +71,8 @@
     "import json\n",
     "import os\n",
     "import shutil\n",
+    "import urllib\n",
+    "import zipfile\n",
     "import numpy as np\n",
     "import pandas as pd\n",
     "\n",
@@ -137,12 +139,53 @@
    "outputs": [],
    "source": [
     "data_dir = 'data'\n",
-    "custom_run_spec2 = 'custom_spec2'\n",
-    "custom_run_image3 = os.path.join(data_dir, 'custom_image3_calibrated') # results of custom image3 calibration\n",
+    "if not os.path.exists(data_dir):\n",
+    "    os.mkdir(data_dir)\n",
+    "    \n",
+    "custom_run_spec2 = 'custom_spec2' # saving files here in this notebook\n",
+    "custom_run_image3 = 'custom_image3_calibrated' # results of custom image3 calibration\n",
+    "\n",
+    "# if the directories don't exist yet, make them\n",
+    "for custom_dir in [custom_run_spec2, custom_run_image3]:\n",
+    "    if not os.path.exists(os.path.join(data_dir, custom_dir)):\n",
+    "        os.mkdir(os.path.join(data_dir, custom_dir))"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "id": "e3611a58-e99e-47aa-b190-b821eb60e47f",
+   "metadata": {},
+   "source": [
+    "# if you have not downloaded the data from notebook 00 or have not run notebook 01, run this cell in \"code\" mode rather than \"raw\"\n",
+    "\n",
+    "# Download the input data for this notebook (uncalibrated data plus the image3 products) from Box into the data directory:\n",
+    "boxlink = 'https://data.science.stsci.edu/redirect/JWST/jwst-data_analysis_tools/niriss_wfss_advanced/niriss_wfss_advanced_02_input.zip'\n",
+    "boxfile = os.path.basename(boxlink)\n",
+    "urllib.request.urlretrieve(boxlink, boxfile)\n",
+    "\n",
+    "zf = zipfile.ZipFile(boxfile, 'r')\n",
+    "zf.extractall(path=data_dir)\n",
+    "\n",
+    "# move the files downloaded from the box file into the top level data directory\n",
+    "box_download_dir = os.path.join(data_dir, boxfile.split('.zip')[0])\n",
     "\n",
-    "# if the custom spec2 run directory doesn't exist yet, make it\n",
-    "if not os.path.exists(os.path.join(data_dir, custom_run_spec2)):\n",
-    "    os.mkdir(os.path.join(data_dir, custom_run_spec2))"
+    "for filename in glob.glob(os.path.join(box_download_dir, '*')):\n",
+    "    if '.csv' in filename:\n",
+    "        # move to the current directory\n",
+    "        os.rename(filename, os.path.basename(filename))\n",
+    "    elif '_segm.fits' in filename or '_cat.ecsv' in filename:\n",
+    "        # move the image2 products to the appropriate directory\n",
+    "        os.rename(filename, os.path.join(data_dir, custom_run_spec2, os.path.basename(filename)))\n",
+    "    elif '_i2d.fits' in filename:\n",
+    "        # move image3 products to their directory, too\n",
+    "        os.rename(filename, os.path.join(data_dir, custom_run_image3, os.path.basename(filename)))\n",
+    "    else:\n",
+    "        # move to the data directory \n",
+    "        os.rename(filename, os.path.join(data_dir, os.path.basename(filename)))\n",
+    "        \n",
+    "# remove unnecessary files now\n",
+    "os.remove(boxfile)\n",
+    "os.rmdir(box_download_dir)"
    ]
   },
   {
@@ -198,8 +241,8 @@
    "outputs": [],
    "source": [
     "# copy all of the necessary image3 output files\n",
-    "cats = glob.glob(os.path.join(custom_run_image3, '*source*_cat.ecsv')) # copy both the source-match and source118 catalogs\n",
-    "segm = glob.glob(os.path.join(custom_run_image3, '*_segm.fits'))\n",
+    "cats = glob.glob(os.path.join(data_dir, custom_run_image3, '*source*_cat.ecsv')) # copy both the source-match and source118 catalogs\n",
+    "segm = glob.glob(os.path.join(data_dir, custom_run_image3, '*_segm.fits'))\n",
     "\n",
     "for image3_file in cats + segm:\n",
     "    if os.path.exists(image3_file):\n",
@@ -912,7 +955,7 @@
    "id": "c5dd5d4c-5ef0-4690-a4da-a795c295a313",
    "metadata": {},
    "source": [
-    "Now when we calibrate everything, it should take a lot less time because there are a limited number of sources."
+    "Now when we calibrate everything, each individual file should take a lot less time because there are a limited number of sources. However, we will calibrate all of the files in this visit, so this cell might still take a while to run."
    ]
   },
   {
@@ -992,16 +1035,6 @@
     "##### Look at all of the sources for a single file"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "59b1458a-1544-4593-aea9-987764c0ac9a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ls new_catalog_calibrated/*jw02079004002_11101_00002*"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -1095,7 +1128,7 @@
     "asn_data = json.load(open(fits.getval(x1d_file, 'ASNTABLE')))\n",
     "i2d_name = asn_data['products'][0]['members'][1]['expname']\n",
     "cat_name = asn_data['products'][0]['members'][2]['expname']\n",
-    "with fits.open(os.path.join('../../', custom_run_image3, i2d_name)) as i2d:\n",
+    "with fits.open(os.path.join('../../', data_dir, custom_run_image3, i2d_name)) as i2d:\n",
     "    ax2.imshow(i2d[1].data, origin='lower', aspect='auto', vmin=0, vmax=np.nanmax(i2d[1].data)*0.01)\n",
     "    ax2.set_title(f\"{os.path.basename(i2d_name).split('_i2d')[0]}\")\n",
     "\n",