Merge pull request #271 from roman-corgi/combine_images
Combine images
semaphoreP authored Dec 20, 2024
2 parents 6be78a8 + 2bee643 commit 6208510
Showing 8 changed files with 219 additions and 13 deletions.
9 changes: 6 additions & 3 deletions README.md
@@ -95,11 +95,11 @@ def example_step(dataset, calib_data, tuneable_arg=1, another_arg="test"):

Inside, the function can do nearly anything you want, but the function signature and the start/end of the function should follow a few rules (a minimal sketch follows the list below).

* Each function should include a docstring that descibes what the function is doing, what the inputs (including units if appropriate) are and what the outputs (also with units). The dosctrings should be [goggle style docstrings](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html).
* Each function should include a docstring that describes what the function is doing, what the inputs (including units if appropriate) are and what the outputs (also with units). The docstrings should be [google style docstrings](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html).
* The input dataset should always be the first input
* Additional arguments and keywords exist only if you need them--many relevant parameters might already be in Dataset headers. A pipeline step can have just a single argument (the input dataset) if nothing else is needed.
* All additional function arguments/keywords should only consist of the following types: int, float, str, or a class defined in corgidrp.Data.
* (Long explaination for the curious: The reason for this is that pipeline steps can be written out as text files. Int/float/str are easily represented succinctly by textfiles. All classes in corgidrp.Data can be created simply by passing in a filepath. Therefore, all pipeline steps have easily recordable arguments for easy reproducibility.)
* (Long explanation for the curious: The reason for this is that pipeline steps can be written out as text files. Int/float/str are easily represented succinctly by textfiles. All classes in corgidrp.Data can be created simply by passing in a filepath. Therefore, all pipeline steps have easily recordable arguments for easy reproducibility.)
* The first line of the function should generally create a copy of the dataset (which will be the output dataset). This way, the output dataset is not the same instance as the input dataset, which makes it easier to ensure reproducibility.
* The function should always end with updating the header and (typically) the data of the output dataset. The history of running this pipeline step should be recorded in the header.
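
A minimal sketch of a step that follows these rules, assuming `Dataset.copy()` and `update_after_processing_step()` behave as they do elsewhere in this diff (the function name and the scaling behavior are purely illustrative):

```python
def example_scale_step(input_dataset, scale_factor=1.0):
    """
    Scale every frame in a dataset by a constant (illustrative only).

    Args:
        input_dataset (corgidrp.data.Dataset): the dataset to process
        scale_factor (float): multiplicative factor applied to the data (unitless)

    Returns:
        corgidrp.data.Dataset: a copy of the input with scaled data
    """
    # first line: copy, so the output is a different instance than the input
    output_dataset = input_dataset.copy()

    new_all_data = output_dataset.all_data * scale_factor

    # last lines: record this step in the header history and update the data
    history_msg = "Scaled data by a factor of {0}".format(scale_factor)
    output_dataset.update_after_processing_step(history_msg, new_all_data=new_all_data)

    return output_dataset
```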

@@ -133,7 +133,7 @@ End-to-end testing refers to processing data as one would when we get the real d
- if you need to create mock L1 data, please do it in the script as well.
- See the existing tests in `tests/e2e_tests/` for how to structure this script. You should only need to write a single script.
4. Test that the script runs successfully on your local machine and produces the expected output. Debug as necessary. When appropriate, test your results against those obtained from the II&T/TVAC pipeline using the same input data.
5. Determine how resource intensive your recipe is. There are many ways to do this, but Linux users can run `/usr/bin/time -v python your_e2e_test.py` and Mac userse can run `/usr/bin/time -l -h -p python <your_e2e_test.py>`. Record elapsed (wall clock) time, the percent of CPU this job got (only if parallelization was used), and total memory used (labelled "Maximum resident set size").
5. Determine how resource intensive your recipe is. There are many ways to do this, but Linux users can run `/usr/bin/time -v python your_e2e_test.py` and Mac users can run `/usr/bin/time -l -h -p python <your_e2e_test.py>`. Record elapsed (wall clock) time, the percent of CPU this job got (only if parallelization was used), and total memory used (labelled "Maximum resident set size").
6. Document your recipe on the "Corgi-DRP Implementation Document" on Confluence (see the big table in Section 2.0). You should fill out an entire row with your recipe. Under additional notes, note if your recipe took significant run time (> 1 minute) and significant memory (> 1 GB).
7. PR!
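
Relatedly, for step 5: if `/usr/bin/time` is unavailable, a rough in-process alternative is sketched below (a sketch only, not part of the repo; the `resource` module is Unix-only, and `ru_maxrss` is reported in kilobytes on Linux but bytes on macOS):

```python
import resource
import subprocess
import time

start = time.perf_counter()
# run the e2e test as a child process (script name is illustrative)
subprocess.run(["python", "your_e2e_test.py"], check=True)
elapsed = time.perf_counter() - start

# peak memory across child processes: KB on Linux, bytes on macOS
peak_rss = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss
print("wall clock: {0:.1f} s, max RSS: {1}".format(elapsed, peak_rss))
```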

@@ -199,6 +199,9 @@ Before creating a pull request, review the design Principles below. Use the Gith

## Change Log

**v1.1.2**
* Flat field correction marks pixels divided by 0 as bad

**v1.1.1**
* Fix unit test that wasn't cleaning up environment properly

2 changes: 1 addition & 1 deletion corgidrp/__init__.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
import pathlib
import configparser

__version__ = "1.1.1"
__version__ = "1.1.2"
version = __version__ # temporary backwards compatibility

#### Create a configuration file for the corgidrp if it doesn't exist.
8 changes: 4 additions & 4 deletions corgidrp/combine.py
@@ -32,7 +32,7 @@ def combine_images(data_subset, err_subset, dq_subset, collapse, num_frames_scal
n_samples = np.sum(n_samples, axis=0)
if collapse.lower() == "mean":
data_collapse = np.nanmean(data_subset, axis=0)
err_collapse = np.sqrt(np.nanmean(err_subset**2, axis=0)) /np.sqrt(n_samples) # not sure if this is correct, but good enough for now
err_collapse = np.sqrt(np.nanmean(err_subset**2, axis=0)) /np.sqrt(n_samples) # correct assuming standard error propagation
elif collapse.lower() == "median":
data_collapse = np.nanmedian(data_subset, axis=0)
err_collapse = np.sqrt(np.nanmean(err_subset**2, axis=0)) /np.sqrt(n_samples) * np.sqrt(np.pi/2) # inflate median error
@@ -72,7 +72,7 @@ def combine_subexposures(input_dataset, num_frames_per_group=None, collapse="mea
num_frames_per_group = len(input_dataset)

if len(input_dataset) % num_frames_per_group != 0:
raise ValueError("Input dataset of lenght {0} cannot be grouped in sets of {1}".format(len(input_dataset, num_frames_per_group)))
raise ValueError("Input dataset of length {0} cannot be grouped in sets of {1}".format(len(input_dataset), num_frames_per_group))

if collapse.lower() not in ["mean", "median"]:
raise ValueError("combine_subexposures can only collapse with mean or median")
@@ -82,7 +82,7 @@ def combine_subexposures(input_dataset, num_frames_per_group=None, collapse="mea
for i in range(num_groups):
data_subset = np.copy(input_dataset.all_data[num_frames_per_group*i:num_frames_per_group*(i+1)])
err_subset = np.copy(input_dataset.all_err[num_frames_per_group*i:num_frames_per_group*(i+1)])
dq_subset = input_dataset.all_dq[num_frames_per_group*i:num_frames_per_group*(i+1)]
dq_subset = np.copy(input_dataset.all_dq[num_frames_per_group*i:num_frames_per_group*(i+1)])

data_collapse, err_collapse, dq_collapse = combine_images(data_subset, err_subset, dq_subset, collapse=collapse,
num_frames_scaling=num_frames_scaling)
@@ -91,7 +91,7 @@ def combine_subexposures(input_dataset, num_frames_per_group=None, collapse="mea
pri_hdr = input_dataset[num_frames_per_group*i].pri_hdr.copy()
ext_hdr = input_dataset[num_frames_per_group*i].ext_hdr.copy()
err_hdr = input_dataset[num_frames_per_group*i].err_hdr.copy()
dq_hdr = input_dataset[num_frames_per_group*i].err_hdr.copy()
dq_hdr = input_dataset[num_frames_per_group*i].dq_hdr.copy()
hdulist = input_dataset[num_frames_per_group*i].hdu_list.copy()
new_image = data.Image(data_collapse, pri_hdr=pri_hdr, ext_hdr=ext_hdr, err=err_collapse, dq=dq_collapse, err_hdr=err_hdr,
dq_hdr=dq_hdr, input_hdulist=hdulist)
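
For reference, the error propagation used in `combine_images` above can be written as a standalone sketch (assuming independent Gaussian errors; the sqrt(pi/2) factor is the usual inflation of a median's standard error relative to the mean's):

```python
import numpy as np

def collapse_with_errors(data_stack, err_stack, collapse="mean"):
    """Collapse a stack of frames, propagating per-pixel errors (sketch)."""
    # count the non-NaN samples contributing to each output pixel
    n_samples = np.sum(~np.isnan(data_stack), axis=0)

    if collapse == "mean":
        data_out = np.nanmean(data_stack, axis=0)
        # standard error of the mean: RMS of the input errors over sqrt(N)
        err_out = np.sqrt(np.nanmean(err_stack**2, axis=0)) / np.sqrt(n_samples)
    else:  # median
        data_out = np.nanmedian(data_stack, axis=0)
        # the median of N Gaussian samples is ~sqrt(pi/2) noisier than the mean
        err_out = (np.sqrt(np.nanmean(err_stack**2, axis=0)) / np.sqrt(n_samples)
                   * np.sqrt(np.pi / 2))
    return data_out, err_out
```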
9 changes: 8 additions & 1 deletion corgidrp/l2a_to_l2b.py
@@ -1,5 +1,6 @@
# A file that holds the functions that transmogrify l2a data to l2b data
import numpy as np
import copy
import corgidrp.data as data
from corgidrp.darks import build_synthesized_dark
from corgidrp.detector import detector_areas
@@ -128,6 +129,12 @@ def flat_division(input_dataset, flat_field):
#Divide by the master flat
flatdiv_cube = flatdiv_dataset.all_data / flat_field.data

#Find where the flat_field is 0 and set a DQ flag:
where_zero = np.where(flat_field.data == 0)
flatdiv_dq = copy.deepcopy(flatdiv_dataset.all_dq)
for i in range(len(flatdiv_dataset)):
flatdiv_dq[i][where_zero] = np.bitwise_or(flatdiv_dataset[i].dq[where_zero], 4)

# propagate the error of the master flat frame
if hasattr(flat_field, "err"):
flatdiv_dataset.rescale_error(1/flat_field.data, "FlatField")
@@ -138,7 +145,7 @@ def flat_division(input_dataset, flat_field):
history_msg = "Flat calibration done using Flat field {0}".format(flat_field.filename)

# update the output dataset with this new flat calibrated data and update the history
flatdiv_dataset.update_after_processing_step(history_msg,new_all_data=flatdiv_cube)
flatdiv_dataset.update_after_processing_step(history_msg,new_all_data=flatdiv_cube, new_all_dq = flatdiv_dq)

return flatdiv_dataset

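The DQ update in `flat_division` above, as a standalone sketch (the array shapes and example pixel are illustrative; bit value 4 is the flag used in the hunk above, and `np.bitwise_or` preserves any flags already set on a pixel):

```python
import numpy as np

flat = np.ones((4, 4))
flat[1, 2] = 0.0                               # one dead flat-field pixel
dq_cube = np.zeros((3, 4, 4), dtype=np.uint8)  # one DQ plane per frame

where_zero = np.where(flat == 0)
for i in range(dq_cube.shape[0]):
    # OR in the flag so existing DQ bits on the pixel survive
    dq_cube[i][where_zero] = np.bitwise_or(dq_cube[i][where_zero], 4)

# every frame now carries the "divided by zero" flag at the dead pixel
assert np.all(np.bitwise_and(dq_cube[:, 1, 2], 4))
```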
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@ def get_requires():

setup(
name='corgidrp',
version="1.1.1",
version="1.1.2",
description='(Roman Space Telescope) CORonaGraph Instrument Data Reduction Pipeline',
#long_description="",
#long_description_content_type="text/markdown",
182 changes: 181 additions & 1 deletion tests/test_combine.py
@@ -61,6 +61,37 @@ def test_mean_combine_subexposures():
assert combined_dataset_2[0].ext_hdr['FILE2'] in ["2.fits", "1.fits", "3.fits", "4.fits"]
assert combined_dataset_2[0].ext_hdr['FILE3'] in ["2.fits", "1.fits", "3.fits", "4.fits"]

def test_mean_combine_subexposures_without_scaling():
"""
Test mean combine of subexposures for case where num_frames_scaling=False
"""

image1 = data.Image(img1, err=err1, dq=dq, pri_hdr = prhd, ext_hdr = exthd)
image1.filename = "1.fits"
image2 = image1.copy()
image2.filename = "2.fits"
image3 = image1.copy()
image3.filename = "3.fits"
image4 = image1.copy()
image4.filename = "4.fits"

dataset = data.Dataset([image1, image2, image3, image4])

combined_dataset = combine.combine_subexposures(dataset, 2, num_frames_scaling=False)

# Check that data and error values are not scaled up
assert(len(combined_dataset) == 2)
assert(np.all(combined_dataset[0].data == 1))
assert(np.all(combined_dataset[0].err == pytest.approx(1/np.sqrt(2))))
assert(np.all(combined_dataset[0].dq == 0))

# combine again
combined_dataset_2 = combine.combine_subexposures(combined_dataset, 2, num_frames_scaling=False)

assert(len(combined_dataset_2) == 1)
assert(np.all(combined_dataset_2[0].data == 1))
assert(np.all(combined_dataset_2[0].err == pytest.approx((1/np.sqrt(2))/np.sqrt(2))))
assert(np.all(combined_dataset_2[0].dq == 0))

def test_mean_combine_subexposures_with_bad():
"""
@@ -91,9 +122,11 @@ def test_mean_combine_subexposures_with_bad():
# the pixel with one bad pixel should have same value but higher error
assert combined_dataset[0].data[0][0] == 2
assert combined_dataset[0].err[0][0][0] == pytest.approx(2)
assert combined_dataset[0].dq[0][0] == 0 # 0 because one of the two frames had a good value
# compare against a pixel without any bad pixels
assert combined_dataset[1].data[0][0] == 2
assert combined_dataset[1].err[0][0][0] == pytest.approx(np.sqrt(2))
assert combined_dataset[1].dq[0][0] == 0

# the pixel with two bad pixels should be nan
assert np.isnan(combined_dataset[0].data[0][1])
@@ -102,7 +135,7 @@ def test_mean_combine_subexposures_with_bad():

def test_median_combine_subexposures():
"""
Test median combine of subexposures. And tests defualt case wihere num_frames_per_group isn't specified.
Test median combine of subexposures. And tests default case where num_frames_per_group isn't specified.
"""

image1 = data.Image(img1, err=err1, dq=dq, pri_hdr = prhd, ext_hdr = exthd)
@@ -123,6 +156,153 @@ def test_median_combine_subexposures():
assert(np.all(combined_dataset[0].err == pytest.approx(np.sqrt(2*np.pi))))
assert(np.all(combined_dataset[0].dq == 0))

def test_median_combine_subexposures_with_bad():
"""
Test median combine of subexposures with bad pixels over multiple combinations
"""
# use copies since we are going to modify their values
image1 = data.Image(np.copy(img1), err=np.copy(err1), dq=np.copy(dq),
pri_hdr = prhd, ext_hdr = exthd)
image1.filename = "1.fits"
image2 = image1.copy()
image2.filename = "2.fits"
image3 = image1.copy()
image3.filename = "3.fits"
image4 = image1.copy()
image4.filename = "4.fits"

# (0,0) has one bad frame
image1.dq[0][0] = 1
# (0,1) has both pixels bad
image1.dq[0][1] = 1
image2.dq[0][1] = 1

dataset = data.Dataset([image1, image2, image3, image4])

combined_dataset = combine.combine_subexposures(dataset, 2, collapse="median")

assert(len(combined_dataset) == 2)
# the pixel with one bad pixel should have same value but higher error. In both cases the error should be inflated by np.sqrt(np.pi/2) compared to mean error.
assert combined_dataset[0].data[0][0] == 2
assert combined_dataset[0].err[0][0][0] == pytest.approx(2 * np.sqrt(np.pi/2))
assert combined_dataset[0].dq[0][0] == 0 # 0 because one of the two frames had a good value
# compare against a pixel without any bad pixels
assert combined_dataset[1].data[0][0] == 2
assert combined_dataset[1].err[0][0][0] == pytest.approx(np.sqrt(2) * np.sqrt(np.pi/2))
assert combined_dataset[1].dq[0][0] == 0

# the pixel with two bad pixels should be nan
assert np.isnan(combined_dataset[0].data[0][1])
assert np.isnan(combined_dataset[0].err[0][0][1])
assert combined_dataset[0].dq[0][1] == 1

# combine again
combined_dataset_2 = combine.combine_subexposures(combined_dataset, 2, collapse="median")

assert(len(combined_dataset_2) == 1)
assert(np.all(combined_dataset_2[0].data == 4))

# error for pixel with no bad pixels in original data (i.e. most pixels in data)
assert combined_dataset_2[0].err[0][5][0] == pytest.approx(np.pi)

# error for pixel with one bad pixel in original data (i.e. no nans after first combination)
assert combined_dataset_2[0].err[0][0][0] == pytest.approx(0.5 * np.pi * np.sqrt(6))

# error for pixel with two bad pixels in original data (i.e. 1 nan after first combination)
assert combined_dataset_2[0].err[0][0][1] == pytest.approx(np.pi * np.sqrt(2))

assert(np.all(combined_dataset_2[0].dq == 0))

def test_combine_different_values():
"""
Test whether the function correctly combines different values.
"""

# use copies since we are going to modify their values
image1 = data.Image(np.copy(img1), err=np.copy(err1), dq=np.copy(dq),
pri_hdr = prhd, ext_hdr = exthd)
image1.filename = "1.fits"
image2 = image1.copy()
image2.filename = "2.fits"
image3 = image1.copy()
image3.filename = "3.fits"
image4 = image1.copy()
image4.filename = "4.fits"

# (0,0) has different values in each frame. Some are bad pixels.
image1.data[0][0] = 5
image2.data[0][0] = 6
image3.data[0][0] = 9
image4.data[0][0] = 19

image2.dq[0][0] = 1
image4.dq[0][0] = 1

# (0,1) is a bad pixel in every frame
image1.dq[0][1] = 1
image2.dq[0][1] = 1
image3.dq[0][1] = 1
image4.dq[0][1] = 1

dataset = data.Dataset([image1, image2, image3, image4])

combined_dataset = combine.combine_subexposures(dataset, collapse="median")

assert(len(combined_dataset) == 1)

# Most pixels had good values of 1 in all frames
assert combined_dataset[0].data[0][2] == 4
assert combined_dataset[0].err[0][0][2] == pytest.approx(2*np.sqrt(np.pi/2))

# (0,0) has a different median value calculated ignoring nans
assert combined_dataset[0].data[0][0] == 7 * 4 # median value scaled by number of images
assert combined_dataset[0].err[0][0][0] == pytest.approx(2 * np.sqrt(np.pi))

# (0,1) is a nan
assert np.isnan(combined_dataset[0].data[0][1])
assert np.isnan(combined_dataset[0].err[0][0][1])

# the updated bad pixel map only contains one bad pixel (i.e. the pixel for which there were no good values)
assert combined_dataset[0].dq[0][0] == 0
assert combined_dataset[0].dq[0][1] == 1

def test_not_divisible():
"""
Tests that function correctly fails when the length of the dataset is not divisible by num_frames_per_group.
"""

image1 = data.Image(img1, err=err1, dq=dq, pri_hdr = prhd, ext_hdr = exthd)
image1.filename = "1.fits"
image2 = image1.copy()
image2.filename = "2.fits"
image3 = image1.copy()
image3.filename = "3.fits"
image4 = image1.copy()
image4.filename = "4.fits"

dataset = data.Dataset([image1, image2, image3, image4])

with pytest.raises(ValueError):
combined_dataset = combine.combine_subexposures(dataset, 3) # Should fail as 4 % 3 != 0

def test_invalid_collapse():
"""
Tests that function correctly fails when collapse type is not valid.
"""

image1 = data.Image(img1, err=err1, dq=dq, pri_hdr = prhd, ext_hdr = exthd)
image1.filename = "1.fits"
image2 = image1.copy()
image2.filename = "2.fits"
image3 = image1.copy()
image3.filename = "3.fits"
image4 = image1.copy()
image4.filename = "4.fits"

dataset = data.Dataset([image1, image2, image3, image4])

with pytest.raises(ValueError):
combined_dataset = combine.combine_subexposures(dataset, collapse="invalid_option")

if __name__ == "__main__":
test_mean_combine_subexposures()
18 changes: 17 additions & 1 deletion tests/test_flat_div.py
@@ -77,9 +77,25 @@ def test_flat_div():
print("Error estimated:",err_estimated)
assert(err_flatdiv == pytest.approx(err_estimated, abs = 1e-2))

print(flatdivided_dataset[0].ext_hdr)
# print(flatdivided_dataset[0].ext_hdr)
corgidrp.track_individual_errors = old_err_tracking


### Check to make sure DQ gets set when flatfield zero. ###

## Injected some 0s.
pixel_list = [[110,120],[50,50], [100,100]]
for pixel in pixel_list:
flatfield.data[pixel[0],pixel[1]] = 0.

## Perform flat division
flatdivided_dataset_w_zeros = l2a_to_l2b.flat_division(simflat_dataset, flatfield)

## Check that all the pixels that were zeroed out have the DQ flag set to 4.
for pixel in pixel_list:
for i in range(len(simdata_filenames)):
assert np.bitwise_and(flatdivided_dataset_w_zeros.all_dq[i,pixel[0],pixel[1]],4)


if __name__ == "__main__":
test_flat_div()
2 changes: 1 addition & 1 deletion tests/test_mean_combine.py
@@ -29,7 +29,7 @@


def test_mean_im():
"""Verify method calculates mean image and erorr term."""
"""Verify method calculates mean image and error term."""
tol = 1e-13

check_combined_im = np.mean(check_ims, axis=0)
