bug fixes. Plotting improvement. Astropy Units added.

nasa-fornax · Jun 11, 2024 · 957d340 · 957d340
1 parent e25d0c5
commit 957d340
Show file tree

Hide file tree

Showing 4 changed files with 98 additions and 39 deletions.
diff --git a/spectroscopy/code_src/desi_functions.py b/spectroscopy/code_src/desi_functions.py
@@ -47,8 +47,8 @@ def DESIBOSS_get_spec(sample_table, search_radius_arcsec):
     for stab in sample_table:
 
         ## Search
-        #data_releases = ['DESI-EDR','BOSS-DR16']
-        data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16']
+        data_releases = ['DESI-EDR','BOSS-DR16']
+        #data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16'] # we want to use DR17 directly using SDSS query
 
         search_coords = stab["coord"]
         dra = (search_radius_arcsec*u.arcsec).to(u.degree)

diff --git a/spectroscopy/code_src/mast_functions.py b/spectroscopy/code_src/mast_functions.py
@@ -1,8 +1,10 @@
-import os
+import os, sys, io
 
 import numpy as np
+from contextlib import redirect_stdout
 
 import astropy.units as u
+import astropy.constants as const
 from astropy.coordinates import SkyCoord
 from astropy.table import Table
 
@@ -17,7 +19,7 @@
 import matplotlib.pyplot as plt
 
 
-def JWST_get_spec(sample_table, search_radius_arcsec, datadir):
+def JWST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
     '''
     Retrieves HST spectra for a list of sources and groups/stacks them.
     This main function runs two sub-functions:
@@ -33,6 +35,8 @@ def JWST_get_spec(sample_table, search_radius_arcsec, datadir):
     datadir : `str`
         Data directory where to store the data. Each function will create a
         separate data directory (for example "[datadir]/HST/" for HST data).
+    verbose : `bool`
+        Verbosity level. Set to True for extra talking.
 
     Returns
     -------
@@ -43,17 +47,18 @@ def JWST_get_spec(sample_table, search_radius_arcsec, datadir):
 
     ## Get the spectra
     print("Searching and Downloading Spectra... ")
-    df_jwst_all = JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir)
-
+    df_jwst_all = JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose)
+    print("done")
+
     ## Group
     print("Grouping Spectra... ")
-    df_jwst_group = JWST_group_spectra(df_jwst_all, verbose=True, quickplot=True)
-
+    df_jwst_group = JWST_group_spectra(df_jwst_all, verbose=verbose, quickplot=False)
+    print("done")
 
     return(df_jwst_group)
 
 
-def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
+def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose):
     '''
     Retrieves HST spectra for a list of sources.
 
@@ -66,6 +71,8 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
     datadir : `str`
         Data directory where to store the data. Each function will create a
         separate data directory (for example "[datadir]/HST/" for HST data).
+    verbose : `bool`
+        Verbosity level. Set to True for extra talking.
 
     Returns
     -------
@@ -75,6 +82,8 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
     '''
 
     ## Create directory
+    if not os.path.exists(datadir):
+        os.mkdir(datadir)
     this_data_dir = os.path.join(datadir , "JWST/")
 
 
@@ -116,9 +125,12 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
 
             if len(data_products_list_filter) > 0:
 
-                ## Download
-                download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)
-
+                ## Download (supress output)
+                trap = io.StringIO()
+                with redirect_stdout(trap):
+                    download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)
+                if verbose: print(trap.getvalue())
+
 
                 ## Create table
                 # NOTE: `download_results` has NOT the same order as `data_products_list_filter`. We therefore
@@ -129,7 +141,7 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
                     idx_cross = np.where(query_results["obsid"] == data_products_list_filter["obsID"][jj] )[0]
                     tmp = query_results[idx_cross][keys]
                     tab.add_row( list(tmp[0]) + [data_products_list_filter["productFilename"][jj]] )
-                print("number in table {}".format(len(tab)))
+                #print("number in table {}".format(len(tab)))
 
                 ## Create multi-index object
                 for jj in range(len(tab)):
@@ -140,7 +152,7 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
 
                     # open spectrum
                     filepath = download_results["Local Path"][file_idx[0]]
-                    print(filepath)
+                    #print(filepath)
                     spec1d = Spectrum1D.read(filepath)
 
                     dfsingle = pd.DataFrame(dict(wave=[spec1d.spectral_axis] , flux=[spec1d.flux], err=[np.repeat(0,len(spec1d.flux))],
@@ -222,8 +234,12 @@ def JWST_group_spectra(df, verbose, quickplot):
                 fluxes_stack = np.nanmedian(fluxes_int,axis=0)
                 if verbose: print("Units of fluxes for each spectrum: {}".format( ",".join([str(tt) for tt in fluxes_units]) ) )
 
+                ## Unit conversion to erg/s/cm2/A (note fluxes are nominally in Jy. So have to do the step with dividing by lam^2)
+                fluxes_stack_cgs = (fluxes_stack * fluxes_units[0]).to(u.erg / u.second / (u.centimeter**2) / u.hertz) * (const.c.to(u.angstrom/u.second)) / (wave_grid.to(u.angstrom)**2)
+                fluxes_stack_cgs = fluxes_stack_cgs.to(u.erg / u.second / (u.centimeter**2) / u.angstrom)
+
                 ## Add to data frame
-                dfsingle = pd.DataFrame(dict(wave=[wave_grid * u.micrometer] , flux=[fluxes_stack * fluxes_units[0]], err=[np.repeat(0,len(fluxes_stack))],
+                dfsingle = pd.DataFrame(dict(wave=[wave_grid.to(u.micrometer)] , flux=[fluxes_stack_cgs], err=[np.repeat(0,len(fluxes_stack_cgs))],
                                              label=[tab_sel["label"].iloc[0]],
                                              objectid=[tab_sel["objectid"].iloc[0]],
                                              mission=[tab_sel["mission"].iloc[0]],
@@ -244,7 +260,7 @@ def JWST_group_spectra(df, verbose, quickplot):
 
     return(df_spec)
 
-def HST_get_spec(sample_table, search_radius_arcsec, datadir):
+def HST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
     '''
     Retrieves HST spectra for a list of sources.
 
@@ -257,6 +273,8 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
     datadir : `str`
         Data directory where to store the data. Each function will create a
         separate data directory (for example "[datadir]/HST/" for HST data).
+    verbose : `bool`
+        Verbosity level. Set to True for extra talking.
 
     Returns
     -------
@@ -266,6 +284,8 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
     '''
 
     ## Create directory
+    if not os.path.exists(datadir):
+        os.mkdir(datadir)
     this_data_dir = os.path.join(datadir , "HST/")
 
 
@@ -301,7 +321,10 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
             if len(data_products_list_filter) > 0:
 
                 ## Download
-                download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)
+                trap = io.StringIO()
+                with redirect_stdout(trap):
+                    download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)
+                if verbose: print(trap.getvalue())
 
                 ## Create table
                 # NOTE: `download_results` has NOT the same order as `data_products_list_filter`. We therefore
@@ -322,9 +345,10 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
 
                     # open spectrum
                     filepath = download_results["Local Path"][file_idx[0]]
-                    print(filepath)
+                    #print(filepath)
                     spec1d = Spectrum1D.read(filepath)
-
+
+                    # Note: this should be in erg/s/cm2/A and any wavelength unit.
                     dfsingle = pd.DataFrame(dict(wave=[spec1d.spectral_axis] , flux=[spec1d.flux], err=[np.repeat(0,len(spec1d.flux))],
                                                  label=[stab["label"]],
                                                  objectid=[stab["objectid"]],

diff --git a/spectroscopy/code_src/plot_functions.py b/spectroscopy/code_src/plot_functions.py
@@ -3,8 +3,10 @@
 import numpy as np
 
 import astropy.units as u
+import astropy.constants as const
 from astropy.coordinates import SkyCoord
 from astropy.table import Table
+from astropy.stats import sigma_clip
 
 import matplotlib.pyplot as plt
 import matplotlib as mpl
@@ -63,10 +65,26 @@ def bin_spectra(wave,flux, bin_factor):
 
     dlam = np.nanmedian(np.diff(wave.value)) * bin_factor
 
+    ## This is the faster way, however, it outputs warnings because there are empty slices.
+    #lam_bin = np.arange(np.nanmin(wave.value)+dlam/2, np.nanmax(wave.value)+dlam/2 + dlam , dlam)
+    #flux_bin = np.asarray( [ np.nanmedian(flux[(wave.value >= (ll-dlam/2)) & (wave.value < (ll+dlam/2) )].value) for ll in lam_bin ] )
+
+    ## This way is a bit slower but we can avoid empty slices.
     lam_bin = np.arange(np.nanmin(wave.value)+dlam/2, np.nanmax(wave.value)+dlam/2 + dlam , dlam)
-    flux_bin = np.asarray( [ np.nanmedian(flux[(wave.value >= (ll-dlam/2)) & (wave.value < (ll+dlam/2) )].value) for ll in lam_bin ] )
+    lam_bins = []
+    flux_bins = []
+    for ll in lam_bin:
+        sel_tmp = np.where( (wave.value >= (ll-dlam/2)) & (wave.value < (ll+dlam/2) ) )[0]
+        if len(sel_tmp) > 0:
+            flux_bins.append( np.nanmedian(flux[sel_tmp].value) )
+            lam_bins.append(ll)
+
+    ## Add the units back!
+    lam_bins = np.asarray(lam_bins) * wave[0].unit
+    flux_bins = np.asarray(flux_bins) * flux[0].unit
+    dlam = dlam * wave[0].unit
 
-    return(lam_bin , flux_bin, dlam)
+    return(lam_bins , flux_bins, dlam)
 
 
 def create_figures(df_spec, bin_factor, show_nbr_figures , save_output):
@@ -89,7 +107,6 @@ def create_figures(df_spec, bin_factor, show_nbr_figures , save_output):
         Whether to save the lightcurve figures. If saved, they will be in the "output" directory.
     
     '''
-
 
     for cc, (objectid, singleobj_df) in enumerate(df_spec.data.groupby('objectid')):
 
@@ -107,20 +124,32 @@ def create_figures(df_spec, bin_factor, show_nbr_figures , save_output):
 
         for ff, (filt,filt_df) in enumerate(singleobj_df.groupby('filter')):
 
-            print("{} entries for a object {} and filter {}".format(len(filt_df.flux), objectid , filt))
+            #print("{} entries for a object {} and filter {}".format(len(filt_df.flux), objectid , filt))
             for ii in range(len(filt_df.flux)):
-
-                wave = filt_df.reset_index().wave[ii]
+
+                # get data
+                wave = filt_df.reset_index().wave[ii].to(u.micrometer)
                 flux = filt_df.reset_index().flux[ii]
                 err = filt_df.reset_index().err[ii]
-                mask = np.where(np.isfinite(err))[0]
+
+                # do masking to remove value that are not finite
+                mask = np.where((np.isfinite(err)) & (flux > 0))[0]
                 wave = wave[mask]
                 flux = flux[mask]
                 err = err[mask]
-                
-                #ax1.plot(wave/1e4 , flux , "-" , label="{} ({})".format(filt, filt_df.reset_index().mission[0]))
+
+                #ax1.plot(wave , flux , "-"  , label="{} ({})".format(filt, filt_df.reset_index().mission[ii]) )
                 wave_bin , flux_bin, _ = bin_spectra(wave, flux, bin_factor=bin_factor)
-                ax1.step(wave_bin/1e4 , flux_bin , "-" , label="{} ({})".format(filt, filt_df.reset_index().mission[ii]), where="mid")
+
+                # do some more clearning (mainly to remove some very low values)
+                selnotnan = np.where(~np.isnan(flux_bin))[0]
+                flux_bin = flux_bin[selnotnan]
+                wave_bin = wave_bin[selnotnan]
+                clip_mask = sigma_clip(flux_bin , sigma_lower = 3, cenfunc=np.nanmedian , sigma_upper = 5, stdfunc = np.nanstd).mask
+                wave_bin = wave_bin[~clip_mask]
+                flux_bin = flux_bin[~clip_mask]
+
+                ax1.step(wave_bin.to(u.micrometer) , flux_bin.to(u.erg / u.second / (u.centimeter**2) / u.angstrom) , "-" , label="{} ({})".format(filt, filt_df.reset_index().mission[ii]), where="mid") # all in um and..
 
         ax1.set_title(this_label)
         if LOGX: ax1.set_xscale("log")

diff --git a/spectroscopy/spectra_generator.md b/spectroscopy/spectra_generator.md
@@ -101,7 +101,7 @@ Andreas Faisst, Jessica Krick, Shoubaneh Hemmati, Troy Raen, Brigitta Sipőcz, D
 
 ```python
 ## IMPORTS
-import sys
+import sys, os
 import numpy as np
 
 import matplotlib.pyplot as plt
@@ -161,16 +161,17 @@ labels.append("TestJWST")
 
 sample_table = clean_sample(coords, labels, precision=2.0* u.arcsecond , verbose=1)
 
-print("Number of sources in sample table: {}".format(len(sample_table)))
 ```
 
 ### 1.2 Write out your sample to disk
 
-At this point you may wish to write out your sample to disk and reuse that in future work sessions, instead of creating it from scratch again.
+At this point you may wish to write out your sample to disk and reuse that in future work sessions, instead of creating it from scratch again. Note that we first check if the `data` directory exists and if not, we will create one.
 
 For the format of the save file, we would suggest to choose from various formats that fully support astropy objects(eg., SkyCoord).  One example that works is Enhanced Character-Separated Values or ['ecsv'](https://docs.astropy.org/en/stable/io/ascii/ecsv.html)
 
 ```python
+if not os.path.exists("./data"):
+    os.mkdir("./data")
 sample_table.write('data/input_sample.ecsv', format='ascii.ecsv', overwrite = True)
 ```
 
@@ -207,15 +208,16 @@ This archive includes spectra taken by
 
 ```python
 %%time
-
 ## Get Keck Spectra (COSMOS only)
 df_spec_DEIMOS = KeckDEIMOS_get_spec(sample_table = sample_table, search_radius_arcsec=1)
 df_spec.append(df_spec_DEIMOS)
+```
 
+```python
+%%time
 ## Get Spitzer IRS Spectra
 df_spec_IRS = SpitzerIRS_get_spec(sample_table, search_radius_arcsec=1 , COMBINESPEC=False)
 df_spec.append(df_spec_IRS)
-
 ```
 
 ### 2.2 MAST Archive
@@ -230,14 +232,14 @@ This archive includes spectra taken by
 ```python
 %%time
 ## Get Spectra for HST
-df_spec_HST = HST_get_spec(sample_table , search_radius_arcsec = 0.5, datadir = "./data/")
+df_spec_HST = HST_get_spec(sample_table , search_radius_arcsec = 0.5, datadir = "./data/", verbose = False)
 df_spec.append(df_spec_HST)
 ```
 
 ```python
 %%time
-## Get Spectra for HST
-df_jwst = JWST_get_spec(sample_table , search_radius_arcsec = 0.5, datadir = "./data/")
+## Get Spectra for JWST
+df_jwst = JWST_get_spec(sample_table , search_radius_arcsec = 0.5, datadir = "./data/", verbose = False)
 df_spec.append(df_jwst)
 ```
 
@@ -255,11 +257,11 @@ df_spec.append(df_spec_SDSS)
 ### 2.4 DESI Archive
 
 This includes DESI spectra. Here, we use the `SPARCL` query. Note that this can also be used
-for SDSS searches, however, according to the SPARCL webpage, only up to DR16 is included.
+for SDSS searches, however, according to the SPARCL webpage, only up to DR16 is included. Therefore, we will not include SDSS DR16 here (this is treated in the SDSS search above).
 
 ```python
 %%time
-## Get DESI and BOSS and SDSS spectra with SPARCL
+## Get DESI and BOSS spectra with SPARCL
 df_spec_DESIBOSS = DESIBOSS_get_spec(sample_table, search_radius_arcsec=5)
 df_spec.append(df_spec_DESIBOSS)
 ```
@@ -277,3 +279,7 @@ create_figures(df_spec = df_spec,
              save_output = False,
              )
 ```
+
+<!-- #raw -->
+
+<!-- #endraw -->