Skip to content

Commit

Permalink
bug fixes. Plotting improvement. Astropy Units added.
Browse files Browse the repository at this point in the history
  • Loading branch information
afaisst committed Jun 11, 2024
1 parent e25d0c5 commit 957d340
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 39 deletions.
4 changes: 2 additions & 2 deletions spectroscopy/code_src/desi_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def DESIBOSS_get_spec(sample_table, search_radius_arcsec):
for stab in sample_table:

## Search
#data_releases = ['DESI-EDR','BOSS-DR16']
data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16']
data_releases = ['DESI-EDR','BOSS-DR16']
#data_releases = ['DESI-EDR','BOSS-DR16','SDSS-DR16'] # we want to use DR17 directly using SDSS query

search_coords = stab["coord"]
dra = (search_radius_arcsec*u.arcsec).to(u.degree)
Expand Down
58 changes: 41 additions & 17 deletions spectroscopy/code_src/mast_functions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
import os, sys, io

import numpy as np
from contextlib import redirect_stdout

import astropy.units as u
import astropy.constants as const
from astropy.coordinates import SkyCoord
from astropy.table import Table

Expand All @@ -17,7 +19,7 @@
import matplotlib.pyplot as plt


def JWST_get_spec(sample_table, search_radius_arcsec, datadir):
def JWST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
'''
Retrieves HST spectra for a list of sources and groups/stacks them.
This main function runs two sub-functions:
Expand All @@ -33,6 +35,8 @@ def JWST_get_spec(sample_table, search_radius_arcsec, datadir):
datadir : `str`
Data directory where to store the data. Each function will create a
separate data directory (for example "[datadir]/HST/" for HST data).
verbose : `bool`
Verbosity level. Set to True for extra talking.
Returns
-------
Expand All @@ -43,17 +47,18 @@ def JWST_get_spec(sample_table, search_radius_arcsec, datadir):

## Get the spectra
print("Searching and Downloading Spectra... ")
df_jwst_all = JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir)

df_jwst_all = JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose)
print("done")

## Group
print("Grouping Spectra... ")
df_jwst_group = JWST_group_spectra(df_jwst_all, verbose=True, quickplot=True)

df_jwst_group = JWST_group_spectra(df_jwst_all, verbose=verbose, quickplot=False)
print("done")

return(df_jwst_group)


def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir, verbose):
'''
Retrieves HST spectra for a list of sources.
Expand All @@ -66,6 +71,8 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
datadir : `str`
Data directory where to store the data. Each function will create a
separate data directory (for example "[datadir]/HST/" for HST data).
verbose : `bool`
Verbosity level. Set to True for extra talking.
Returns
-------
Expand All @@ -75,6 +82,8 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
'''

## Create directory
if not os.path.exists(datadir):
os.mkdir(datadir)
this_data_dir = os.path.join(datadir , "JWST/")


Expand Down Expand Up @@ -116,9 +125,12 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):

if len(data_products_list_filter) > 0:

## Download
download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)

## Download (supress output)
trap = io.StringIO()
with redirect_stdout(trap):
download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)
if verbose: print(trap.getvalue())


## Create table
# NOTE: `download_results` has NOT the same order as `data_products_list_filter`. We therefore
Expand All @@ -129,7 +141,7 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):
idx_cross = np.where(query_results["obsid"] == data_products_list_filter["obsID"][jj] )[0]
tmp = query_results[idx_cross][keys]
tab.add_row( list(tmp[0]) + [data_products_list_filter["productFilename"][jj]] )
print("number in table {}".format(len(tab)))
#print("number in table {}".format(len(tab)))

## Create multi-index object
for jj in range(len(tab)):
Expand All @@ -140,7 +152,7 @@ def JWST_get_spec_helper(sample_table, search_radius_arcsec, datadir):

# open spectrum
filepath = download_results["Local Path"][file_idx[0]]
print(filepath)
#print(filepath)
spec1d = Spectrum1D.read(filepath)

dfsingle = pd.DataFrame(dict(wave=[spec1d.spectral_axis] , flux=[spec1d.flux], err=[np.repeat(0,len(spec1d.flux))],
Expand Down Expand Up @@ -222,8 +234,12 @@ def JWST_group_spectra(df, verbose, quickplot):
fluxes_stack = np.nanmedian(fluxes_int,axis=0)
if verbose: print("Units of fluxes for each spectrum: {}".format( ",".join([str(tt) for tt in fluxes_units]) ) )

## Unit conversion to erg/s/cm2/A (note fluxes are nominally in Jy. So have to do the step with dividing by lam^2)
fluxes_stack_cgs = (fluxes_stack * fluxes_units[0]).to(u.erg / u.second / (u.centimeter**2) / u.hertz) * (const.c.to(u.angstrom/u.second)) / (wave_grid.to(u.angstrom)**2)
fluxes_stack_cgs = fluxes_stack_cgs.to(u.erg / u.second / (u.centimeter**2) / u.angstrom)

## Add to data frame
dfsingle = pd.DataFrame(dict(wave=[wave_grid * u.micrometer] , flux=[fluxes_stack * fluxes_units[0]], err=[np.repeat(0,len(fluxes_stack))],
dfsingle = pd.DataFrame(dict(wave=[wave_grid.to(u.micrometer)] , flux=[fluxes_stack_cgs], err=[np.repeat(0,len(fluxes_stack_cgs))],
label=[tab_sel["label"].iloc[0]],
objectid=[tab_sel["objectid"].iloc[0]],
mission=[tab_sel["mission"].iloc[0]],
Expand All @@ -244,7 +260,7 @@ def JWST_group_spectra(df, verbose, quickplot):

return(df_spec)

def HST_get_spec(sample_table, search_radius_arcsec, datadir):
def HST_get_spec(sample_table, search_radius_arcsec, datadir, verbose):
'''
Retrieves HST spectra for a list of sources.
Expand All @@ -257,6 +273,8 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
datadir : `str`
Data directory where to store the data. Each function will create a
separate data directory (for example "[datadir]/HST/" for HST data).
verbose : `bool`
Verbosity level. Set to True for extra talking.
Returns
-------
Expand All @@ -266,6 +284,8 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
'''

## Create directory
if not os.path.exists(datadir):
os.mkdir(datadir)
this_data_dir = os.path.join(datadir , "HST/")


Expand Down Expand Up @@ -301,7 +321,10 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):
if len(data_products_list_filter) > 0:

## Download
download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)
trap = io.StringIO()
with redirect_stdout(trap):
download_results = Observations.download_products(data_products_list_filter, download_dir=this_data_dir)
if verbose: print(trap.getvalue())

## Create table
# NOTE: `download_results` has NOT the same order as `data_products_list_filter`. We therefore
Expand All @@ -322,9 +345,10 @@ def HST_get_spec(sample_table, search_radius_arcsec, datadir):

# open spectrum
filepath = download_results["Local Path"][file_idx[0]]
print(filepath)
#print(filepath)
spec1d = Spectrum1D.read(filepath)


# Note: this should be in erg/s/cm2/A and any wavelength unit.
dfsingle = pd.DataFrame(dict(wave=[spec1d.spectral_axis] , flux=[spec1d.flux], err=[np.repeat(0,len(spec1d.flux))],
label=[stab["label"]],
objectid=[stab["objectid"]],
Expand Down
49 changes: 39 additions & 10 deletions spectroscopy/code_src/plot_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
import numpy as np

import astropy.units as u
import astropy.constants as const
from astropy.coordinates import SkyCoord
from astropy.table import Table
from astropy.stats import sigma_clip

import matplotlib.pyplot as plt
import matplotlib as mpl
Expand Down Expand Up @@ -63,10 +65,26 @@ def bin_spectra(wave,flux, bin_factor):

dlam = np.nanmedian(np.diff(wave.value)) * bin_factor

## This is the faster way, however, it outputs warnings because there are empty slices.
#lam_bin = np.arange(np.nanmin(wave.value)+dlam/2, np.nanmax(wave.value)+dlam/2 + dlam , dlam)
#flux_bin = np.asarray( [ np.nanmedian(flux[(wave.value >= (ll-dlam/2)) & (wave.value < (ll+dlam/2) )].value) for ll in lam_bin ] )

## This way is a bit slower but we can avoid empty slices.
lam_bin = np.arange(np.nanmin(wave.value)+dlam/2, np.nanmax(wave.value)+dlam/2 + dlam , dlam)
flux_bin = np.asarray( [ np.nanmedian(flux[(wave.value >= (ll-dlam/2)) & (wave.value < (ll+dlam/2) )].value) for ll in lam_bin ] )
lam_bins = []
flux_bins = []
for ll in lam_bin:
sel_tmp = np.where( (wave.value >= (ll-dlam/2)) & (wave.value < (ll+dlam/2) ) )[0]
if len(sel_tmp) > 0:
flux_bins.append( np.nanmedian(flux[sel_tmp].value) )
lam_bins.append(ll)

## Add the units back!
lam_bins = np.asarray(lam_bins) * wave[0].unit
flux_bins = np.asarray(flux_bins) * flux[0].unit
dlam = dlam * wave[0].unit

return(lam_bin , flux_bin, dlam)
return(lam_bins , flux_bins, dlam)


def create_figures(df_spec, bin_factor, show_nbr_figures , save_output):
Expand All @@ -89,7 +107,6 @@ def create_figures(df_spec, bin_factor, show_nbr_figures , save_output):
Whether to save the lightcurve figures. If saved, they will be in the "output" directory.
'''


for cc, (objectid, singleobj_df) in enumerate(df_spec.data.groupby('objectid')):

Expand All @@ -107,20 +124,32 @@ def create_figures(df_spec, bin_factor, show_nbr_figures , save_output):

for ff, (filt,filt_df) in enumerate(singleobj_df.groupby('filter')):

print("{} entries for a object {} and filter {}".format(len(filt_df.flux), objectid , filt))
#print("{} entries for a object {} and filter {}".format(len(filt_df.flux), objectid , filt))
for ii in range(len(filt_df.flux)):

wave = filt_df.reset_index().wave[ii]

# get data
wave = filt_df.reset_index().wave[ii].to(u.micrometer)
flux = filt_df.reset_index().flux[ii]
err = filt_df.reset_index().err[ii]
mask = np.where(np.isfinite(err))[0]

# do masking to remove value that are not finite
mask = np.where((np.isfinite(err)) & (flux > 0))[0]
wave = wave[mask]
flux = flux[mask]
err = err[mask]
#ax1.plot(wave/1e4 , flux , "-" , label="{} ({})".format(filt, filt_df.reset_index().mission[0]))

#ax1.plot(wave , flux , "-" , label="{} ({})".format(filt, filt_df.reset_index().mission[ii]) )
wave_bin , flux_bin, _ = bin_spectra(wave, flux, bin_factor=bin_factor)
ax1.step(wave_bin/1e4 , flux_bin , "-" , label="{} ({})".format(filt, filt_df.reset_index().mission[ii]), where="mid")

# do some more clearning (mainly to remove some very low values)
selnotnan = np.where(~np.isnan(flux_bin))[0]
flux_bin = flux_bin[selnotnan]
wave_bin = wave_bin[selnotnan]
clip_mask = sigma_clip(flux_bin , sigma_lower = 3, cenfunc=np.nanmedian , sigma_upper = 5, stdfunc = np.nanstd).mask
wave_bin = wave_bin[~clip_mask]
flux_bin = flux_bin[~clip_mask]

ax1.step(wave_bin.to(u.micrometer) , flux_bin.to(u.erg / u.second / (u.centimeter**2) / u.angstrom) , "-" , label="{} ({})".format(filt, filt_df.reset_index().mission[ii]), where="mid") # all in um and..

ax1.set_title(this_label)
if LOGX: ax1.set_xscale("log")
Expand Down
26 changes: 16 additions & 10 deletions spectroscopy/spectra_generator.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ Andreas Faisst, Jessica Krick, Shoubaneh Hemmati, Troy Raen, Brigitta Sipőcz, D

```python
## IMPORTS
import sys
import sys, os
import numpy as np

import matplotlib.pyplot as plt
Expand Down Expand Up @@ -161,16 +161,17 @@ labels.append("TestJWST")

sample_table = clean_sample(coords, labels, precision=2.0* u.arcsecond , verbose=1)

print("Number of sources in sample table: {}".format(len(sample_table)))
```

### 1.2 Write out your sample to disk

At this point you may wish to write out your sample to disk and reuse that in future work sessions, instead of creating it from scratch again.
At this point you may wish to write out your sample to disk and reuse that in future work sessions, instead of creating it from scratch again. Note that we first check if the `data` directory exists and if not, we will create one.

For the format of the save file, we would suggest to choose from various formats that fully support astropy objects(eg., SkyCoord). One example that works is Enhanced Character-Separated Values or ['ecsv'](https://docs.astropy.org/en/stable/io/ascii/ecsv.html)

```python
if not os.path.exists("./data"):
os.mkdir("./data")
sample_table.write('data/input_sample.ecsv', format='ascii.ecsv', overwrite = True)
```

Expand Down Expand Up @@ -207,15 +208,16 @@ This archive includes spectra taken by

```python
%%time

## Get Keck Spectra (COSMOS only)
df_spec_DEIMOS = KeckDEIMOS_get_spec(sample_table = sample_table, search_radius_arcsec=1)
df_spec.append(df_spec_DEIMOS)
```

```python
%%time
## Get Spitzer IRS Spectra
df_spec_IRS = SpitzerIRS_get_spec(sample_table, search_radius_arcsec=1 , COMBINESPEC=False)
df_spec.append(df_spec_IRS)

```

### 2.2 MAST Archive
Expand All @@ -230,14 +232,14 @@ This archive includes spectra taken by
```python
%%time
## Get Spectra for HST
df_spec_HST = HST_get_spec(sample_table , search_radius_arcsec = 0.5, datadir = "./data/")
df_spec_HST = HST_get_spec(sample_table , search_radius_arcsec = 0.5, datadir = "./data/", verbose = False)
df_spec.append(df_spec_HST)
```

```python
%%time
## Get Spectra for HST
df_jwst = JWST_get_spec(sample_table , search_radius_arcsec = 0.5, datadir = "./data/")
## Get Spectra for JWST
df_jwst = JWST_get_spec(sample_table , search_radius_arcsec = 0.5, datadir = "./data/", verbose = False)
df_spec.append(df_jwst)
```

Expand All @@ -255,11 +257,11 @@ df_spec.append(df_spec_SDSS)
### 2.4 DESI Archive

This includes DESI spectra. Here, we use the `SPARCL` query. Note that this can also be used
for SDSS searches, however, according to the SPARCL webpage, only up to DR16 is included.
for SDSS searches, however, according to the SPARCL webpage, only up to DR16 is included. Therefore, we will not include SDSS DR16 here (this is treated in the SDSS search above).

```python
%%time
## Get DESI and BOSS and SDSS spectra with SPARCL
## Get DESI and BOSS spectra with SPARCL
df_spec_DESIBOSS = DESIBOSS_get_spec(sample_table, search_radius_arcsec=5)
df_spec.append(df_spec_DESIBOSS)
```
Expand All @@ -277,3 +279,7 @@ create_figures(df_spec = df_spec,
save_output = False,
)
```

<!-- #raw -->

<!-- #endraw -->

0 comments on commit 957d340

Please sign in to comment.