Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: SDSS-V SpectrumList format ambiguity, mwmVisit BOSS load fail #1185

Merged
merged 15 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,20 @@ Bug Fixes
- Fixed ``Spectrum1D.with_flux_unit()`` not converting uncertainty along
with flux unit. [#1181]

- Fixed ``mwmVisit`` SDSS-V ``Spectrum1D`` and ``SpectrumList`` default loader being unable to load files containing only BOSS instrument spectra. [#1185]

- Fixed automatic format detection for SDSS-V ``SpectrumList`` default loaders. [#1185]

Other Changes and Additions
^^^^^^^^^^^^^^^^^^^^^^^^^^^

- Replaced ``LevMarLSQFitter`` with ``TRFLSQFitter`` as the former is no longer
recommended by ``astropy``. [#1180]

- All "Multi" loaders have been merged into the standard SDSS-V ``SpectrumList`` default loaders. [#1185]

- All ``Spectrum1D`` loaders have been removed from SDSS-V default loaders. [#1185]

1.17.0 (2024-10-04)
-------------------

Expand Down
260 changes: 83 additions & 177 deletions specutils/io/default_loaders/sdss_v.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
"""Register reader functions for various spectral formats."""
import warnings
from typing import Optional

import numpy as np
from astropy.units import Unit, Quantity, Angstrom
from astropy.nddata import StdDevUncertainty, InverseVariance
from astropy.io.fits import HDUList, BinTableHDU, ImageHDU
from astropy.io.fits import BinTableHDU, HDUList, ImageHDU
from astropy.nddata import InverseVariance, StdDevUncertainty
from astropy.units import Angstrom, Quantity, Unit
from astropy.utils.exceptions import AstropyUserWarning

from ...spectra import Spectrum1D, SpectrumList
from ..registers import data_loader
from ..parsing_utils import read_fileobj_or_hdulist
from ..registers import data_loader

__all__ = [
"load_sdss_apVisit_1D",
"load_sdss_apVisit_list",
"load_sdss_apStar_1D",
"load_sdss_apStar_list",
"load_sdss_spec_1D",
"load_sdss_spec_list",
"load_sdss_mwm_1d",
"load_sdss_mwm_list",
]

Expand Down Expand Up @@ -119,16 +117,9 @@ def _fetch_flux_unit(hdu):


# APOGEE files
@data_loader(
"SDSS-V apStar",
identifier=apStar_identify,
dtype=Spectrum1D,
priority=10,
extensions=["fits"],
)
def load_sdss_apStar_1D(file_obj, idx: int = 0, **kwargs):
def _load_sdss_apStar_1D(file_obj, idx: int = 0, **kwargs):
"""
Load an apStar file as a Spectrum1D.
Subloader to load an apStar file as a Spectrum1D.

Parameters
----------
Expand Down Expand Up @@ -183,9 +174,10 @@ def load_sdss_apStar_1D(file_obj, idx: int = 0, **kwargs):


@data_loader(
"SDSS-V apStar multi",
"SDSS-V apStar",
identifier=apStar_identify,
dtype=SpectrumList,
force=True,
priority=10,
extensions=["fits"],
)
Expand All @@ -205,63 +197,20 @@ def load_sdss_apStar_list(file_obj, **kwargs):
"""
with read_fileobj_or_hdulist(file_obj, **kwargs) as hdulist:
nvisits = hdulist[0].header.get("NVISITS")
if nvisits <= 1:
raise ValueError(
"Only 1 visit in this file. Use Spectrum1D.read() instead.")
if nvisits < 1:
raise RuntimeError(
"No visits in this file. Use Spectrum1D.read() instead.")
return SpectrumList([
load_sdss_apStar_1D(file_obj, idx=i, **kwargs)
_load_sdss_apStar_1D(file_obj, idx=i, **kwargs)
for i in range(nvisits)
])


@data_loader(
"SDSS-V apVisit",
identifier=apVisit_identify,
dtype=Spectrum1D,
priority=10,
extensions=["fits"],
)
def load_sdss_apVisit_1D(file_obj, **kwargs):
"""
Load an apVisit file as a Spectrum1D.

Parameters
----------
file_obj : str, file-like, or HDUList
FITS file name, file object, or HDUList.

Returns
-------
Spectrum1D
The spectrum contained in the file
"""
flux_unit = Unit("1e-17 erg / (Angstrom cm2 s)")

with read_fileobj_or_hdulist(file_obj, memmap=False, **kwargs) as hdulist:
# Fetch and flatten spectrum parameters
spectral_axis = Quantity(hdulist[4].data.flatten(), unit=Angstrom)
flux_unit = _fetch_flux_unit(hdulist[1])
flux = Quantity(hdulist[1].data.flatten(), unit=flux_unit)
e_flux = StdDevUncertainty(hdulist[2].data.flatten())

# Get metadata and attach bitmask and MJD
meta = dict()
meta["header"] = hdulist[0].header
mask = hdulist[3].data.flatten()
# NOTE: specutils considers 0/False as valid values, simlar to numpy convention
mask = mask != 0

return Spectrum1D(spectral_axis=spectral_axis,
flux=flux,
mask=mask,
uncertainty=e_flux,
meta=meta)


@data_loader(
"SDSS-V apVisit multi",
identifier=apVisit_identify,
dtype=SpectrumList,
force=True,
priority=10,
extensions=["fits"],
)
Expand Down Expand Up @@ -311,46 +260,12 @@ def load_sdss_apVisit_list(file_obj, **kwargs):


# BOSS REDUX products (specLite, specFull, custom coadd files, etc)

@data_loader(
"SDSS-V spec",
identifier=spec_sdss5_identify,
dtype=Spectrum1D,
priority=5,
extensions=["fits"],
)
def load_sdss_spec_1D(file_obj, *args, hdu: Optional[int] = None, **kwargs):
"""
Load a given BOSS spec file as a Spectrum1D object.

Parameters
----------
file_obj : str, file-like, or HDUList
FITS file name, file object, or HDUList..
hdu : int
The specified HDU to load a given spectra from.

Returns
-------
Spectrum1D
The spectrum contained in the file at the specified HDU.
"""
if hdu is None:
# TODO: how should we handle this -- multiple things in file, but the user cannot choose.
hdu = 1 # defaulting to coadd
# raise ValueError("HDU not specified! Please specify a HDU to load.")
elif hdu in [2, 3, 4]:
raise ValueError("Invalid HDU! HDU{} is not spectra.".format(hdu))
with read_fileobj_or_hdulist(file_obj, memmap=False, **kwargs) as hdulist:
# directly load the coadd at HDU1
return _load_BOSS_HDU(hdulist, hdu, **kwargs)


@data_loader(
"SDSS-V spec multi",
identifier=spec_sdss5_identify,
dtype=SpectrumList,
priority=5,
force=True,
priority=10,
extensions=["fits"],
)
def load_sdss_spec_list(file_obj, **kwargs):
Expand Down Expand Up @@ -429,49 +344,9 @@ def _load_BOSS_HDU(hdulist: HDUList, hdu: int, **kwargs):
@data_loader(
"SDSS-V mwm",
identifier=mwm_identify,
dtype=Spectrum1D,
priority=20,
extensions=["fits"],
)
def load_sdss_mwm_1d(file_obj, hdu: Optional[int] = None, **kwargs):
"""
Load an unspecified spec file as a Spectrum1D.

Parameters
----------
file_obj : str, file-like, or HDUList
FITS file name, file object, or HDUList..
hdu : int
Specified HDU to load.

Returns
-------
Spectrum1D
The spectrum contained in the file
"""
with read_fileobj_or_hdulist(file_obj, memmap=False, **kwargs) as hdulist:
# Check if file is empty first
datasums = []
for i in range(1, len(hdulist)):
datasums.append(int(hdulist[i].header.get("DATASUM")))
if (np.array(datasums) == 0).all():
raise ValueError("Specified file is empty.")

# TODO: how should we handle this -- multiple things in file, but the user cannot choose.
if hdu is None:
for i in range(len(hdulist)):
if hdulist[i].header.get("DATASUM") != "0":
hdu = i
break

return _load_mwmVisit_or_mwmStar_hdu(hdulist, hdu, **kwargs)


@data_loader(
"SDSS-V mwm multi",
identifier=mwm_identify,
force=True,
dtype=SpectrumList,
priority=20,
priority=10,
extensions=["fits"],
)
def load_sdss_mwm_list(file_obj, **kwargs):
Expand All @@ -486,11 +361,7 @@ def load_sdss_mwm_list(file_obj, **kwargs):
Returns
-------
SpectrumList
The spectra contained in the file, where:
Spectrum1D
A given spectra of nD flux
None
If there are no spectra for that spectrograph/observatory
The spectra contained in the file, each of 1D flux.
"""
spectra = SpectrumList()
with read_fileobj_or_hdulist(file_obj, memmap=False, **kwargs) as hdulist:
Expand All @@ -499,18 +370,18 @@ def load_sdss_mwm_list(file_obj, **kwargs):
for hdu in range(1, len(hdulist)):
datasums.append(int(hdulist[hdu].header.get("DATASUM")))
if (np.array(datasums) == 0).all():
raise ValueError("Specified file is empty.")
raise RuntimeError("Specified file is empty.")

# Now load file
for hdu in range(1, len(hdulist)):
if hdulist[hdu].header.get("DATASUM") == "0":
# Skip zero data HDU's
# TODO: validate if we want this printed warning or not.
# it might get annoying & fill logs with useless alerts.
print("WARNING: HDU{} ({}) is empty.".format(
hdu, hdulist[hdu].name))
warnings.warn(
"WARNING: HDU{} ({}) is empty.".format(
hdu, hdulist[hdu].name), AstropyUserWarning)
continue
spectra.append(_load_mwmVisit_or_mwmStar_hdu(hdulist, hdu))
spectra_list = _load_mwmVisit_or_mwmStar_hdu(hdulist, hdu)
[spectra.append(spectra_list[i]) for i in range(len(spectra_list))]
return spectra


Expand Down Expand Up @@ -552,7 +423,7 @@ def _load_mwmVisit_or_mwmStar_hdu(hdulist: HDUList, hdu: int, **kwargs):
spectral_axis = Quantity(wavelength, unit=Angstrom)

# Fetch flux, e_flux
# NOTE:: flux info is not written into mwm files
# NOTE: flux info is not written into mwm files
flux_unit = Unit("1e-17 erg / (Angstrom cm2 s)") # NOTE: hardcoded unit
flux = Quantity(hdulist[hdu].data["flux"], unit=flux_unit)
e_flux = InverseVariance(array=hdulist[hdu].data["ivar"])
Expand All @@ -562,14 +433,6 @@ def _load_mwmVisit_or_mwmStar_hdu(hdulist: HDUList, hdu: int, **kwargs):
# NOTE: specutils considers 0/False as valid values, simlar to numpy convention
mask = mask != 0

# collapse shape if 1D spectra in 2D array
# NOTE: this fixes a jdaviz handling bug for 2D of shape 1,
# it could be that it's expected to be parsed this way.
if flux.shape[0] == 1:
flux = flux[0]
e_flux = e_flux[0]
mask = mask[0]

# Create metadata
meta = dict()
meta["header"] = hdulist[0].header
Expand All @@ -578,28 +441,71 @@ def _load_mwmVisit_or_mwmStar_hdu(hdulist: HDUList, hdu: int, **kwargs):
meta["snr"] = np.array(hdulist[hdu].data["snr"])

# Add identifiers (obj, telescope, mjd, datatype)
# TODO: need to see what metadata we're interested in for the MWM files.
meta["telescope"] = hdulist[hdu].data["telescope"]
meta["instrument"] = hdulist[hdu].header.get("INSTRMNT")
try:
try: # get obj if exists
meta["obj"] = hdulist[hdu].data["obj"]
except KeyError:
pass

# choose between mwmVisit/Star via KeyError except
try:
meta["date"] = hdulist[hdu].data["date_obs"]
meta["mjd"] = hdulist[hdu].data["mjd"]
meta['mjd'] = hdulist[hdu].data['mjd']
meta["datatype"] = "mwmVisit"
except KeyError:
meta["mjd"] = (str(hdulist[hdu].data["min_mjd"][0]) + "->" +
str(hdulist[hdu].data["max_mjd"][0]))
meta["min_mjd"] = str(hdulist[hdu].data["min_mjd"][0])
meta["max_mjd"] = str(hdulist[hdu].data["max_mjd"][0])
meta["datatype"] = "mwmStar"
finally:
meta["name"] = hdulist[hdu].name
meta["sdss_id"] = hdulist[hdu].data['sdss_id']

return Spectrum1D(
spectral_axis=spectral_axis,
flux=flux,
uncertainty=e_flux,
mask=mask,
meta=meta,
)
# drop back a list of Spectrum1Ds to unpack
metadicts = _split_mwm_meta_dict(meta)
return [
Spectrum1D(
spectral_axis=spectral_axis,
flux=flux[i],
uncertainty=e_flux[i],
mask=mask[i],
meta=metadicts[i],
) for i in range(flux.shape[0])
]

Copy link
Contributor

@havok2063 havok2063 Oct 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if not already in place, we will need to do a similar thing for the SDSS-V spec loader, but only for any of the spec-full files.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aren't spec-full visits dispersed across the HDUs past the spall and zall HDU's? Or am I mistaken about the format?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that's right. They're appended at the end as

5  MJD_EXP_60052-00    1 BinTableHDU    273   4648R x 8C   [E, E, E, J, J, E, E, E]   
6  MJD_EXP_60052-01    1 BinTableHDU    271   4648R x 8C   [E, E, E, J, J, E, E, E]   
7  MJD_EXP_60052-02    1 BinTableHDU    273   4648R x 8C   [E, E, E, J, J, E, E, E]   
8  MJD_EXP_60052-03    1 BinTableHDU    273   4648R x 8C   [E, E, E, J, J, E, E, E] 
...  

So, yeah, a change isn't needed here since they are 1d spectra in separate extensions.


def _split_mwm_meta_dict(d):
"""
Metadata sub-loader subfunction for MWM files.

Parameters
----------
d: dict
Initial metadata dictionary.

Returns
-------
dicts: list[dict]
List of dicts with unpacked metadata for length > 1 array objects.

"""
# find the length of entries
N = max(len(v) if isinstance(v, np.ndarray) else 1 for v in d.values())

# create N dictionaries to hold the split results
dicts = [{} for _ in range(N)]

for key, value in d.items():
if isinstance(value, np.ndarray):
# Ensure that the length of the list matches N
if len(value) != N:
# an error case we ignore
continue
# distribute each element to the corresponding metadict
for i in range(N):
dicts[i][key] = value[i]
else:
# if it's a single object, copy it to each metadict
for i in range(N):
dicts[i][key] = value

return dicts
Loading