-
Notifications
You must be signed in to change notification settings - Fork 75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Auto-configuration identification #2124
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,12 +2,22 @@ | |
import pathlib | ||
|
||
import astropy.io | ||
from astropy.io import registry, fits | ||
from astropy.nddata import CCDData | ||
from astropy.wcs import WCS | ||
|
||
from specutils.io.registers import identify_spectrum_format | ||
from specutils import SpectrumList | ||
from specutils import Spectrum1D, SpectrumList, SpectrumCollection | ||
from stdatamodels import asdf_in_fits | ||
|
||
from jdaviz.core.config import list_configurations | ||
|
||
__all__ = ['guess_dimensionality', 'get_valid_format', 'identify_data'] | ||
__all__ = [ | ||
'guess_dimensionality', | ||
'get_valid_format', | ||
'identify_data', | ||
'identify_helper' | ||
] | ||
|
||
# create a default file format to configuration mapping | ||
default_mapping = {'JWST x1d': 'specviz', 'JWST s2d': 'specviz2d', | ||
|
@@ -112,3 +122,153 @@ def identify_data(filename, current=None): | |
raise ValueError('Mismatch between input file format and loaded configuration.') | ||
|
||
return valid_format, config | ||
|
||
|
||
def _get_wcs(filename, header): | ||
""" | ||
Get gwcs.wcs.WCS or astropy.wcs.WCS from FITS file. | ||
""" | ||
try: | ||
with asdf_in_fits.open(filename) as af: | ||
wcs = af.tree['meta']['wcs'] | ||
|
||
# if the file doesn't have ASDF-in-FITS, then | ||
# the 'meta' key doesn't exist, yielding a KeyError: | ||
except KeyError: | ||
# fall back on using astropy WCS: | ||
wcs = WCS(header) | ||
|
||
return wcs | ||
|
||
|
||
def identify_helper(filename, ext=1): | ||
""" | ||
Guess the appropriate viz helper for a data file. | ||
|
||
Parameters | ||
---------- | ||
filename : str (path-like) | ||
Name for a local data file. | ||
ext : int | ||
Extension from the FITS file. | ||
|
||
Returns | ||
------- | ||
helper_name : str | ||
Name of the best-guess helper for ``filename``. | ||
""" | ||
supported_dtypes = [ | ||
Spectrum1D, | ||
SpectrumList, | ||
SpectrumCollection, | ||
CCDData | ||
] | ||
|
||
if filename.lower().endswith('asdf'): | ||
# ASDF files are only supported in jdaviz for | ||
# Roman WFI 2D images, so suggest imviz: | ||
return 'imviz' | ||
|
||
header = fits.getheader(filename, ext=ext) | ||
data = fits.getdata(filename, ext=ext) | ||
wcs = _get_wcs(filename, header) | ||
has_spectral_axis = 'spectral' in wcs.world_axis_object_classes | ||
|
||
n_axes = ( | ||
int(has_spectral_axis) + | ||
|
||
sum([component[0] in ['celestial', 'angle'] | ||
for component in wcs.world_axis_object_components]) - | ||
|
||
# remove any slit_frame axis from the count | ||
(0 if not hasattr(wcs, 'available_frames') else | ||
int('slit_frame' in wcs.available_frames)) | ||
) | ||
|
||
# use astropy to recognize some data formats: | ||
possible_formats = {} | ||
for cls in supported_dtypes: | ||
fmt = registry.identify_format( | ||
'read', cls, filename, None, {}, {} | ||
) | ||
if fmt: | ||
possible_formats[cls] = fmt | ||
|
||
# If CCDData is the only match: | ||
if len(possible_formats) == 1: | ||
only_key, only_value = possible_formats.popitem() | ||
if only_key == CCDData: | ||
# could be 2D spectrum or 2D image. break tie with WCS: | ||
if has_spectral_axis: | ||
if n_axes > 1: | ||
return 'specviz2d' | ||
return 'specviz' | ||
elif not isinstance(data, fits.BinTableHDU): | ||
return 'imviz' | ||
|
||
# Ensure specviz is chosen when ``data`` is a table or recarray | ||
# and there's a "known" spectral column name: | ||
if isinstance(data, (fits.BinTableHDU, fits.fitsrec.FITS_rec)): | ||
# now catch spectra in FITS tables, looking for | ||
# columns with "wave" or "flux" in the names: | ||
table_columns = [getattr(col, 'name', col).lower() for col in data.columns] | ||
|
||
# these are "known" prefixes for column names | ||
# in FITS tables of spectral observations | ||
known_spectral_columns = [ | ||
'wave', | ||
'flux' | ||
] | ||
Comment on lines
+219
to
+221
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I worry about hard-coding these kinds of column names, but can't think of a more general way to do this. I suggested we could try to auto-identify wavelength or flux columns based on any attached units on the column. Maybe that's out of scope for this PR though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think so. Let's get v0.1 done and iterate. |
||
|
||
# this list of bools indicates any | ||
# spectral column names found: | ||
found_spectral_columns = [ | ||
found_col.startswith(known_col) | ||
for known_col in known_spectral_columns | ||
for found_col in table_columns | ||
] | ||
|
||
# if at least one spectral column is found: | ||
if sum(found_spectral_columns): | ||
return 'specviz' | ||
|
||
# If the data could be spectral: | ||
for cls in [Spectrum1D, SpectrumList]: | ||
if cls in possible_formats.keys(): | ||
recognized_spectrum_format = possible_formats[cls][0].lower() | ||
|
||
# first catch known JWST spectrum types: | ||
if (n_axes == 3 and | ||
recognized_spectrum_format.find('s3d') > -1): | ||
return 'cubeviz' | ||
elif (n_axes == 2 and | ||
recognized_spectrum_format.find('x1d') > -1): | ||
return 'specviz' | ||
Comment on lines
+242
to
+246
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if it's worth generalizing these now to something non-JWST specific. For example, this function already fails with MaNGA cubes files, which is maybe a tad bittersweet since Jdaviz was originally developed against those files. :( And for MAST to switch to this function, we'd need this generalized before we can start any work on our end. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that there's a better way to do this! Stepping away for a few days and seeing this comment makes it clearer. This PR relies on But there is already a registry which identifies JWST, MaNGA, and many others: the specutils registry. The easy solution to all this is to also try the specutils registry. I'll do this today! 🤦🏻 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Support for MaNGA introduced in 7b8d713. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the update. The MaNGA cube and rss files do seem to now correctly identify as cubeviz and specviz, which is great. Since JWST formats are covered by the |
||
|
||
# we intentionally don't choose specviz2d for | ||
# data recognized as 's2d' as we did with the cases above, | ||
# because 2D data products could be 2D spectra *or* 2D images | ||
# that the registry recognizes as s2d. | ||
|
||
# Use WCS to break the tie below: | ||
elif n_axes == 2: | ||
if has_spectral_axis: | ||
return 'specviz2d' | ||
return 'imviz' | ||
|
||
elif n_axes == 1: | ||
return 'specviz' | ||
|
||
try: | ||
# try using the specutils registry: | ||
valid_format, config = identify_data(filename) | ||
return config | ||
except ValueError: | ||
# if file type not recognized: | ||
pass | ||
|
||
if n_axes == 2 and not has_spectral_axis: | ||
# at this point, non-spectral 2D data are likely images: | ||
return 'imviz' | ||
|
||
raise ValueError(f"No helper could be auto-identified for {filename}.") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This makes me vaguely unhappy, but I don't know of another way to check for this other than trying to open it, so...not a change request, more of a change wish 😆
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel this. I thought about something like
but thought that was too obfuscated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I actually like that idea and since its a key and not an attribute, it might be slightly less ugly (although on second thought you will need two layers of dictionaries which makes it a bit ugly again 😬 ). This entire
_get_wcs
function could be removed andreplaced with:
which also avoids having to parse the header just to have on hand for the fallback.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree @kecnry that this is a better way to do it, but even I have trouble reading it. 🤔