Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Auto-configuration identification #2124

Merged
merged 3 commits into from
Apr 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ New Features

* Model fitting results are logged in a table within the plugin [#2093].

* Auto-identify a configuration/helper for a given data file [#2124].

Cubeviz
^^^^^^^

Expand Down
164 changes: 162 additions & 2 deletions jdaviz/core/data_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,22 @@
import pathlib

import astropy.io
from astropy.io import registry, fits
from astropy.nddata import CCDData
from astropy.wcs import WCS

from specutils.io.registers import identify_spectrum_format
from specutils import SpectrumList
from specutils import Spectrum1D, SpectrumList, SpectrumCollection
from stdatamodels import asdf_in_fits

from jdaviz.core.config import list_configurations

__all__ = ['guess_dimensionality', 'get_valid_format', 'identify_data']
__all__ = [
'guess_dimensionality',
'get_valid_format',
'identify_data',
'identify_helper'
]

# create a default file format to configuration mapping
default_mapping = {'JWST x1d': 'specviz', 'JWST s2d': 'specviz2d',
Expand Down Expand Up @@ -112,3 +122,153 @@ def identify_data(filename, current=None):
raise ValueError('Mismatch between input file format and loaded configuration.')

return valid_format, config


def _get_wcs(filename, header):
    """
    Return a WCS for a FITS file (gwcs.wcs.WCS or astropy.wcs.WCS).

    The WCS stored under ``tree['meta']['wcs']`` of the file opened with
    ``asdf_in_fits`` is preferred. When that lookup raises ``KeyError``,
    an ``astropy.wcs.WCS`` is built from ``header`` instead.

    Parameters
    ----------
    filename : str (path-like)
        Name for a local FITS file.
    header : astropy.io.fits.Header
        Header handed to ``astropy.wcs.WCS`` for the fallback.

    Returns
    -------
    wcs
        The object found at ``tree['meta']['wcs']``, or ``WCS(header)``.
    """
    # NOTE(review): opening the file is the only known way to find out
    # whether it carries ASDF-in-FITS. A chained
    # ``af.tree.get('meta', {}).get('wcs', ...)`` lookup was discussed in
    # review as an alternative but judged harder to read.
    try:
        with asdf_in_fits.open(filename) as asdf_file:
            return asdf_file.tree['meta']['wcs']
    except KeyError:
        # without ASDF-in-FITS the 'meta' key is missing,
        # so build the WCS from the FITS header instead:
        return WCS(header)


def identify_helper(filename, ext=1):
    """
    Guess the appropriate viz helper for a data file.

    The guess combines three sources, in this order: the astropy I/O
    registry (which data classes claim they can read the file), the
    world axes reported by the file's WCS, and finally ``identify_data``
    as a fallback.

    Parameters
    ----------
    filename : str (path-like)
        Name for a local data file. ``pathlib.Path`` objects are accepted.
    ext : int
        Extension from the FITS file.

    Returns
    -------
    helper_name : str
        Name of the best-guess helper for ``filename``.

    Raises
    ------
    ValueError
        If none of the checks settles on a helper.
    """
    # Normalize to ``str`` so that path objects (which have no ``.lower()``)
    # are handled the same way as plain strings.
    path = str(filename)

    if path.lower().endswith('asdf'):
        # ASDF files are only supported in jdaviz for
        # Roman WFI 2D images, so suggest imviz:
        return 'imviz'

    header = fits.getheader(path, ext=ext)
    data = fits.getdata(path, ext=ext)
    wcs = _get_wcs(path, header)
    has_spectral_axis = 'spectral' in wcs.world_axis_object_classes

    n_sky_axes = sum(
        component[0] in ('celestial', 'angle')
        for component in wcs.world_axis_object_components
    )
    # remove any slit_frame axis from the count
    has_slit_frame = 'slit_frame' in getattr(wcs, 'available_frames', ())
    n_axes = int(has_spectral_axis) + n_sky_axes - int(has_slit_frame)

    # ``fits.getdata`` returns the extension's *data*, never an HDU object,
    # so a binary table shows up here as a ``FITS_rec``. Testing against
    # ``BinTableHDU`` alone would never recognize a table.
    is_table = isinstance(data, (fits.BinTableHDU, fits.fitsrec.FITS_rec))

    # use astropy to recognize some data formats:
    possible_formats = {}
    for cls in (Spectrum1D, SpectrumList, SpectrumCollection, CCDData):
        fmt = registry.identify_format('read', cls, path, None, {}, {})
        if fmt:
            possible_formats[cls] = fmt

    # If CCDData is the only match. The dict is inspected without being
    # mutated, so a lone Spectrum1D/SpectrumList match is still visible
    # to the spectral checks further down.
    if list(possible_formats) == [CCDData]:
        # could be 2D spectrum or 2D image. break tie with WCS:
        if has_spectral_axis:
            return 'specviz2d' if n_axes > 1 else 'specviz'
        if not is_table:
            return 'imviz'

    # Ensure specviz is chosen when ``data`` is a table or recarray
    # and there's a "known" spectral column name:
    if is_table:
        column_names = [getattr(col, 'name', col).lower() for col in data.columns]

        # these are "known" prefixes for column names
        # in FITS tables of spectral observations.
        # NOTE(review): hard-coding the prefixes was questioned in review;
        # identifying wavelength/flux columns from their units was
        # suggested as a follow-up.
        known_spectral_columns = ('wave', 'flux')

        # if at least one spectral column is found:
        if any(name.startswith(known_spectral_columns) for name in column_names):
            return 'specviz'

    # If the data could be spectral:
    for cls in (Spectrum1D, SpectrumList):
        if cls not in possible_formats:
            continue

        recognized_spectrum_format = possible_formats[cls][0].lower()

        # first catch known JWST spectrum types.
        # NOTE(review): a reviewer asked whether these JWST-specific
        # checks are still needed now that ``identify_data`` is also
        # consulted below; they are kept here unchanged.
        if n_axes == 3 and 's3d' in recognized_spectrum_format:
            return 'cubeviz'
        elif n_axes == 2 and 'x1d' in recognized_spectrum_format:
            return 'specviz'

        # we intentionally don't choose specviz2d for
        # data recognized as 's2d' as we did with the cases above,
        # because 2D data products could be 2D spectra *or* 2D images
        # that the registry recognizes as s2d.

        # Use WCS to break the tie below:
        elif n_axes == 2:
            return 'specviz2d' if has_spectral_axis else 'imviz'

        elif n_axes == 1:
            return 'specviz'

    try:
        # try using the specutils registry:
        _, config = identify_data(path)
    except ValueError:
        # if file type not recognized:
        pass
    else:
        return config

    if n_axes == 2 and not has_spectral_axis:
        # at this point, non-spectral 2D data are likely images:
        return 'imviz'

    raise ValueError(f"No helper could be auto-identified for {filename}.")
48 changes: 48 additions & 0 deletions jdaviz/core/tests/test_data_menu.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,30 @@
import pytest
import numpy as np

from glue.core.roi import XRangeROI
from astropy.utils.data import download_file

from jdaviz.core.data_formats import identify_helper

# Example HST/JWST products hosted on MAST. Each entry pairs a MAST URI
# with the jdaviz helper expected for that file:
example_uri_helper = [
    [
        'mast:HST/product/id4301ouq_drz.fits',
        'imviz',
    ],
    [
        'mast:HST/product/ldq601030_x1dsum.fits',
        'specviz',
    ],
    [
        'mast:HST/product/o4xw01dkq_flt.fits',
        'specviz2d',
    ],
    [
        'mast:JWST/product/jw01324-o001_s00094_niriss_f200w-gr150c-gr150r_x1d.fits',
        'specviz',
    ],
    [
        'mast:JWST/product/jw01324-o006_s00005_nirspec_f100lp-g140h_s2d.fits',
        'specviz2d',
    ],
    [
        'mast:JWST/product/jw01345-o001_t021_nircam_clear-f200w_i2d.fits',
        'imviz',
    ],
    [
        'mast:JWST/product/jw01373-o028_t001_nirspec_g395h-f290lp_s3d.fits',
        'cubeviz',
    ],
    [
        'mast:JWST/product/jw01373-o031_t007_miri_ch1-shortmediumlong_s3d.fits',
        'cubeviz',
    ],
    [
        'mast:JWST/product/jw01783-o004_t008_nircam_clear-f444w_i2d.fits',
        'imviz',
    ],
    [
        'mast:JWST/product/jw02732-o004_t004_miri_ch1-shortmediumlong_x1d.fits',
        'specviz',
    ],
]


def test_data_menu_toggles(specviz_helper, spectrum1d):
Expand Down Expand Up @@ -75,3 +99,27 @@ def test_visibility_toggle(imviz_helper):
visible=True)
assert iv.layers[0].visible is True
assert po.stretch_preset.value == 90


@pytest.mark.remote_data
@pytest.mark.filterwarnings(r"ignore::astropy.wcs.wcs.FITSFixedWarning")
@pytest.mark.parametrize("uri, expected_helper", example_uri_helper)
def test_auto_config_detection(uri, expected_helper):
    """Each example MAST product is matched to its expected helper."""
    # fetch the product through the MAST download endpoint (cached locally):
    download_url = f'https://mast.stsci.edu/api/v0.1/Download/file/?uri={uri}'
    local_path = download_file(download_url, cache=True)

    assert identify_helper(local_path) == expected_helper


@pytest.mark.remote_data
@pytest.mark.filterwarnings(r"ignore::astropy.wcs.wcs.FITSFixedWarning")
def test_auto_config_manga():
    """A MaNGA cube is matched to cubeviz."""
    # The file below is a copy of
    # https://data.sdss.org/sas/dr14/manga/spectro/redux/v2_1_2/7495/stack/manga-7495-12704-LOGCUBE.fits.gz
    manga_cube_url = 'https://stsci.box.com/shared/static/28a88k1qfipo4yxc4p4d40v4axtlal8y.fits'
    local_path = download_file(manga_cube_url, cache=True)

    assert identify_helper(local_path) == 'cubeviz'