Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Esa ehst download improvements #2797

Merged
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ esa.hubble

- Update to TAP url to query data and download files, aligned with the new eHST Science Archive. [#2567][#2597]
- Status and maintenance messages from eHST TAP when the module is instantiated. get_status_messages method to retrieve them. [#2597]
- New methods to download single files ``download_file`` and download FITS associated to an observation ``download_fits_files``. [#2797]
- New function to retrieve all the files associated to an observation. [#2797]

solarsystem.neodys
^^^^^^^^^^^^^^^^^^
Expand Down Expand Up @@ -64,6 +66,7 @@ esa.hubble
a lot faster. [#2524]
- Method query_hst_tap has been deprecated and is replaced with query_tap, with the same arguments. [#2597]
- Product types in download_product method have been modified to: PRODUCT, SCIENCE_PRODUCT or POSTCARD. [#2597]
- Added ``proposal`` keyword argument to several methods now allows to filter by Proposal ID. [#2797]

alma
^^^^
Expand Down
139 changes: 114 additions & 25 deletions astroquery/esa/hubble/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
European Space Agency (ESA)

"""
import os
from urllib.parse import urlencode

from astropy import units
Expand All @@ -27,14 +28,29 @@
__all__ = ['ESAHubble', 'ESAHubbleClass']


def _check_rename_to_gz(filename):
rename = False
if os.path.exists(filename):
with open(filename, 'rb') as test_f:
if test_f.read(2) == b'\x1f\x8b' and not filename.endswith('.fits.gz'):
rename = True

if rename:
output = os.path.splitext(filename)[0] + '.fits.gz'
os.rename(filename, output)
return output
else:
return filename


class ESAHubbleClass(BaseQuery):
"""
Class to init ESA Hubble Module and communicate with eHST TAP
"""
TIMEOUT = conf.TIMEOUT
calibration_levels = {"AUXILIARY": 0, "RAW": 1, "CALIBRATED": 2,
"PRODUCT": 3}
product_types = ["SCIENCE", "PREVIEW", "THUMBNAIL" or "AUXILIARY"]
product_types = ["SCIENCE", "PREVIEW", "THUMBNAIL", "AUXILIARY"]
bsipocz marked this conversation as resolved.
Show resolved Hide resolved
copying_string = "Copying file to {0}..."

def __init__(self, *, tap_handler=None, show_messages=True):
Expand Down Expand Up @@ -93,7 +109,7 @@ def download_product(self, observation_id, *, calibration_level=None,
"RETRIEVAL_TYPE": "OBSERVATION"}

if filename is None:
filename = observation_id + ".tar"
filename = observation_id

if calibration_level:
params["CALIBRATIONLEVEL"] = calibration_level
Expand All @@ -107,7 +123,7 @@ def download_product(self, observation_id, *, calibration_level=None,
filename = self._get_product_filename(product_type, filename)
self._tap.load_data(params_dict=params, output_file=filename, verbose=verbose)

return filename
return _check_rename_to_gz(filename=filename)

def __set_product_type(self, product_type):
if product_type:
Expand Down Expand Up @@ -216,18 +232,12 @@ def __validate_product_type(self, product_type):
raise ValueError("This product_type is not allowed")

def _get_product_filename(self, product_type, filename):
if (product_type == "PRODUCT"):
return filename
elif (product_type == "SCIENCE"):
log.info("This is a SCIENCE_PRODUCT, the filename will be "
f"renamed to {filename}.fits.gz")
return f"{filename}.fits.gz"
elif (product_type == "THUMBNAIL" or product_type == "PREVIEW"):
log.info("This is a POSTCARD, the filename will be "
if (product_type == "THUMBNAIL" or product_type == "PREVIEW"):
log.info("This is an image, the filename will be "
f"renamed to {filename}.jpg")
return f"{filename}.jpg"

return filename
else:
return f"{filename}.zip"

def get_artifact(self, artifact_id, *, filename=None, verbose=False):
"""
Expand All @@ -236,7 +246,7 @@ def get_artifact(self, artifact_id, *, filename=None, verbose=False):
Parameters
----------
artifact_id : string
id of the artifact to be downloaded, mandatory
filename to be downloaded, mandatory
The identifier of the physical product (file) we want to retrieve.
filename : string
file name to be used to store the artifact, optional, default None
Expand All @@ -250,13 +260,83 @@ def get_artifact(self, artifact_id, *, filename=None, verbose=False):
None. It downloads the artifact indicated
"""

params = {"RETRIEVAL_TYPE": "PRODUCT", "ARTIFACTID": artifact_id, "TAPCLIENT": "ASTROQUERY"}
return self.download_file(file=artifact_id, filename=filename, verbose=verbose)

def get_associated_files(self, observation_id, *, verbose=False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had another go through this looking into some consistency with the currently present API.

Practically this has the purpose as get_obs_products for jwst, right? https://github.com/astropy/astroquery/blob/main/astroquery/esa/jwst/core.py#L947
While the return differs a bit, how do you feel about calling them the same, and in a follow-up PR making them do the same?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not the same, in this case we are only listing the file names, in case users want to filter them before download anything. get_obs_products downloads them.

"""
Retrieves all the files associated to an observation

Parameters
----------
observation_id : string
id of the observation to be downloaded, mandatory
The identifier of the observation we want to retrieve, regardless
of whether it is simple or composite.
verbose : bool
optional, default 'False'
flag to display information about the process

Returns
-------
None. The file is associated
"""
query = (f"select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
f"pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
f"join ehst.plane p on p.plane_id = art.plane_id where "
f"art.observation_id = '{observation_id}'")
return self.query_tap(query=query)

def download_fits_files(self, observation_id, *, verbose=False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same API considerations, maybe this could be called download_files, and having a kwarg for extensions defaulting to 'fits'? This is really only a discussion item as download_files is not really wide-spread, and the currently existing ones in alma and casda are having different signature

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We wanted a specific method naming FITS, to make it more visible to users.

"""
Retrieves all the FITS files associated to an observation

Parameters
----------
observation_id : string
id of the observation to be downloaded, mandatory
The identifier of the observation we want to retrieve, regardless
of whether it is simple or composite.
verbose : bool
optional, default 'False'
flag to display information about the process

Returns
-------
None. The file is associated
"""
results = self.get_associated_files(observation_id=observation_id, verbose=verbose)
for file in [i['filename'] for i in results if i['filename'].endswith('.fits')]:
if verbose:
print(f"Downloading {file} ...")
self.download_file(file=file, filename=file, verbose=verbose)

def download_file(self, file, *, filename=None, verbose=False):
"""
Download a file from eHST based on its filename.

Parameters
----------
file : string
file name of the artifact to be downloaded

filename : string
file name to be used to store the file, optional, default None
verbose : bool
optional, default 'False'
flag to display information about the process

Returns
-------
None. The file is associated
"""

params = {"RETRIEVAL_TYPE": "PRODUCT", "ARTIFACTID": file, "TAPCLIENT": "ASTROQUERY"}
if filename is None:
filename = artifact_id
filename = file

self._tap.load_data(params_dict=params, output_file=filename, verbose=verbose)

return filename
return _check_rename_to_gz(filename=filename)

def get_postcard(self, observation_id, *, calibration_level="RAW",
resolution=256, filename=None, verbose=False):
Expand Down Expand Up @@ -391,6 +471,7 @@ def cone_search_criteria(self, radius, *, target=None,
obs_collection=None,
instrument_name=None,
filters=None,
proposal=None,
async_job=True,
filename=None,
output_format='votable',
Expand Down Expand Up @@ -428,6 +509,8 @@ def cone_search_criteria(self, radius, *, target=None,
Name(s) of the instrument(s) used to generate the dataset
filters : list of str, optional
Name(s) of the filter(s) used to generate the dataset
proposal : int, optional
Proposal ID associated to the observations
async_job : bool, optional, default 'False'
executes the query (job) in asynchronous/synchronous mode (default
synchronous)
Expand Down Expand Up @@ -460,6 +543,7 @@ def cone_search_criteria(self, radius, *, target=None,
obs_collection=obs_collection,
instrument_name=instrument_name,
filters=filters,
proposal=proposal,
async_job=True,
get_query=True)
if crit_query.endswith(")"):
Expand Down Expand Up @@ -619,7 +703,7 @@ def query_hst_tap(self, query, *, async_job=False, output_file=None,
def query_criteria(self, *, calibration_level=None,
data_product_type=None, intent=None,
obs_collection=None, instrument_name=None,
filters=None, async_job=True, output_file=None,
filters=None, proposal=None, async_job=True, output_file=None,
output_format="votable", verbose=False,
get_query=False):
"""
Expand All @@ -639,13 +723,15 @@ def query_criteria(self, *, calibration_level=None,
intent : str, optional
The intent of the original observer in acquiring this observation.
SCIENCE or CALIBRATION
collection : list of str, optional
obs_collection : list of str, optional
List of collections that are available in eHST catalogue.
HLA, HST
HLA, HST, HAP
instrument_name : list of str, optional
Name(s) of the instrument(s) used to generate the dataset
filters : list of str, optional
Name(s) of the filter(s) used to generate the dataset
proposal : int, optional
Proposal ID associated to the observations
async_job : bool, optional, default 'True'
executes the query (job) in asynchronous/synchronous mode (default
synchronous)
Expand Down Expand Up @@ -680,6 +766,11 @@ def query_criteria(self, *, calibration_level=None,
parameters.append("intent LIKE '%{}%'".format(intent.lower()))
else:
raise ValueError("intent must be a string")
if proposal is not None:
if isinstance(proposal, int):
parameters.append("proposal_id = '{}'".format(proposal))
else:
raise ValueError("Proposal ID must be an integer")
if self.__check_list_strings(obs_collection):
parameters.append("(collection LIKE '%{}%')".format(
"%' OR collection LIKE '%".join(obs_collection)
Expand Down Expand Up @@ -767,7 +858,7 @@ def get_status_messages(self):
if response.status == 200:
for line in response:
string_message = line.decode("utf-8")
print(string_message[string_message.index('=')+1:])
print(string_message[string_message.index('=') + 1:])
except OSError:
print("Status messages could not be retrieved")

Expand Down Expand Up @@ -810,10 +901,8 @@ def get_columns(self, table_name, *, only_names=True, verbose=False):
return columns

def _getCoordInput(self, value):
if not (isinstance(value, str)
or isinstance(value, SkyCoord)):
raise ValueError("Coordinates"
+ " must be either a string or astropy.coordinates")
if not (isinstance(value, str) or isinstance(value, SkyCoord)):
raise ValueError("Coordinates must be either a string or astropy.coordinates")
if isinstance(value, str):
return SkyCoord(value)
else:
Expand Down
86 changes: 84 additions & 2 deletions astroquery/esa/hubble/tests/test_esa_hubble.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import os
import shutil
import gzip
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import patch
Expand All @@ -22,6 +23,7 @@
from requests.models import Response

from astroquery.esa.hubble import ESAHubbleClass
from astroquery.esa.hubble.core import _check_rename_to_gz
from astroquery.esa.hubble.tests.dummy_tap_handler import DummyHubbleTapHandler
from astropy.utils.exceptions import AstropyDeprecationWarning

Expand Down Expand Up @@ -76,8 +78,10 @@ def pformat():

class TestESAHubble:

def get_dummy_tap_handler(self):
parameterst = {'query': "select top 10 * from hsc_v2.hubble_sc2",
def get_dummy_tap_handler(self, query=None):
if query is None:
query = "select top 10 * from hsc_v2.hubble_sc2"
parameterst = {'query': query,
'output_file': "test2.vot",
'output_format': "votable",
'verbose': False}
Expand Down Expand Up @@ -228,6 +232,58 @@ def test_get_artifact(self, tmp_path):
path = Path(tmp_path, "w0ji0v01t_c2f.fits.gz")
ehst.get_artifact(artifact_id=path)

def test_download_file(self, tmp_path):
ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
file = 'w0ji0v01t_c2f.fits'
path = Path(tmp_path, file + '.gz')
ehst.download_file(file=path, filename=path)

def test_get_associated_files(self):
observation_id = 'test'
query = (f"select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
f"pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
f"join ehst.plane p on p.plane_id = art.plane_id where "
f"art.observation_id = '{observation_id}'")
parameters = {'query': query,
'output_file': 'test2.vot',
'output_format': "votable",
'verbose': False}
ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(query=query), show_messages=False)
ehst.get_associated_files(observation_id=observation_id)
self.get_dummy_tap_handler(query=query).check_call("launch_job", parameters)

@patch.object(ESAHubbleClass, 'get_associated_files')
def test_download_fits(self, mock_associated_files):
observation_id = 'test'
query = (f"select art.artifact_id as filename, p.calibration_level, art.archive_class as type, "
f"pg_size_pretty(art.size_uncompr) as size_uncompressed from ehst.artifact art "
f"join ehst.plane p on p.plane_id = art.plane_id where "
f"art.observation_id = '{observation_id}'")
parameters = {'query': query,
'output_file': 'test2.vot',
'output_format': "votable",
'verbose': False}
mock_associated_files.return_value = [{'filename': 'test.fits'}]
ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(query=query), show_messages=False)
ehst.download_fits_files(observation_id=observation_id)
self.get_dummy_tap_handler(query=query).check_call("launch_job", parameters)

def test_is_not_gz(self, tmp_path):
target_file = data_path('cone_search.vot')
ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
assert _check_rename_to_gz(target_file) == target_file

def test_is_gz(self, tmp_path):
ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
# test_file = data_path('m31.vot.test')
temp_file = 'testgz'
target_file = os.path.join(tmp_path, temp_file)
with gzip.open(target_file, 'wb') as f:
f.write(b'')
# with open(test_file, 'rb') as f_in, gzip.open(target_file, 'wb') as f_out:
# f_out.writelines(f_in)
assert _check_rename_to_gz(target_file) == target_file + '.fits.gz'

def test_get_columns(self):
parameters = {'table_name': "table",
'only_names': True,
Expand All @@ -238,6 +294,32 @@ def test_get_columns(self):
ehst.get_columns(table_name="table", only_names=True, verbose=True)
dummyTapHandler.check_call("get_columns", parameters)

def test_query_criteria_proposal(self):
parameters1 = {'proposal': 12345,
'async_job': False,
'output_file': "output_test_query_by_criteria.vot.gz",
'output_format': "votable",
'verbose': True,
'get_query': True}
ehst = ESAHubbleClass(tap_handler=self.get_dummy_tap_handler(), show_messages=False)
test_query = ehst.query_criteria(proposal=parameters1['proposal'],
async_job=parameters1['async_job'],
output_file=parameters1['output_file'],
output_format=parameters1['output_format'],
verbose=parameters1['verbose'],
get_query=parameters1['get_query'])
parameters2 = {'query': test_query,
'output_file': "output_test_query_by_criteria.vot.gz",
'output_format': "votable",
'verbose': False}
parameters3 = {'query': "select * from ehst.archive where("
"proposal_id = '12345')",
'output_file': "output_test_query_by_criteria.vot.gz",
'output_format': "votable",
'verbose': False}
dummy_tap_handler = DummyHubbleTapHandler("launch_job", parameters2)
dummy_tap_handler.check_call("launch_job", parameters3)

def test_query_criteria(self):
parameters1 = {'calibration_level': "PRODUCT",
'data_product_type': "image",
Expand Down
Loading