Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rcal 596 Updates for association processing #241

Merged
merged 25 commits into from
Jul 27, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7b78fb2
RCAL-596 Initial changes for reading associations
ddavis-stsci Jul 11, 2023
68628c8
rcal-596 Updates for association processing
ddavis-stsci Jul 18, 2023
abc7976
rcal-596 Updates for association processing
ddavis-stsci Jul 18, 2023
094ccca
RCAL-596 Added tests for filetype
ddavis-stsci Jul 18, 2023
45815ab
rcal-596 Incorporate/rebase _utils updates.
ddavis-stsci Jul 19, 2023
63c400f
change log msg
ddavis-stsci Jul 20, 2023
acd8d4d
RCAL-596 Fix my rebase of _utils
ddavis-stsci Jul 20, 2023
5ccc842
RCAL-596 Fix my rebase of _utils
ddavis-stsci Jul 20, 2023
937b0ac
RCAL-596 Fix my rebase of _utils
ddavis-stsci Jul 20, 2023
05df2a3
RCAL-596 Fix my rebase of _utils
ddavis-stsci Jul 20, 2023
a1b141b
RCAL-596 Reorder asn test in _utils
ddavis-stsci Jul 20, 2023
5f53e4c
Merge branch 'main' into rcal-596_dsd
zacharyburnett Jul 20, 2023
a1115e7
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 20, 2023
e283ad1
RCAL-596 code cleanup for review
ddavis-stsci Jul 20, 2023
eecddc2
Update utils
ddavis-stsci Jul 21, 2023
d2c912d
Update utils
ddavis-stsci Jul 21, 2023
c38833d
Merge branch 'spacetelescope:main' into rcal-596_dsd
ddavis-stsci Jul 21, 2023
786960e
RCAL-596 pseudo review updates
ddavis-stsci Jul 24, 2023
0d048a6
RCAL-596 import Path from pathlib
ddavis-stsci Jul 24, 2023
f3df2ff
rcal-596 review updates
ddavis-stsci Jul 25, 2023
3974ac6
rcal-596 updates for test_stnode
ddavis-stsci Jul 25, 2023
6538574
rcal-596 Updates
ddavis-stsci Jul 26, 2023
c4da77b
rcal-596 Update Changes
ddavis-stsci Jul 26, 2023
f332ce8
Merge branch 'main' into rcal-596_dsd
ddavis-stsci Jul 26, 2023
cb927e9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
0.17.0 (unreleased)
===================
- Add check for filetypes for association processing [#241]

- Remove the ``random_utils`` module and make ``maker_utils`` entirely deterministic. [#217]

- Add tests to ensure consistency between file-level schemas in RAD and the corresponding
Expand Down
10 changes: 8 additions & 2 deletions src/roman_datamodels/datamodels/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import asdf
import packaging.version

from roman_datamodels import validate
from roman_datamodels import validate, filetype

from ._core import MODEL_REGISTRY, DataModel

Expand All @@ -31,7 +31,7 @@
def rdm_open(init, memmap=False, target=None, **kwargs):
"""
Datamodel open/create function.
This function opens a Roman datamodel from an asdf file or generates
This function opens a Roman datamodel from various files or generates
the datamodel from an existing one.

Parameters
Expand All @@ -57,6 +57,11 @@ def rdm_open(init, memmap=False, target=None, **kwargs):
"""
with validate.nuke_validation():
file_to_close = None
if isinstance(init, str):
input_file_type = filetype.check(init)
if input_file_type =='asn':
print("Returning an asn string:", init)
return init
if target is not None:
if not issubclass(target, DataModel):
raise ValueError("Target must be a subclass of DataModel")
Expand All @@ -73,6 +78,7 @@ def rdm_open(init, memmap=False, target=None, **kwargs):
return init
# Copy the object so it knows not to close here
return init.copy()
# Copy the object so it knows not to close here
else:
try:
kwargs["copy_arrays"] = not memmap
Expand Down
56 changes: 56 additions & 0 deletions src/roman_datamodels/filetype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import io
import os
from pathlib import Path
from typing import Union


def check(init: Union[os.PathLike, Path, io.FileIO]) -> str:
    """
    Determine the type of a file and return it as a string

    Parameters
    ----------

    init : str, os.PathLike, or file-like object
        File path, or an open file object that provides ``read`` and
        ``seek``. File objects may be opened in binary or text mode.

    Returns
    -------
    file_type: str
        a string with the file type ("asdf" or "asn")

    Raises
    ------
    ValueError
        If a path has no extension or an unsupported one, if fewer than
        five characters/bytes can be read from a file object, or if
        ``init`` is neither a path nor a readable, seekable object.

    """

    supported = ("asdf", "json")

    if isinstance(init, (str, os.PathLike, Path)):
        # Paths are classified by extension only; the file is never opened.
        path, ext = os.path.splitext(init)
        ext = ext.strip(".")

        if not ext:
            raise ValueError(f"Input file path does not have an extension: {init}")

        if ext not in supported:  # Could be the file is zipped; try splitting again
            path, ext = os.path.splitext(path)
            ext = ext.strip(".")

            if ext not in supported:
                raise ValueError(f"Unrecognized file type for: {init}")

        if ext == "json":  # Assume json input is an association
            return "asn"

        return ext
    elif hasattr(init, "read") and hasattr(init, "seek"):
        # File objects are classified by their leading magic; rewind afterwards
        # so the caller can read the stream from the beginning.
        magic = init.read(5)
        init.seek(0, 0)

        if not magic or len(magic) < 5:
            raise ValueError(f"Cannot get file type of {str(init)}")

        # A text-mode stream yields ``str`` while a binary one yields ``bytes``.
        # Compare against a magic of the matching type; otherwise an ASDF file
        # opened in text mode would never match and be misreported as "asn".
        asdf_magic = "#ASDF" if isinstance(magic, str) else b"#ASDF"
        if magic == asdf_magic:
            return "asdf"

        # Anything else readable is assumed to be an association.
        return "asn"
    else:
        raise ValueError(f"Cannot get file type of {str(init)}")
Empty file added tests/data/empty.asdf
Empty file.
Empty file added tests/data/empty.json
Empty file.
43 changes: 43 additions & 0 deletions tests/data/example_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"date" : {
"title" : "[yyyy-mm-ddThh:mm:ss.ssssss] UTC date file created",
"type" : "string",
"sql_dtype" : "datetime2",
"fits_keyword" : "DATE",
"description" : "The UTC date and time when the HDU was created, in the form YYYY-MM-DDThh:mm:ss.ssssss, where YYYY shall be the four-digit calendar year number, MM the two-digit month number with January given by 01 and December by 12, and DD the two-digit day of the month. The literal T shall separate the date and time, hh shall be the two-digit hour in the day, mm the two-digit number of minutes after the hour, and ss.ssssss the number of seconds (two digits followed by a fraction accurate to microseconds) after the minute. Default values must not be given to any portion of the date/time string, and leading zeros must not be omitted.",
"calculation" : "Operating system time in the format of YYYY-MM-DDThh:mm:ss.ssssss",
"default_value" : "",
"example" : "2015-01-01T00:00:00.000001",
"units" : "",
"sw_source" : "calculation",
"source" : "Science Data Processing (SDP)",
"destination" : ["ScienceCommon.date","GuideStar.date"],
"level" : "1a",
"si" : "Multiple",
"section" : "Basic",
"mode" : "All",
"fits_hdu" : "PRIMARY",
"misc" : ""
},

"origin" : {
"title" : "institution responsible for creating FITS file",
"type" : "string",
"sql_dtype" : "nvarchar(20)",
"fits_keyword" : "ORIGIN",
"description" : "Identifies the organization or institution responsible for creating the FITS file.",
"calculation" : "",
"default_value" : "STSCI",
"example" : "STSCI",
"units" : "",
"sw_source" : "",
"source" : "Science Data Processing (SDP)",
"destination" : ["ScienceCommon.origin","GuideStar.origin"],
"level" : "1a",
"si" : "Multiple",
"section" : "Basic",
"mode" : "All",
"fits_hdu" : "PRIMARY",
"misc" : ""
}
}
1 change: 1 addition & 0 deletions tests/data/fake.asdf
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
not actually an ASDF file
1 change: 1 addition & 0 deletions tests/data/fake.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
not actually a JSON file
Binary file added tests/data/pluto.asdf
Binary file not shown.
41 changes: 41 additions & 0 deletions tests/test_filetype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from pathlib import Path

import pytest

from roman_datamodels import filetype

DATA_DIRECTORY = Path(__file__).parent / "data"


def test_filetype():
    """
    Exercise ``filetype.check`` on paths, open file handles and bad inputs.

    The classifications are gathered in a fixed order and compared against
    the expected list in one assertion; inputs that cannot be classified
    must raise ``ValueError``.
    """

    def classify_handle(name, mode="r"):
        # Classify a data file through an open handle instead of its path.
        with open(DATA_DIRECTORY / name, mode) as handle:
            return filetype.check(handle)

    observed = [
        filetype.check(DATA_DIRECTORY / "empty.json"),
        filetype.check(DATA_DIRECTORY / "example_schema.json"),
        classify_handle("fake.json"),
        filetype.check(DATA_DIRECTORY / "empty.asdf"),
        filetype.check(DATA_DIRECTORY / "pluto.asdf"),
        classify_handle("pluto.asdf", "rb"),
        filetype.check(DATA_DIRECTORY / "fake.asdf"),
        classify_handle("fake.json"),
        filetype.check(str(DATA_DIRECTORY / "pluto.asdf")),
    ]
    expected = [
        "asn",
        "asn",
        "asn",
        "asdf",
        "asdf",
        "asdf",
        "asdf",
        "asn",
        "asdf",
    ]

    # Check each position separately so a failure names the offending input.
    for position, (got, want) in enumerate(zip(observed, expected), start=1):
        assert got == want, f"file_{position}: expected {want!r}, got {got!r}"

    # Unsupported extension, non-file object, and extension-less path.
    for unsupported in (DATA_DIRECTORY / "empty.txt", 2, "test"):
        with pytest.raises(ValueError):
            filetype.check(unsupported)