diff --git a/CHANGES.rst b/CHANGES.rst index e90e7540b..f59a27eaa 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -21,6 +21,10 @@ The ASDF Standard is at v1.6.0 is removed in an upcoming asdf release will be ``False`` and asdf will no longer by-default memory map arrays. [#1667] +- Introduce ``asdf.util.load_yaml`` to load just the YAML contents + of an ASDF file (with the option ``tagged`` to load the contents + as a tree of ``asdf.tagged.Tagged`` instances to preserve tags) [#1700] + 3.0.1 (2023-10-30) ------------------ diff --git a/asdf/_tests/tags/core/tests/test_integer.py b/asdf/_tests/tags/core/tests/test_integer.py index 6e86de88e..82f5fa13b 100644 --- a/asdf/_tests/tags/core/tests/test_integer.py +++ b/asdf/_tests/tags/core/tests/test_integer.py @@ -46,16 +46,16 @@ def test_integer_storage(tmpdir, inline): with asdf.AsdfFile(tree) as af: af.write_to(tmpfile) - with asdf.open(tmpfile, _force_raw_types=True) as rf: - if inline: - assert "source" not in rf.tree["integer"]["words"] - assert "data" in rf.tree["integer"]["words"] - else: - assert "source" in rf.tree["integer"]["words"] - assert "data" not in rf.tree["integer"]["words"] - - assert "string" in rf.tree["integer"] - assert rf.tree["integer"]["string"] == str(value) + tree = asdf.util.load_yaml(tmpfile, tagged=True) + if inline: + assert "source" not in tree["integer"]["words"] + assert "data" in tree["integer"]["words"] + else: + assert "source" in tree["integer"]["words"] + assert "data" not in tree["integer"]["words"] + + assert "string" in tree["integer"] + assert tree["integer"]["string"] == str(value) def test_integer_conversion(): diff --git a/asdf/_tests/test_reference.py b/asdf/_tests/test_reference.py index 9e42388e0..1b733db81 100644 --- a/asdf/_tests/test_reference.py +++ b/asdf/_tests/test_reference.py @@ -221,8 +221,7 @@ def test_internal_reference(tmp_path): ff.write_to(buff) buff.seek(0) ff = asdf.AsdfFile() - content = asdf.AsdfFile()._open_impl(ff, buff, _get_yaml_content=True) - assert b"{$ref: ''}" in content + assert b"{$ref: ''}" in buff.getvalue() def test_implicit_internal_reference(tmp_path): diff --git a/asdf/_tests/test_util.py b/asdf/_tests/test_util.py index e7c6345de..e45d68eb7 100644 --- a/asdf/_tests/test_util.py +++ b/asdf/_tests/test_util.py @@ -1,8 +1,11 @@ +import contextlib import io import warnings +import numpy as np import pytest +import asdf from asdf import generic_io, util from asdf.exceptions import AsdfDeprecationWarning @@ -118,3 +121,27 @@ def test_minversion(): assert util.minversion(yaml, "3.1") assert util.minversion("yaml", "3.1") + + +@pytest.mark.parametrize("input_type", ["filename", "binary_file", "generic_file"]) +@pytest.mark.parametrize("tagged", [True, False]) +def test_load_yaml(tmp_path, input_type, tagged): + fn = tmp_path / "test.asdf" + asdf.AsdfFile({"a": np.zeros(3)}).write_to(fn) + + if input_type == "filename": + init = fn + ctx = contextlib.nullcontext() + elif input_type == "binary_file": + init = open(fn, "rb") + ctx = init + elif input_type == "generic_file": + init = generic_io.get_file(fn, "r") + ctx = init + + with ctx: + tree = util.load_yaml(init, tagged=tagged) + if tagged: + assert isinstance(tree["a"], asdf.tagged.TaggedDict) + else: + assert not isinstance(tree["a"], asdf.tagged.TaggedDict) diff --git a/asdf/commands/edit.py b/asdf/commands/edit.py index 03d40ae09..04e64336e 100644 --- a/asdf/commands/edit.py +++ b/asdf/commands/edit.py @@ -16,7 +16,7 @@ import yaml from asdf import constants, generic_io, schema, util -from asdf._asdf import AsdfFile, open_asdf +from asdf._asdf import AsdfFile from asdf._block import io as bio from asdf._block.exceptions import BlockIndexError @@ -259,11 +259,15 @@ def edit(path): continue try: - # Blocks are not read during validation, so this will not raise - # an error even though we're only opening the YAML portion of - # the file. - with open_asdf(io.BytesIO(new_content), _force_raw_types=True): - pass + # check this is an ASDF file + if new_content[: len(constants.ASDF_MAGIC)] != constants.ASDF_MAGIC: + msg = "Does not appear to be a ASDF file." + raise ValueError(msg) + # read the tagged tree (which also checks if the YAML is valid) + tagged_tree = util.load_yaml(io.BytesIO(new_content), tagged=True) + # validate the tagged tree + ctx = AsdfFile(version=new_asdf_version) + schema.validate(tagged_tree, ctx=ctx, reading=True) except yaml.YAMLError as e: print("Error: failed to parse updated YAML:") print_exception(e) diff --git a/asdf/util.py b/asdf/util.py index 875e57f80..bca67ea3c 100644 --- a/asdf/util.py +++ b/asdf/util.py @@ -11,6 +11,7 @@ from urllib.request import pathname2url import numpy as np +import yaml # The standard library importlib.metadata returns duplicate entrypoints # for all python versions up to and including 3.11 @@ -39,6 +40,7 @@ __all__ = [ + "load_yaml", "human_list", "get_array_base", "get_base_uri", @@ -55,6 +57,50 @@ ] +def load_yaml(init, tagged=False): + """ + Load just the yaml portion of an ASDF file + + Parameters + ---------- + + init : filename or file-like + If file-like this must be opened in binary mode. + + tagged: bool, optional + Return tree with instances of `asdf.tagged.Tagged` this + can be helpful if the yaml tags are of interest. + If False, the tree will only contain basic python types + (see the pyyaml ``BaseLoader`` documentation). + + Returns + ------- + + tree : dict + Dictionary representing the ASDF tree + """ + + from .generic_io import get_file + from .yamlutil import AsdfLoader + + if tagged: + loader = AsdfLoader + else: + loader = yaml.CBaseLoader if getattr(yaml, "__with_libyaml__", None) else yaml.BaseLoader + + with get_file(init, "r") as gf: + reader = gf.reader_until( + constants.YAML_END_MARKER_REGEX, + 7, + "End of YAML marker", + include=True, + ) + # The following call to yaml.load is safe because we're + # using only loaders that don't create custom python objects + content = yaml.load(reader, Loader=loader) # noqa: S506 + return content + + def human_list(line, separator="and"): """ Formats a list for human readability.