From 9245d94622664a6abd9f5da53ae4e13499d881b2 Mon Sep 17 00:00:00 2001 From: Jon Duckworth Date: Wed, 23 Jun 2021 22:40:52 -0400 Subject: [PATCH 1/6] Make json methods "public", use args/kwargs --- pystac/stac_io.py | 66 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/pystac/stac_io.py b/pystac/stac_io.py index 00e285d23..2f60d17a6 100644 --- a/pystac/stac_io.py +++ b/pystac/stac_io.py @@ -18,7 +18,7 @@ from urllib.error import HTTPError import pystac -from pystac.utils import safe_urlparse +from pystac.utils import safe_urlparse, get_opt from pystac.serialization import ( merge_common_properties, identify_stac_object_type, @@ -78,7 +78,19 @@ def write_text( """ raise NotImplementedError("write_text not implemented") - def _json_loads(self, txt: str, source: Union[str, "Link_Type"]) -> Dict[str, Any]: + def json_loads(self, txt: str, *args: Any, **kwargs: Any) -> Dict[str, Any]: + """Method used internally by :class:`StacIO` instances to deserialize a + dictionary from a JSON string. + + This method may be overwritten in :class:`StacIO` sub-classes to provide custom + deserialization logic. The method accepts arbitrary keyword arguments. These are + not used by the default implementation, but may be used by sub-class + implementations. + + Args: + + txt : The JSON string to deserialize to a dictionary. + """ result: Dict[str, Any] if orjson is not None: result = orjson.loads(txt) @@ -86,9 +98,20 @@ def _json_loads(self, txt: str, source: Union[str, "Link_Type"]) -> Dict[str, An result = json.loads(txt) return result - def _json_dumps( - self, json_dict: Dict[str, Any], source: Union[str, "Link_Type"] - ) -> str: + def json_dumps(self, json_dict: Dict[str, Any], *args: Any, **kwargs: Any) -> str: + """Method used internally by :class:`StacIO` instances to serialize a dictionary + to a JSON string. + + This method may be overwritten in :class:`StacIO` sub-classes to provide custom + serialization logic. 
The method accepts arbitrary keyword arguments. These are + not used by the default implementation, but may be used by sub-class + implementations (see :meth:`DuplicateKeyReportingMixin.json_dumps` as an + example). + + Args: + + json_dict : The dictionary to serialize + """ if orjson is not None: return orjson.dumps(json_dict, option=orjson.OPT_INDENT_2).decode("utf-8") else: @@ -143,16 +166,24 @@ def read_json( Args: source : The source from which to read. + *args : Additional positional arguments to be passed to + :meth:`StacIO.read_text`. + **kwargs : Additional keyword arguments to be passed to + :meth:`StacIO.read_text`. Returns: dict: A dict representation of the JSON contained in the file at the given source. """ txt = self.read_text(source, *args, **kwargs) - return self._json_loads(txt, source) + return self.json_loads(txt) def read_stac_object( - self, source: Union[str, "Link_Type"], root: Optional["Catalog_Type"] = None + self, + source: Union[str, "Link_Type"], + root: Optional["Catalog_Type"] = None, + *args: Any, + **kwargs: Any, ) -> "STACObject_Type": """Read a STACObject from a JSON file at the given source. @@ -164,17 +195,25 @@ def read_stac_object( root : Optional root of the catalog for this object. If provided, the root's resolved object cache can be used to search for previously resolved instances of the STAC object. + *args : Additional positional arguments to be passed to + :meth:`StacIO.read_json`. + **kwargs : Additional keyword arguments to be passed to + :meth:`StacIO.read_json`. Returns: STACObject: The deserialized STACObject from the serialized JSON contained in the file at the given uri. 
""" - d = self.read_json(source) + d = self.read_json(source, *args, **kwargs) href = source if isinstance(source, str) else source.get_absolute_href() return self.stac_object_from_dict(d, href=href, root=root, preserve_dict=False) def save_json( - self, dest: Union[str, "Link_Type"], json_dict: Dict[str, Any] + self, + dest: Union[str, "Link_Type"], + json_dict: Dict[str, Any], + *args: Any, + **kwargs: Any, ) -> None: """Write a dict to the given URI as JSON. @@ -184,8 +223,12 @@ def save_json( Args: dest : The destination file to write the text to. json_dict : The JSON dict to write. + *args : Additional positional arguments to be passed to + :meth:`StacIO.json_dumps`. + **kwargs : Additional keyword arguments to be passed to + :meth:`StacIO.json_dumps`. """ - txt = self._json_dumps(json_dict, dest) + txt = self.json_dumps(json_dict, *args, **kwargs) self.write_text(dest, txt) @classmethod @@ -261,7 +304,8 @@ class DuplicateKeyReportingMixin(StacIO): See https://github.com/stac-utils/pystac/issues/313 """ - def _json_loads(self, txt: str, source: Union[str, "Link_Type"]) -> Dict[str, Any]: + def json_loads(self, txt: str, *args: Any, **kwargs: Any) -> Dict[str, Any]: + source: Union[str, "Link_Type"] = get_opt(kwargs.get("source")) result: Dict[str, Any] = json.loads( txt, object_pairs_hook=self.duplicate_object_names_report_builder(source) ) From d8e4bdeac2b5c752449c6810274c48c9f2fb91af Mon Sep 17 00:00:00 2001 From: Jon Duckworth Date: Wed, 23 Jun 2021 22:40:52 -0400 Subject: [PATCH 2/6] Move DuplicateObjectKeyError to errors module --- pystac/__init__.py | 1 + pystac/errors.py | 6 ++++++ pystac/stac_io.py | 4 ---- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pystac/__init__.py b/pystac/__init__.py index c382046df..a2004ae3f 100644 --- a/pystac/__init__.py +++ b/pystac/__init__.py @@ -6,6 +6,7 @@ from pystac.errors import ( STACError, STACTypeError, + DuplicateObjectKeyError, ExtensionAlreadyExistsError, ExtensionNotImplemented, 
ExtensionTypeError, diff --git a/pystac/errors.py b/pystac/errors.py index a5eae5df4..552d0da99 100644 --- a/pystac/errors.py +++ b/pystac/errors.py @@ -19,6 +19,12 @@ class STACTypeError(Exception): pass +class DuplicateObjectKeyError(Exception): + """Raised when deserializing a JSON object containing a duplicate key.""" + + pass + + class ExtensionTypeError(Exception): """An ExtensionTypeError is raised when an extension is used against an object that the extension does not apply to diff --git a/pystac/stac_io.py b/pystac/stac_io.py index 2f60d17a6..c866acc17 100644 --- a/pystac/stac_io.py +++ b/pystac/stac_io.py @@ -293,10 +293,6 @@ def write_text_to_href( f.write(txt) -class DuplicateObjectKeyError(Exception): - pass - - class DuplicateKeyReportingMixin(StacIO): """A mixin for StacIO implementations that will report on duplicate keys in the JSON being read in. From 8be1813c88e06b93f7ed09228e4aaff76b3a4a47 Mon Sep 17 00:00:00 2001 From: Jon Duckworth Date: Wed, 23 Jun 2021 22:40:52 -0400 Subject: [PATCH 3/6] Move file-level error reporting for duplicate keys to read_json --- pystac/stac_io.py | 71 +++++++++++++++++++++++++------------------ tests/test_stac_io.py | 30 +++++++++++++++++- 2 files changed, 71 insertions(+), 30 deletions(-) diff --git a/pystac/stac_io.py b/pystac/stac_io.py index c866acc17..747aff8a4 100644 --- a/pystac/stac_io.py +++ b/pystac/stac_io.py @@ -3,7 +3,6 @@ import json from typing import ( Any, - Callable, Dict, List, Optional, @@ -18,7 +17,7 @@ from urllib.error import HTTPError import pystac -from pystac.utils import safe_urlparse, get_opt +from pystac.utils import safe_urlparse from pystac.serialization import ( merge_common_properties, identify_stac_object_type, @@ -105,8 +104,7 @@ def json_dumps(self, json_dict: Dict[str, Any], *args: Any, **kwargs: Any) -> st This method may be overwritten in :class:`StacIO` sub-classes to provide custom serialization logic. The method accepts arbitrary keyword arguments. 
These are not used by the default implementation, but may be used by sub-class - implementations (see :meth:`DuplicateKeyReportingMixin.json_dumps` as an - example). + implementations. Args: @@ -300,36 +298,51 @@ class DuplicateKeyReportingMixin(StacIO): See https://github.com/stac-utils/pystac/issues/313 """ - def json_loads(self, txt: str, *args: Any, **kwargs: Any) -> Dict[str, Any]: - source: Union[str, "Link_Type"] = get_opt(kwargs.get("source")) + def json_loads(self, txt: str, *_: Any, **__: Any) -> Dict[str, Any]: + """Overwrites :meth:`StacIO.json_loads` as the internal method used by + :class:`DuplicateKeyReportingMixin` for deserializing a JSON string to a + dictionary while checking for duplicate object keys. + + Raises: + + pystac.DuplicateObjectKeyError : If a duplicate object key is found. + """ result: Dict[str, Any] = json.loads( - txt, object_pairs_hook=self.duplicate_object_names_report_builder(source) + txt, object_pairs_hook=self._report_duplicate_object_names ) return result - @staticmethod - def duplicate_object_names_report_builder( - source: Union[str, "Link_Type"] - ) -> Callable[[List[Tuple[str, Any]]], Dict[str, Any]]: - def report_duplicate_object_names( - object_pairs: List[Tuple[str, Any]] - ) -> Dict[str, Any]: - result: Dict[str, Any] = {} - for key, value in object_pairs: - if key in result: - url = ( - source - if isinstance(source, str) - else source.get_absolute_href() - ) - raise DuplicateObjectKeyError( - f"Found duplicate object name “{key}” in “{url}”" - ) - else: - result[key] = value - return result + def read_json( + self, source: Union[str, "Link_Type"], *args: Any, **kwargs: Any + ) -> Dict[str, Any]: + """Overwrites :meth:`StacIO.read_json` for deserializing a JSON file to a + dictionary while checking for duplicate object keys. + + Raises: + + pystac.DuplicateObjectKeyError : If a duplicate object key is found. 
+ """ + txt = self.read_text(source, *args, **kwargs) + try: + return self.json_loads(txt, source=source) + except pystac.DuplicateObjectKeyError as e: + url = source if isinstance(source, str) else source.get_absolute_href() + msg = str(e) + f" in {url}" + raise pystac.DuplicateObjectKeyError(msg) - return report_duplicate_object_names + @staticmethod + def _report_duplicate_object_names( + object_pairs: List[Tuple[str, Any]] + ) -> Dict[str, Any]: + result: Dict[str, Any] = {} + for key, value in object_pairs: + if key in result: + raise pystac.DuplicateObjectKeyError( + f'Found duplicate object name "{key}"' + ) + else: + result[key] = value + return result class STAC_IO: diff --git a/tests/test_stac_io.py b/tests/test_stac_io.py index 376f8bd82..4091ab5ee 100644 --- a/tests/test_stac_io.py +++ b/tests/test_stac_io.py @@ -4,7 +4,7 @@ import tempfile import pystac -from pystac.stac_io import STAC_IO, StacIO +from pystac.stac_io import STAC_IO, StacIO, DefaultStacIO, DuplicateKeyReportingMixin from tests.utils import TestCases @@ -117,3 +117,31 @@ def test_read_from_stac_object(self) -> None: TestCases.get_path("data-files/catalogs/test-case-1/catalog.json") ) self.assertIsInstance(catalog, pystac.Catalog) + + def test_report_duplicate_keys(self) -> None: + # Directly from dict + class ReportingStacIO(DefaultStacIO, DuplicateKeyReportingMixin): + pass + + stac_io = ReportingStacIO() + test_json = """{ + "key": "value_1", + "key": "value_2" + }""" + + with self.assertRaises(pystac.DuplicateObjectKeyError) as excinfo: + stac_io.json_loads(test_json) + self.assertEqual(str(excinfo.exception), 'Found duplicate object name "key"') + + # From file + with tempfile.TemporaryDirectory() as tmp_dir: + src_href = os.path.join(tmp_dir, "test.json") + with open(src_href, "w") as dst: + dst.write(test_json) + + with self.assertRaises(pystac.DuplicateObjectKeyError) as excinfo: + stac_io.read_json(src_href) + self.assertEqual( + str(excinfo.exception), + f'Found duplicate object 
name "key" in {src_href}', + ) From 3d29abccce1e9eb5f9f4d754af8497db6892870c Mon Sep 17 00:00:00 2001 From: Jon Duckworth Date: Wed, 23 Jun 2021 22:40:52 -0400 Subject: [PATCH 4/6] Update I/O docs --- docs/api.rst | 50 ++++++++++++++++++ docs/concepts.rst | 130 +++++++++++++++++++++++++++------------------- pystac/stac_io.py | 89 +++++++++++++++++++++++-------- 3 files changed, 195 insertions(+), 74 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 91469842b..7cf487520 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -171,6 +171,20 @@ StacIO :members: :undoc-members: +DefaultStacIO +~~~~~~~~~~~~~ + +.. autoclass:: pystac.stac_io.DefaultStacIO + :members: + :show-inheritance: + +DuplicateKeyReportingMixin +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: pystac.stac_io.DuplicateKeyReportingMixin + :members: + :show-inheritance: + STAC_IO ~~~~~~~ @@ -213,11 +227,47 @@ STACError .. autoclass:: pystac.STACError +STACTypeError +~~~~~~~~~~~~~ + +.. autoclass:: pystac.STACTypeError + +DuplicateObjectKeyError +~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: pystac.DuplicateObjectKeyError + +ExtensionAlreadyExistsError +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: pystac.ExtensionAlreadyExistsError + +ExtensionTypeError +~~~~~~~~~~~~~~~~~~ + +.. autoclass:: pystac.ExtensionTypeError + +ExtensionNotImplemented +~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: pystac.ExtensionNotImplemented + ExtensionTypeError ~~~~~~~~~~~~~~~~~~ .. autoclass:: pystac.ExtensionTypeError +RequiredPropertyMissing +~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: pystac.RequiredPropertyMissing + +STACValidationError +~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: pystac.STACValidationError + + Extensions ---------- diff --git a/docs/concepts.rst b/docs/concepts.rst index 5ada93996..7cd0c3057 100644 --- a/docs/concepts.rst +++ b/docs/concepts.rst @@ -225,72 +225,96 @@ written (e.g. if you are working with self-contained catalogs). .. 
_using stac_io: -Using STAC_IO +I/O in PySTAC ============= -The :class:`~pystac.STAC_IO` class is the way PySTAC reads and writes text from file -locations. Since PySTAC aims to be dependency-free, there is no default mechanisms to -read and write from anything but the local file system. However, users of PySTAC may -want to read and write from other file systems, such as HTTP or cloud object storage. -STAC_IO allows users to hook into PySTAC and define their own reading and writing -primitives to allow for those use cases. - -To enable reading from other types of file systems, it is recommended that in the -`__init__.py` of the client module, or at the beginning of the script using PySTAC, you -overwrite the :func:`STAC_IO.read_text_method ` and -:func:`STAC_IO.write_text_method ` members of STAC_IO -with functions that read and write however you need. For example, this code will allow +The :class:`pystac.StacIO` class defines fundamental methods for I/O +operations within PySTAC, including serialization and deserialization to and from +JSON files and conversion to and from Python dictionaries. This is an abstract class +and should not be instantiated directly. However, PySTAC provides a +:class:`pystac.stac_io.DefaultStacIO` class with minimal implementations of these +methods. This default implementation provides support for reading and writing files +from the local filesystem as well as HTTP URIs (using ``urllib``). This class is +created automatically by all of the object-specific I/O methods (e.g. +:meth:`pystac.Catalog.from_file`), so most users will not need to instantiate this +class themselves. + +If you require custom logic for I/O operations or would like to use a 3rd-party library +for I/O operations (e.g. ``requests``), you can create a sub-class of +:class:`pystac.StacIO` (or :class:`pystac.DefaultStacIO`) and customize the methods as +you see fit. 
You can then pass instances of this custom sub-class into the ``stac_io`` +argument of most object-specific I/O methods. You can also use +:meth:`pystac.StacIO.set_default` in your client's ``__init__.py`` file to make this +sub-class the default :class:`pystac.StacIO` implementation throughout the library. + +For example, this code will allow for reading from AWS's S3 cloud object storage using `boto3 -`_: +`__: .. code-block:: python from urllib.parse import urlparse import boto3 - from pystac import STAC_IO - - def my_read_method(uri): - parsed = urlparse(uri) - if parsed.scheme == 's3': - bucket = parsed.netloc - key = parsed.path[1:] - s3 = boto3.resource('s3') - obj = s3.Object(bucket, key) - return obj.get()['Body'].read().decode('utf-8') - else: - return STAC_IO.default_read_text_method(uri) - - def my_write_method(uri, txt): - parsed = urlparse(uri) - if parsed.scheme == 's3': - bucket = parsed.netloc - key = parsed.path[1:] - s3 = boto3.resource("s3") - s3.Object(bucket, key).put(Body=txt) - else: - STAC_IO.default_write_text_method(uri, txt) - - STAC_IO.read_text_method = my_read_method - STAC_IO.write_text_method = my_write_method - -If you are only going to read from another source, e.g. HTTP, you could only replace the -read method. 
For example, using the `requests library -`_: + from pystac import Link + from pystac.stac_io import DefaultStacIO, StacIO + + class CustomStacIO(DefaultStacIO): + def __init__(): + self.s3 = boto3.resource("s3") + + def read_text( + self, source: Union[str, Link], *args: Any, **kwargs: Any + ) -> str: + parsed = urlparse(uri) + if parsed.scheme == "s3": + bucket = parsed.netloc + key = parsed.path[1:] + + obj = self.s3.Object(bucket, key) + return obj.get()["Body"].read().decode("utf-8") + else: + return super().read_text(source, *args, **kwargs) + + def write_text( + self, dest: Union[str, Link], txt: str, *args: Any, **kwargs: Any + ) -> None: + parsed = urlparse(uri) + if parsed.scheme == "s3": + bucket = parsed.netloc + key = parsed.path[1:] + s3 = boto3.resource("s3") + s3.Object(bucket, key).put(Body=txt, ContentEncoding="utf-8") + else: + super().write_text(dest, txt, *args, **kwargs) + + StacIO.set_default(CustomStacIO) + + +If you only need to customize read operations you can inherit from +:class:`~pystac.stac_io.DefaultStacIO` and only overwrite the read method. For example, +to take advantage of connection pooling using a `requests.Session +`__: .. 
code-block:: python from urllib.parse import urlparse import requests - from pystac import STAC_IO - - def my_read_method(uri): - parsed = urlparse(uri) - if parsed.scheme.startswith('http'): - return requests.get(uri).text - else: - return STAC_IO.default_read_text_method(uri) - - STAC_IO.read_text_method = my_read_method + from pystac.stac_io import DefaultStacIO, StacIO + + class ConnectionPoolingIO(DefaultStacIO): + def __init__(): + self.session = requests.Session() + + def read_text( + self, source: Union[str, Link], *args: Any, **kwargs: Any + ) -> str: + parsed = urlparse(uri) + if parsed.scheme.startswith("http"): + return self.session.get(uri).text + else: + return super().read_text(source, *args, **kwargs) + + StacIO.set_default(ConnectionPoolingIO) Validation ========== diff --git a/pystac/stac_io.py b/pystac/stac_io.py index 747aff8a4..bb7a9db54 100644 --- a/pystac/stac_io.py +++ b/pystac/stac_io.py @@ -46,14 +46,19 @@ def read_text( ) -> str: """Read text from the given URI. - The source to read from can be specified - as a string or a Link. If it's a string, it's the URL of the HREF from which to - read. When reading links, PySTAC will pass in the entire link body. - This enables implementations to utilize additional link information, - e.g. the "post" information in a pagination link from a STAC API search. + The source to read from can be specified as a string or a + :class:`~pystac.Link`. If it is a string, it must be a URI or local path from + which to read. Using a :class:`~pystac.Link` enables implementations to use + additional link information, such as paging information contained in the + extended links described in the `STAC API spec + `__. Args: source : The source to read from. + *args : Arbitrary positional arguments that may be utilized by the concrete + implementation. + **kwargs : Arbitrary keyword arguments that may be utilized by the concrete + implementation. 
Returns: str: The text contained in the file at the location specified by the uri. @@ -66,10 +71,10 @@ def write_text( ) -> None: """Write the given text to a file at the given URI. - The destination to write to from can be specified - as a string or a Link. If it's a string, it's the URL of the HREF from which to - read. When writing based on links links, PySTAC will pass in the entire - link body. + The destination to write to can be specified as a string or a + :class:`~pystac.Link`. If it is a string, it must be a URI or local path to + which to write. Using a :class:`~pystac.Link` enables implementations to use + additional link information. Args: dest : The destination to write to. @@ -122,6 +127,21 @@ def stac_object_from_dict( root: Optional["Catalog_Type"] = None, preserve_dict: bool = True, ) -> "STACObject_Type": + """Deserializes a :class:`~pystac.STACObject` sub-class instance from a + dictionary. + + Args: + + d : The dictionary to deserialize + href : Optional href to associate with the STAC object + root : Optional root :class:`~pystac.Catalog` to associate with the + STAC object. + preserve_dict: If ``False``, the dict parameter ``d`` may be modified + during this method call. Otherwise the dict is not mutated. + Defaults to ``True``, which results in a deepcopy of the + parameter. Set to ``False`` when possible to avoid the performance + hit of a deepcopy. + """ if identify_stac_object_type(d) == pystac.STACObjectType.ITEM: collection_cache = None if root is not None: @@ -244,8 +264,11 @@ def default(cls) -> "StacIO": class DefaultStacIO(StacIO): def read_text( - self, source: Union[str, "Link_Type"], *args: Any, **kwargs: Any + self, source: Union[str, "Link_Type"], *_: Any, **__: Any ) -> str: + """A concrete implementation of :meth:`StacIO.read_text <pystac.StacIO.read_text>`. 
Converts the + ``source`` argument to a string (if it is not already) and delegates to + :meth:`DefaultStacIO.read_text_from_href` for opening and reading the file.""" href: Optional[str] if isinstance(source, str): href = source @@ -253,9 +276,19 @@ def read_text( href = source.get_absolute_href() if href is None: raise IOError(f"Could not get an absolute HREF from link {source}") - return self.read_text_from_href(href, *args, **kwargs) + return self.read_text_from_href(href) + + def read_text_from_href(self, href: str) -> str: + """Reads file as a UTF-8 string. + + If ``href`` has a "scheme" (e.g. if it starts with "https://") then this will + use :func:`urllib.request.urlopen` to open the file and read the contents; + otherwise, :func:`open` will be used to open a local file. + + Args: - def read_text_from_href(self, href: str, *args: Any, **kwargs: Any) -> str: + href : The URI of the file to open. + """ parsed = safe_urlparse(href) href_contents: str if parsed.scheme != "": @@ -270,8 +303,11 @@ def read_text_from_href(self, href: str, *args: Any, **kwargs: Any) -> str: return href_contents def write_text( - self, dest: Union[str, "Link_Type"], txt: str, *args: Any, **kwargs: Any + self, dest: Union[str, "Link_Type"], txt: str, *_: Any, **__: Any ) -> None: + """A concrete implementation of :meth:`StacIO.write_text `. Converts the + ``dest`` argument to a string (if it is not already) and delegates to + :meth:`DefaultStacIO.write_text_from_href` for opening and reading the file.""" href: Optional[str] if isinstance(dest, str): href = dest @@ -279,11 +315,21 @@ def write_text( href = dest.get_absolute_href() if href is None: raise IOError(f"Could not get an absolute HREF from link {dest}") - return self.write_text_to_href(href, txt, *args, **kwargs) + return self.write_text_to_href(href, txt) def write_text_to_href( - self, href: str, txt: str, *args: Any, **kwargs: Any + self, href: str, txt: str ) -> None: + """Writes text to file using UTF-8 encoding. 
+ + This implementation uses :func:`open` and therefore can only write to the local + file system. + + Args: + + href : The path to which the file will be written. + txt : The string content to write to the file. + """ dirname = os.path.dirname(href) if dirname != "" and not os.path.isdir(dirname): os.makedirs(dirname) @@ -292,16 +338,16 @@ def write_text_to_href( class DuplicateKeyReportingMixin(StacIO): - """A mixin for StacIO implementations that will report + """A mixin for :class:`pystac.StacIO` implementations that will report on duplicate keys in the JSON being read in. See https://github.com/stac-utils/pystac/issues/313 """ def json_loads(self, txt: str, *_: Any, **__: Any) -> Dict[str, Any]: - """Overwrites :meth:`StacIO.json_loads` as the internal method used by - :class:`DuplicateKeyReportingMixin` for deserializing a JSON string to a - dictionary while checking for duplicate object keys. + """Overwrites :meth:`StacIO.json_loads ` as the + internal method used by :class:`DuplicateKeyReportingMixin` for deserializing + a JSON string to a dictionary while checking for duplicate object keys. Raises: @@ -315,8 +361,9 @@ def json_loads(self, txt: str, *_: Any, **__: Any) -> Dict[str, Any]: def read_json( self, source: Union[str, "Link_Type"], *args: Any, **kwargs: Any ) -> Dict[str, Any]: - """Overwrites :meth:`StacIO.read_json` for deserializing a JSON file to a - dictionary while checking for duplicate object keys. + """Overwrites :meth:`StacIO.read_json ` for + deserializing a JSON file to a dictionary while checking for duplicate object + keys. 
Raises: From ad38b05370ff7bea2aff537efe7ffefdf84470ca Mon Sep 17 00:00:00 2001 From: Jon Duckworth Date: Wed, 23 Jun 2021 22:40:52 -0400 Subject: [PATCH 5/6] Fix lint issues --- docs/concepts.rst | 8 ++++---- pystac/stac_io.py | 24 +++++++++++------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/docs/concepts.rst b/docs/concepts.rst index 7cd0c3057..0fe9a9288 100644 --- a/docs/concepts.rst +++ b/docs/concepts.rst @@ -269,7 +269,7 @@ for reading from AWS's S3 cloud object storage using `boto3 if parsed.scheme == "s3": bucket = parsed.netloc key = parsed.path[1:] - + obj = self.s3.Object(bucket, key) return obj.get()["Body"].read().decode("utf-8") else: @@ -288,7 +288,7 @@ for reading from AWS's S3 cloud object storage using `boto3 super().write_text(dest, txt, *args, **kwargs) StacIO.set_default(CustomStacIO) - + If you only need to customize read operations you can inherit from :class:`~pystac.stac_io.DefaultStacIO` and only overwrite the read method. For example, @@ -304,7 +304,7 @@ to take advantage of connection pooling using a `requests.Session class ConnectionPoolingIO(DefaultStacIO): def __init__(): self.session = requests.Session() - + def read_text( self, source: Union[str, Link], *args: Any, **kwargs: Any ) -> str: @@ -313,7 +313,7 @@ to take advantage of connection pooling using a `requests.Session return self.session.get(uri).text else: return super().read_text(source, *args, **kwargs) - + StacIO.set_default(ConnectionPoolingIO) Validation diff --git a/pystac/stac_io.py b/pystac/stac_io.py index bb7a9db54..3c4485ff9 100644 --- a/pystac/stac_io.py +++ b/pystac/stac_io.py @@ -50,7 +50,7 @@ def read_text( :class:`~pystac.Link`. If it is a string, it must be a URI or local path from which to read. Using a :class:`~pystac.Link` enables implementations to use additional link information, such as paging information contained in the - extended links described in the `STAC API spec + extended links described in the `STAC API spec `__. 
Args: @@ -263,12 +263,11 @@ def default(cls) -> "StacIO": class DefaultStacIO(StacIO): - def read_text( - self, source: Union[str, "Link_Type"], *_: Any, **__: Any - ) -> str: - """A concrete implementation of :meth:`StacIO.read_text <pystac.StacIO.read_text>`. Converts the - ``source`` argument to a string (if it is not already) and delegates to - :meth:`DefaultStacIO.read_text_from_href` for opening and reading the file.""" + def read_text(self, source: Union[str, "Link_Type"], *_: Any, **__: Any) -> str: + """A concrete implementation of :meth:`StacIO.read_text + <pystac.StacIO.read_text>`. Converts the ``source`` argument to a string (if it + is not already) and delegates to :meth:`DefaultStacIO.read_text_from_href` for + opening and reading the file.""" href: Optional[str] if isinstance(source, str): href = source @@ -305,9 +304,10 @@ def read_text_from_href(self, href: str) -> str: def write_text( self, dest: Union[str, "Link_Type"], txt: str, *_: Any, **__: Any ) -> None: - """A concrete implementation of :meth:`StacIO.write_text <pystac.StacIO.write_text>`. Converts the - ``dest`` argument to a string (if it is not already) and delegates to - :meth:`DefaultStacIO.write_text_from_href` for opening and reading the file.""" + """A concrete implementation of :meth:`StacIO.write_text + <pystac.StacIO.write_text>`. Converts the ``dest`` argument to a string (if it + is not already) and delegates to :meth:`DefaultStacIO.write_text_to_href` for + opening and writing the file.""" href: Optional[str] if isinstance(dest, str): href = dest @@ -317,9 +317,7 @@ def write_text( raise IOError(f"Could not get an absolute HREF from link {dest}") return self.write_text_to_href(href, txt) - def write_text_to_href( - self, href: str, txt: str - ) -> None: + def write_text_to_href(self, href: str, txt: str) -> None: """Writes text to file using UTF-8 encoding. 
This implementation uses :func:`open` and therefore can only write to the local From 2519e034ccde0d6b73c95a0e21da368b234031e1 Mon Sep 17 00:00:00 2001 From: Jon Duckworth Date: Wed, 23 Jun 2021 22:42:10 -0400 Subject: [PATCH 6/6] Add CHANGELOG entry for #471 --- CHANGELOG.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f23fbcb89..564bc4264 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,18 @@ ### Added -- Add a `preserve_dict` parameter to `ItemCollection.from_dict` and set it to False when using `ItemCollection.from_file`. ([#468](https://github.com/stac-utils/pystac/pull/468)) +- Add a `preserve_dict` parameter to `ItemCollection.from_dict` and set it to False when + using `ItemCollection.from_file`. + ([#468](https://github.com/stac-utils/pystac/pull/468)) +- `StacIO.json_dumps` and `StacIO.json_loads` methods for JSON + serialization/deserialization. These were "private" methods, but are now "public" and + documented ([#471](https://github.com/stac-utils/pystac/pull/471)) ### Changed +- `pystac.stac_io.DuplicateObjectKeyError` moved to `pystac.DuplicateObjectKeyError` + ([#471](https://github.com/stac-utils/pystac/pull/471)) + ### Fixed ### Removed