From 217b979a8c08960d20947c93368ee695d688c368 Mon Sep 17 00:00:00 2001 From: wvenialbo Date: Wed, 23 Oct 2024 09:03:49 -0300 Subject: [PATCH 1/9] refactor(downloader): handle negative tolerance Raise a ValueError if the time_tolerance value is negative in the Downloader class. --- src/GOES_DL/downloader/downloader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/GOES_DL/downloader/downloader.py b/src/GOES_DL/downloader/downloader.py index 96c83c5..552e42b 100644 --- a/src/GOES_DL/downloader/downloader.py +++ b/src/GOES_DL/downloader/downloader.py @@ -69,7 +69,8 @@ def __post_init__(self) -> None: """ Validate the downloader object. """ - assert self.time_tolerance >= 0 + if self.time_tolerance < 0: + raise ValueError("time_tolerance must be non-negative") def get_files(self, *, start: str, end: str = "") -> list[Any]: """ From ba25f3146778aa06a1247affa79ccc6ee9f8d927 Mon Sep 17 00:00:00 2001 From: wvenialbo Date: Wed, 23 Oct 2024 11:03:17 -0300 Subject: [PATCH 2/9] refactor(datasource): update get_file return type to bytes --- src/GOES_DL/datasource/datasource.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/GOES_DL/datasource/datasource.py b/src/GOES_DL/datasource/datasource.py index 85453b4..4a72b85 100644 --- a/src/GOES_DL/datasource/datasource.py +++ b/src/GOES_DL/datasource/datasource.py @@ -7,7 +7,6 @@ from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Any @dataclass(eq=False, frozen=True) @@ -37,7 +36,7 @@ class Datasource(ABC): base_url: str @abstractmethod - def get_file(self, file_path: str) -> Any: + def get_file(self, file_path: str) -> bytes: """ Get a file. @@ -51,7 +50,7 @@ def get_file(self, file_path: str) -> Any: Returns ------- - Any + bytes The file object. 
""" From 662b6c423434f07f0bf00367cbf398d7a442e7b9 Mon Sep 17 00:00:00 2001 From: wvenialbo Date: Wed, 23 Oct 2024 11:03:33 -0300 Subject: [PATCH 3/9] refactor(datasource): update get_file return type to bytes --- src/GOES_DL/datasource/datasource_http.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/GOES_DL/datasource/datasource_http.py b/src/GOES_DL/datasource/datasource_http.py index f3d5e53..355c724 100644 --- a/src/GOES_DL/datasource/datasource_http.py +++ b/src/GOES_DL/datasource/datasource_http.py @@ -7,7 +7,7 @@ import re import socket -from typing import Any, overload +from typing import overload from urllib.parse import ParseResult import requests @@ -122,7 +122,7 @@ def create( cache = DatasourceCache(life_time) return DatasourceHTTP(locator, cache) - def get_file(self, file_path: str) -> Any: + def get_file(self, file_path: str) -> bytes: """ Download a file into memory. @@ -136,7 +136,7 @@ def get_file(self, file_path: str) -> Any: Returns ------- - Any + bytes The file object. Raises From 53ba582d9eef55f0fa7472297077ab6cc230b349 Mon Sep 17 00:00:00 2001 From: wvenialbo Date: Wed, 23 Oct 2024 11:03:44 -0300 Subject: [PATCH 4/9] refactor(datasource): update get_file return type to bytes --- src/GOES_DL/datasource/datasource_aws.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/GOES_DL/datasource/datasource_aws.py b/src/GOES_DL/datasource/datasource_aws.py index 770c621..8d47a0f 100644 --- a/src/GOES_DL/datasource/datasource_aws.py +++ b/src/GOES_DL/datasource/datasource_aws.py @@ -212,7 +212,7 @@ def _get_client(region: str | None) -> Any: config=Config(signature_version=UNSIGNED), ) - def get_file(self, file_path: str) -> Any: + def get_file(self, file_path: str) -> bytes: """ Download a file into memory. @@ -226,7 +226,7 @@ def get_file(self, file_path: str) -> Any: Returns ------- - Any + bytes The file object. 
Raises @@ -240,7 +240,7 @@ def get_file(self, file_path: str) -> Any: response: Any = self.s3_client.get_object( Bucket=self.bucket_name, Key=folder_path ) - return response["Body"].read() + return bytes(response["Body"].read()) except ClientError as exc: message: str = f"Unable to retrieve the file '{file_path}': {exc}" From 0f58047c88fbf6ac0934d761358db3c1a8999f3c Mon Sep 17 00:00:00 2001 From: wvenialbo Date: Wed, 23 Oct 2024 11:06:48 -0300 Subject: [PATCH 5/9] refactor(downloader): update retrieve_files return type to bytes --- src/GOES_DL/downloader/downloader.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/GOES_DL/downloader/downloader.py b/src/GOES_DL/downloader/downloader.py index 96c83c5..e892a1b 100644 --- a/src/GOES_DL/downloader/downloader.py +++ b/src/GOES_DL/downloader/downloader.py @@ -166,7 +166,7 @@ def get_file_list(self, start_time: str, end_time: str = "") -> list[str]: datetime_ini, datetime_fin, files ) - def retrieve_files(self, file_paths: list[str]) -> list[Any]: + def retrieve_files(self, file_paths: list[str]) -> list[bytes]: """ Retrieve the files from the datasource. @@ -180,7 +180,7 @@ def retrieve_files(self, file_paths: list[str]) -> list[Any]: Returns ------- - list[Any] + list[bytes] A list with the file objects. Raises @@ -190,10 +190,10 @@ def retrieve_files(self, file_paths: list[str]) -> list[Any]: e.g. if the file does not exist in the datasource or an internal error occurred. 
""" - file_objects: list[Any] = [] + file_objects: list[bytes] = [] for file in file_paths: - file_object: Any = self.datasource.get_file(file) + file_object: bytes = self.datasource.get_file(file) file_objects.append(file_object) return file_objects From c83f47a018227e52aa9dbe5e7541a6340ee48e69 Mon Sep 17 00:00:00 2001 From: wvenialbo Date: Wed, 23 Oct 2024 11:08:42 -0300 Subject: [PATCH 6/9] refactor(downloader): update get_files return type to bytes --- src/GOES_DL/downloader/downloader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/GOES_DL/downloader/downloader.py b/src/GOES_DL/downloader/downloader.py index e892a1b..66c142f 100644 --- a/src/GOES_DL/downloader/downloader.py +++ b/src/GOES_DL/downloader/downloader.py @@ -13,7 +13,6 @@ import os from dataclasses import dataclass from datetime import datetime, timedelta -from typing import Any from ..dataset import ProductLocator from ..datasource import Datasource @@ -71,7 +70,7 @@ def __post_init__(self) -> None: """ assert self.time_tolerance >= 0 - def get_files(self, *, start: str, end: str = "") -> list[Any]: + def get_files(self, *, start: str, end: str = "") -> list[bytes]: """ Get the files from the datasource. From 897086c6c5d08f687e28e383e9a7bc06bc1b13ff Mon Sep 17 00:00:00 2001 From: wvenialbo Date: Wed, 23 Oct 2024 11:10:29 -0300 Subject: [PATCH 7/9] refactor(downloader): update return type of get_files to bytes --- src/GOES_DL/downloader/downloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GOES_DL/downloader/downloader.py b/src/GOES_DL/downloader/downloader.py index 66c142f..44e1d21 100644 --- a/src/GOES_DL/downloader/downloader.py +++ b/src/GOES_DL/downloader/downloader.py @@ -96,7 +96,7 @@ def get_files(self, *, start: str, end: str = "") -> list[bytes]: Returns ------- - list[Any] + list[bytes] A list with the file objects. 
Raises From d0f71b69f1f16b01a23fb808b0dd595b5b7948b2 Mon Sep 17 00:00:00 2001 From: wvenialbo Date: Wed, 23 Oct 2024 11:23:58 -0300 Subject: [PATCH 8/9] refactor(downloader): update get_files return type to include file paths Update the return type of the `get_files` method in the `Downloader` class to include both the file paths and the file objects. This change is made to provide more information about the files retrieved from the datasource. The return type is now a list of tuples, where each tuple contains the file path and the corresponding file object. Refactor the `get_files` method to retrieve the file paths using the `get_file_list` method and then retrieve the file objects using the `retrieve_files` method. The retrieved file objects are then zipped with their respective file paths and returned as a list of tuples. This change improves the clarity and usability of the `get_files` method in the `Downloader` class. --- src/GOES_DL/downloader/downloader.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/GOES_DL/downloader/downloader.py b/src/GOES_DL/downloader/downloader.py index 44e1d21..a8a4d31 100644 --- a/src/GOES_DL/downloader/downloader.py +++ b/src/GOES_DL/downloader/downloader.py @@ -70,7 +70,9 @@ def __post_init__(self) -> None: """ assert self.time_tolerance >= 0 - def get_files(self, *, start: str, end: str = "") -> list[bytes]: + def get_files( + self, *, start: str, end: str = "" + ) -> list[tuple[str, bytes]]: """ Get the files from the datasource. @@ -96,8 +98,10 @@ def get_files(self, *, start: str, end: str = "") -> list[bytes]: Returns ------- - list[bytes] - A list with the file objects. + list[tuple[str, bytes]] + A list of tuples with the file paths and the file objects in + the directory that match the timestamps between `start` and + `end`. 
 Raises ------ @@ -113,7 +117,9 @@ def get_files(self, *, start: str, end: str = "") -> list[bytes]: """ files_in_range: list[str] = self.get_file_list(start, end) - return self.retrieve_files(files_in_range) + retrieved_files: list[bytes] = self.retrieve_files(files_in_range) + + return list(zip(files_in_range, retrieved_files)) def get_file_list(self, start_time: str, end_time: str = "") -> list[str]: """ From 21133d69df910a7c432de61c8ddb57a2663839e2 Mon Sep 17 00:00:00 2001 From: wvenialbo Date: Wed, 23 Oct 2024 11:32:54 -0300 Subject: [PATCH 9/9] docs(downloader): update README.md --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 374232c..246886a 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ other applications. 1. **GOES 2nd Generation (GOES-8 to GOES-15)**: Also known as the I to P Series, these datasets provide environmental monitoring and meteorological data for the Western Hemisphere [[4](#goesi)]. - + 2. **GOES 3rd Generation (GOES-16 to GOES-18)**: Also known as the R to U Series, these satellites offer advanced imagery and atmospheric measurements with better spatial, spectral, and temporal resolution [[7](#goesr)]. @@ -102,6 +102,9 @@ files2 = downloader.get_files( start="2012-08-23T00:00-0004", end="2012-08-24T00:00-0004", ) + +# `files1` and `files2` are lists of tuple[str, bytes] with file path and +# file content, respectively. The file path is relative to the base URL. ``` ### 2. Download GOES 3rd Generation Data @@ -130,6 +133,9 @@ files2 = downloader.get_files( start="2024-08-23T00:00:00-0004", # use the default date format end="2024-08-24T00:00:00-0004", ) + +# `files1` and `files2` are lists of tuple[str, bytes] with file path and +# file content, respectively. The file path is relative to the base URL. ``` ### 3. 
Download GridSat-B1 Data @@ -159,6 +165,9 @@ files2 = downloader.get_files( start="1984-08-23T00:00-0004", end="1984-08-24T00:00-0004", ) + +# `files1` and `files2` are lists of tuple[str, bytes] with file path and +# file content, respectively. The file path is relative to the base URL. ``` ## Pipeline and parameters @@ -182,6 +191,7 @@ The general workflow for downloading data using **GOES-DL** is as follows: date format to be used in the download process. The Downloader.get_files method accepts the following parameters: + - **start_time**: A string specifying the starting date for the dataset to be downloaded. - **end_time**: A string specifying the ending date for the dataset to be