Skip to content

Commit

Permalink
Merge branch 'develop' into add-file-repository
Browse files Browse the repository at this point in the history
  • Loading branch information
wvenialbo authored Oct 25, 2024
2 parents b96f05a + 49608f2 commit f18ead4
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 15 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ other applications.
1. **GOES 2nd Generation (GOES-8 to GOES-15)**: Also known as the I to P
Series, these datasets provide environmental monitoring and meteorological
data for the Western Hemisphere [[4](#goesi)].

2. **GOES 3rd Generation (GOES-16 to GOES-18)**: Also known as the R to U
Series, these satellites offer advanced imagery and atmospheric measurements
with better spatial, spectral, and temporal resolution [[7](#goesr)].
Expand Down Expand Up @@ -102,6 +102,9 @@ files2 = downloader.get_files(
start="2012-08-23T00:00-0004",
end="2012-08-24T00:00-0004",
)

# `files1` and `files2` are lists of tuple[str, bytes] with file path and
# file content, respectively. The file path is relative to the base URL.
```

### 2. Download GOES 3rd Generation Data
Expand Down Expand Up @@ -130,6 +133,9 @@ files2 = downloader.get_files(
start="2024-08-23T00:00:00-0004", # use the default date format
end="2024-08-24T00:00:00-0004",
)

# `files1` and `files2` are lists of tuple[str, bytes] with file path and
# file content, respectively. The file path is relative to the base URL.
```

### 3. Download GridSat-B1 Data
Expand Down Expand Up @@ -159,6 +165,9 @@ files2 = downloader.get_files(
start="1984-08-23T00:00-0004",
end="1984-08-24T00:00-0004",
)

# `files1` and `files2` are lists of tuple[str, bytes] with file path and
# file content, respectively. The file path is relative to the base URL.
```

## Pipeline and parameters
Expand All @@ -182,6 +191,7 @@ The general workflow for downloading data using **GOES-DL** is as follows:
date format to be used in the download process.

The `Downloader.get_files` method accepts the following parameters:

- **start**: A string specifying the starting date for the dataset to be
downloaded.
- **end**: A string specifying the ending date for the dataset to be
Expand Down
5 changes: 2 additions & 3 deletions src/GOES_DL/datasource/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any


@dataclass(eq=False, frozen=True)
Expand Down Expand Up @@ -37,7 +36,7 @@ class Datasource(ABC):
base_url: str

@abstractmethod
def get_file(self, file_path: str) -> Any:
def get_file(self, file_path: str) -> bytes:
"""
Get a file.
Expand All @@ -51,7 +50,7 @@ def get_file(self, file_path: str) -> Any:
Returns
-------
Any
bytes
The file object.
"""

Expand Down
2 changes: 1 addition & 1 deletion src/GOES_DL/datasource/datasource_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def get_file(self, file_path: str) -> bytes:
Returns
-------
Any
bytes
The file object.
Raises
Expand Down
26 changes: 16 additions & 10 deletions src/GOES_DL/downloader/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import os
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any

from ..dataset import ProductLocator
from ..datasource import Datasource
Expand Down Expand Up @@ -69,9 +68,12 @@ def __post_init__(self) -> None:
"""
Validate the downloader object.
"""
assert self.time_tolerance >= 0
if self.time_tolerance < 0:
raise ValueError("time_tolerance must be non-negative")

def get_files(self, *, start: str, end: str = "") -> list[Any]:
def get_files(
self, *, start: str, end: str = ""
) -> list[tuple[str, bytes]]:
"""
Get the files from the datasource.
Expand All @@ -97,8 +99,10 @@ def get_files(self, *, start: str, end: str = "") -> list[Any]:
Returns
-------
list[Any]
A list with the file objects.
list[tuple[str, bytes]]
A list of tuples with the file paths and the file objects in
the directory that match the timestamps between `start` and
`end`.
Raises
------
Expand All @@ -114,7 +118,9 @@ def get_files(self, *, start: str, end: str = "") -> list[Any]:
"""
files_in_range: list[str] = self.get_file_list(start, end)

return self.retrieve_files(files_in_range)
retrieved_files: list[bytes] = self.retrieve_files(files_in_range)

return list(zip(files_in_range, retrieved_files))

def get_file_list(self, start_time: str, end_time: str = "") -> list[str]:
"""
Expand Down Expand Up @@ -166,7 +172,7 @@ def get_file_list(self, start_time: str, end_time: str = "") -> list[str]:
datetime_ini, datetime_fin, files
)

def retrieve_files(self, file_paths: list[str]) -> list[Any]:
def retrieve_files(self, file_paths: list[str]) -> list[bytes]:
"""
Retrieve the files from the datasource.
Expand All @@ -180,7 +186,7 @@ def retrieve_files(self, file_paths: list[str]) -> list[Any]:
Returns
-------
list[Any]
list[bytes]
A list with the file objects.
Raises
Expand All @@ -190,10 +196,10 @@ def retrieve_files(self, file_paths: list[str]) -> list[Any]:
e.g. if the file does not exist in the datasource or an
internal error occurred.
"""
file_objects: list[Any] = []
file_objects: list[bytes] = []

for file in file_paths:
file_object: Any = self.datasource.get_file(file)
file_object: bytes = self.datasource.get_file(file)
file_objects.append(file_object)

return file_objects
Expand Down

0 comments on commit f18ead4

Please sign in to comment.