Skip to content

Commit

Permalink
Merge pull request #12 from wvenialbo/develop
Browse files Browse the repository at this point in the history
Update settings-driven product locator builder branch
  • Loading branch information
wvenialbo authored Oct 30, 2024
2 parents 111cf8a + afc77ed commit 0b8abcf
Show file tree
Hide file tree
Showing 15 changed files with 972 additions and 448 deletions.
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,9 @@
"yaml": true,
},
"vsintellicode.python.completionsEnabled": true,
"vsintellicode.java.completionsEnabled": false,
"vsintellicode.sql.completionsEnabled": false,
"vsintellicode.typescript.completionsEnabled": false,
// Productivity tools settings
"diffEditor.diffAlgorithm": "advanced",
"diffEditor.wordWrap": "off",
Expand Down
54 changes: 47 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ Satellites - R Series][4].

## Installation

To install **GOES-DL**, use `pip` *(not yet)*:
To install **GOES-DL**, use `pip`:

```bash
pip install goes-dl
Expand All @@ -75,7 +75,7 @@ pip install goes-dl
Below are examples of how to use the GOES-DL package to download data from each
of the supported sources.

### 1. Download GOES 2nd Generation Data
### 1. Download GOES 2nd Generation Data (from NOAA's NCEI archive)

```python
# Import the locator and datasource according to your desired product
Expand All @@ -86,7 +86,7 @@ from GOES_DL.downloader import Downloader
# Initialize the downloader for GridSat-GOES (GOES-12, Full Disk)
locator = GridSatProductLocatorGC("F", "G12")

datasource = DatasourceHTTP(locator)
datasource = DatasourceHTTP(locator, repository="./my_data/gridsat-gc")

downloader = Downloader(
datasource=datasource,
Expand All @@ -107,7 +107,7 @@ files2 = downloader.get_files(
# file content, respectively. The file path is relative to the base URL.
```

### 2. Download GOES 3rd Generation Data
### 2. Download GOES 3rd Generation Data (from NOAA's AWS archive)

```python
# Import the locator and datasource according to your desired product
Expand All @@ -118,7 +118,9 @@ from GOES_DL.downloader import Downloader
# Initialize the downloader for GOES-R Series (set your desired product)
locator = GOESProductLocatorABIPP("CMIP", "F", ["C02", "C08", "C13"], "G16")

datasource = DatasourceAWS(locator)
# GOES-16 data is updated every 10 minutes. If you are downloading
# old data, you may leave the cache refresh rate as default (+inf).
datasource = DatasourceAWS(locator, repository="./my_data/goes-r", cache=600)

downloader = Downloader(
datasource=datasource,
Expand All @@ -138,7 +140,7 @@ files2 = downloader.get_files(
# file content, respectively. The file path is relative to the base URL.
```

### 3. Download GridSat-B1 Data
### 3. Download GridSat-B1 Data (from NOAA's AWS archive)

```python
# Import the locator and datasource according to your desired product
Expand All @@ -149,7 +151,45 @@ from GOES_DL.downloader import Downloader
# Initialize the downloader for GridSat-B1
locator = GridSatProductLocatorB1()

datasource = DatasourceAWS(locator) # also available in HTTP from NCEI
# Also available in HTTP from NCEI's archive, see next example
datasource = DatasourceAWS(locator, repository="./my_data/gridsat-b1")

downloader = Downloader(
datasource=datasource,
locator=locator,
date_format="%Y-%m-%dT%H:%M%z",
)

# Set your desired date...
files1 = downloader.get_files(start="1984-08-23T00:00Z")

# ...or your desired date range
files2 = downloader.get_files(
start="1984-08-23T00:00-0004",
end="1984-08-24T00:00-0004",
)

# `files1` and `files2` are lists of tuple[str, bytes] with file path and
# file content, respectively. The file path is relative to the base URL.
```

### 4. Download GridSat-B1 Data (from NOAA's NCEI archive)

```python
# Import the locator and datasource according to your desired product
from GOES_DL.dataset.gridsat import GridSatProductLocatorB1
from GOES_DL.datasource import DatasourceHTTP
from GOES_DL.downloader import Downloader

# Initialize the downloader for GridSat-B1
locator = GridSatProductLocatorB1()

# The NCEI archive has the same folder structure as AWS, so, if you have
# downloaded data from AWS, you can use the same locator and change the
# datasource to HTTP. If a file is not found in the local repository, it
# will be downloaded from the remote datasource. In all previous examples,
# if a file was already downloaded, it will not be downloaded again.
datasource = DatasourceHTTP(locator, repository="./my_data/gridsat-b1")

downloader = Downloader(
datasource=datasource,
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
boto3~=1.35.43
boto3~=1.35.47
mypy-boto3-s3~=1.35.46
requests~=2.32.3
2 changes: 1 addition & 1 deletion src/GOES_DL/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

__package_id__ = "GOES-DL"
__package_name__ = f"GOES Dataset Downloader - {__package_id__}"
__version__ = "v0.1-rc1"
__version__ = "v0.1-rc2"
6 changes: 6 additions & 0 deletions src/GOES_DL/dataset/base/locator_gg.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,12 @@ def _validate_channels(
) -> None:
cls._validate_set("channel", channel, available_channels)

@classmethod
def _validate_datasource(
cls, datasource: str, available_datasources: Iterable[str]
) -> None:
cls._validate_entity("datasource", datasource, available_datasources)

@classmethod
def _validate_instrument(
cls, instrument: str, available_instruments: Iterable[str]
Expand Down
6 changes: 5 additions & 1 deletion src/GOES_DL/dataset/goes/locator.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ class GOESProductLocator(ProductLocatorGG):
MESO_1 = "M1"
MESO_2 = "M2"

# WARNING: This is being overridden by GOESProductLocatorABI in this
# release due to an incomplete refactoring. This will be
# fixed in future releases. Nevertheless, the methods that
# use this attribute were fixed to use the correct values.
AVAILABLE_SCENES: dict[str, str] = {
FULL_DISK: FULL_DISK,
CONUS: CONUS,
Expand Down Expand Up @@ -262,7 +266,7 @@ def get_base_url(self, datasource: str) -> tuple[str, ...]:
f"Supported datasources: {supported_datasources}"
)

scene: str = self.AVAILABLE_SCENES[self.scene] if self.scene else ""
scene: str = self.scene
product: str = f"{self.instrument}-{self.level}-{self.name}{scene}"
satellite: str = self.AVAILABLE_ORIGINS[self.origin]

Expand Down
33 changes: 25 additions & 8 deletions src/GOES_DL/datasource/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ class Datasource(ABC):
Abstract a datasource object.
This class defines the interface for a datasource object. The
datasource is responsible for listing the contents of a directory
in a remote location and for downloading files from that location.
datasource is responsible for listing the contents of a directory in
a remote location and for downloading files from that location.
Attributes
----------
Expand All @@ -27,21 +27,38 @@ class Datasource(ABC):
Methods
-------
download_file(file_path: str)
Retrieve a file from the datasource and save it into the local
repository.
get_file(file_path: str)
Get a file from the datasource.
Get a file from the datasource or local repository.
listdir(dir_path: str)
List the contents of a directory.
List the contents of a remote directory.
"""

base_url: str

@abstractmethod
def download_file(self, file_path: str) -> None:
"""
Download a file from the datasource into the local repository.
Retrieve a file from a remote location and save it into the local
repository. The path is relative to the base URL and local
repository root directory.
Parameters
----------
file_path : str
The path to the file. The path is relative to the base URL.
"""

@abstractmethod
def get_file(self, file_path: str) -> bytes:
"""
Get a file.
Get a file from the datasource or local repository.
Get a file from a remote location. The path is relative to the
base URL.
Get a file from a remote location or local repository. The path
is relative to the base URL and local repository root directory.
Parameters
----------
Expand All @@ -57,7 +74,7 @@ def get_file(self, file_path: str) -> bytes:
@abstractmethod
def listdir(self, dir_path: str) -> list[str]:
"""
List the contents of a directory.
List the contents of a remote directory.
List the contents of a directory in a remote location. The path
is relative to the base URL.
Expand Down
Loading

0 comments on commit 0b8abcf

Please sign in to comment.