Skip to content

Commit

Permalink
style(repo): ModelRepository -> RawRepository
Browse files Browse the repository at this point in the history
  • Loading branch information
devsjc committed Dec 17, 2024
1 parent ea80153 commit 50c52c4
Show file tree
Hide file tree
Showing 24 changed files with 205 additions and 187 deletions.
14 changes: 7 additions & 7 deletions src/nwp_consumer/cmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,29 @@

class Adaptors(NamedTuple):
"""Adaptors for the CLI."""
model_repository: type[ports.ModelRepository]
model_repository: type[ports.RawRepository]
notification_repository: type[ports.NotificationRepository]

def parse_env() -> Adaptors:
"""Parse from the environment."""
model_repository_adaptor: type[ports.ModelRepository]
model_repository_adaptor: type[ports.RawRepository]
match os.getenv("MODEL_REPOSITORY"):
# Default to NOAA S3 as it is freely accessible
case None | "gfs":
model_repository_adaptor = \
repositories.model_repositories.NOAAS3ModelRepository
repositories.model_repositories.NOAAS3RawRepository
case "ceda":
model_repository_adaptor = \
repositories.model_repositories.CEDAFTPModelRepository
repositories.model_repositories.CEDAFTPRawRepository
case "ecmwf-realtime":
model_repository_adaptor = \
repositories.model_repositories.ECMWFRealTimeS3ModelRepository
repositories.model_repositories.ECMWFRealTimeS3RawRepository
case "metoffice-datahub":
model_repository_adaptor = \
repositories.model_repositories.MetOfficeDatahubModelRepository
repositories.model_repositories.MetOfficeDatahubRawRepository
case "ecmwf-mars":
model_repository_adaptor = \
repositories.model_repositories.ECMWFMARSModelRepository
repositories.model_repositories.ECMWFMARSRawRepository
case _ as mr:
log.error(
f"Unknown model repository '{mr}'. Expected one of "
Expand Down
4 changes: 2 additions & 2 deletions src/nwp_consumer/internal/entities/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
should not contain any logic that is specific to a particular implementation.
"""

from .repometadata import ModelRepositoryMetadata
from .repometadata import RawRepositoryMetadata
from .modelmetadata import ModelMetadata, Models
from .tensorstore import ParameterScanResult, TensorStore
from .postprocess import PostProcessOptions, CodecOptions
Expand All @@ -25,7 +25,7 @@
from .performance import PerformanceMonitor

__all__ = [
"ModelRepositoryMetadata",
"RawRepositoryMetadata",
"ModelMetadata",
"Models",
"ParameterScanResult",
Expand Down
2 changes: 1 addition & 1 deletion src/nwp_consumer/internal/entities/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class NWPDimensionCoordinateMap:
def __post_init__(self) -> None:
"""Rigidly set input value ordering and precision."""
self.variable = sorted(self.variable)
# Make latitude descending, longitude acsending, and both rounded to 4 d.p.
# Make latitude descending, longitude ascending, and both rounded to 4 d.p.
# NOTE: For latitude and longitude values, we round to 4 decimal places
# to avoid floating point precision issues when comparing values.
# It is important to note that this places a limit on the precision
Expand Down
4 changes: 2 additions & 2 deletions src/nwp_consumer/internal/entities/repometadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
data, defining pipelines for processing, or establishing the availability
for a live service.
In this instance, the `ModelRepositoryMetadata` refers to information
In this instance, the `RawRepositoryMetadata` refers to information
about the repository where NWP data produced by the model resides.
"""

Expand All @@ -22,7 +22,7 @@


@dataclasses.dataclass(slots=True)
class ModelRepositoryMetadata:
class RawRepositoryMetadata:
"""Metadata for an NWP Model repository."""

name: str
Expand Down
8 changes: 4 additions & 4 deletions src/nwp_consumer/internal/entities/test_repometadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
import unittest

from .postprocess import PostProcessOptions
from .repometadata import ModelRepositoryMetadata
from .repometadata import RawRepositoryMetadata


class TestModelRepositoryMetadata(unittest.TestCase):
"""Test the business methods of the ModelRepositoryMetadata class."""
class TestRawRepositoryMetadata(unittest.TestCase):
"""Test the business methods of the RawRepositoryMetadata class."""

metadata: ModelRepositoryMetadata = ModelRepositoryMetadata(
metadata: RawRepositoryMetadata = RawRepositoryMetadata(
name="test",
is_archive=False,
is_order_based=False,
Expand Down
4 changes: 2 additions & 2 deletions src/nwp_consumer/internal/handlers/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@
class CLIHandler:
"""CLI driving actor."""

model_adaptor: type[ports.ModelRepository]
model_adaptor: type[ports.RawRepository]
notification_adaptor: type[ports.NotificationRepository]

def __init__(
self,
model_adaptor: type[ports.ModelRepository],
model_adaptor: type[ports.RawRepository],
notification_adaptor: type[ports.NotificationRepository],
) -> None:
"""Create a new instance."""
Expand Down
4 changes: 2 additions & 2 deletions src/nwp_consumer/internal/ports/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"""

from .services import ConsumeUseCase
from .repositories import ModelRepository, NotificationRepository
from .repositories import RawRepository, NotificationRepository

__all__ = [
"ConsumeUseCase",
"ModelRepository",
"RawRepository",
"NotificationRepository",
]
12 changes: 6 additions & 6 deletions src/nwp_consumer/internal/ports/repositories.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
All NWP providers use some kind of model to generate their data. This repository
can be physics-based, such as ERA5, or a machine learning model_repositories, such as
Google's GraphCast. The `ModelRepository` interface is used to abstract the
Google's GraphCast. The `RawRepository` interface is used to abstract the
differences between these models, allowing the core to interact with them
in a uniform way.
"""
Expand All @@ -24,11 +24,11 @@
log = logging.getLogger("nwp-consumer")


class ModelRepository(abc.ABC):
class RawRepository(abc.ABC):
"""Interface for a repository that produces raw NWP data.
Since different producers of NWP data have different data storage
implementations, a ModelRepository needs to define its own download
implementations, a RawRepository needs to define its own download
and processing methods.
A source may provide one or more files for a given init time.
Expand All @@ -44,7 +44,7 @@ class ModelRepository(abc.ABC):

@classmethod
@abc.abstractmethod
def authenticate(cls) -> ResultE["ModelRepository"]:
def authenticate(cls) -> ResultE["RawRepository"]:
"""Create a new authenticated instance of the class."""
pass

Expand Down Expand Up @@ -73,7 +73,7 @@ def fetch_init_data(self, it: dt.datetime) \
>>> import datetime as dt
>>>
>>> # Pseudocode for a model_repositories repository
>>> class MyModelRepository(ModelRepository):
>>> class MyRawRepository(RawRepository):
... @override
... def fetch_init_data(self, it: dt.datetime) \
... -> Iterator[Callable[..., ResultE[list[xr.DataArray]]]]:
Expand Down Expand Up @@ -113,7 +113,7 @@ def fetch_init_data(self, it: dt.datetime) \

@staticmethod
@abc.abstractmethod
def repository() -> entities.ModelRepositoryMetadata:
def repository() -> entities.RawRepositoryMetadata:
"""Metadata about the model repository."""
pass

Expand Down
10 changes: 8 additions & 2 deletions src/nwp_consumer/internal/ports/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

from returns.result import ResultE

from nwp_consumer.internal import entities

Check failure on line 14 in src/nwp_consumer/internal/ports/services.py

View workflow job for this annotation

GitHub Actions / lint-typecheck

Ruff (F401)

src/nwp_consumer/internal/ports/services.py:14:35: F401 `nwp_consumer.internal.entities` imported but unused


class ConsumeUseCase(abc.ABC):
"""Interface for the consumer use case.
Expand All @@ -22,21 +24,25 @@ class ConsumeUseCase(abc.ABC):


@abc.abstractmethod
def consume(self, period: dt.datetime | dt.date | None = None) -> ResultE[str]:
def consume(
self,
period: dt.datetime | dt.date | None = None,
) -> ResultE[str]:
"""Consume NWP data to Zarr format for desired time period.
Where possible the implementation should be as memory-efficient as possible.
The designs of the repository methods also enable parallel processing within
the implementation.
Args:
model: The model to consume data from.
period: The period for which to gather init time data.
Returns:
The path to the produced Zarr store.
See Also:
- `repositories.ModelRepository.fetch_init_data`
- `repositories.RawRepository.fetch_init_data`
- `tensorstore.TensorStore.write_to_region`
- https://joblib.readthedocs.io/en/stable/auto_examples/parallel_generator.html
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
TODO: Add description
"""

from .ceda_ftp import CEDAFTPModelRepository
from .ecmwf_realtime import ECMWFRealTimeS3ModelRepository
from .ecmwf_mars import ECMWFMARSModelRepository
from .noaa_s3 import NOAAS3ModelRepository
from .mo_datahub import MetOfficeDatahubModelRepository
from .ceda_ftp import CEDAFTPRawRepository
from .ecmwf_realtime import ECMWFRealTimeS3RawRepository
from .ecmwf_mars import ECMWFMARSRawRepository
from .noaa_s3 import NOAAS3RawRepository
from .mo_datahub import MetOfficeDatahubRawRepository

__all__ = [
"CEDAFTPModelRepository",
"ECMWFRealTimeS3ModelRepository",
"NOAAS3ModelRepository",
"MetOfficeDatahubModelRepository",
"ECMWFMARSModelRepository",
"CEDAFTPRawRepository",
"ECMWFRealTimeS3RawRepository",
"NOAAS3RawRepository",
"MetOfficeDatahubRawRepository",
"ECMWFMARSRawRepository",
]

Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
`this PDF <https://www.metoffice.gov.uk/binaries/content/assets/metofficegovuk/pdf/data/global-atmospheric-model-17-km-resolution.pdf>`_.
For further details on the repository, see the
`CEDAFTPModelRepository.repository` implementation.
`CEDAFTPRawRepository.repository` implementation.
Data discrepancies and corrections
==================================
Expand Down Expand Up @@ -94,7 +94,7 @@
log = logging.getLogger("nwp-consumer")


class CEDAFTPModelRepository(ports.ModelRepository):
class CEDAFTPRawRepository(ports.RawRepository):
"""Repository implementation for the MetOffice global model data."""

url_base: str = "ftp.ceda.ac.uk/badc/ukmo-nwp/data/global-grib"
Expand All @@ -109,8 +109,8 @@ def __init__(self, url_auth: str) -> None:

@staticmethod
@override
def repository() -> entities.ModelRepositoryMetadata:
return entities.ModelRepositoryMetadata(
def repository() -> entities.RawRepositoryMetadata:
return entities.RawRepositoryMetadata(
name="CEDA",
is_archive=True,
is_order_based=False,
Expand All @@ -128,7 +128,7 @@ def repository() -> entities.ModelRepositoryMetadata:
@staticmethod
@override
def model() -> entities.ModelMetadata:
return CEDAFTPModelRepository.repository().available_models["default"]
return CEDAFTPRawRepository.repository().available_models["default"]

@override
def fetch_init_data(self, it: dt.datetime) \
Expand Down Expand Up @@ -169,7 +169,7 @@ def _download_and_convert(self, url: str) -> ResultE[list[xr.DataArray]]:

@classmethod
@override
def authenticate(cls) -> ResultE["CEDAFTPModelRepository"]:
def authenticate(cls) -> ResultE["CEDAFTPRawRepository"]:
"""Authenticate with the CEDA FTP server.
Returns:
Expand Down Expand Up @@ -251,7 +251,7 @@ def _convert(path: pathlib.Path) -> ResultE[list[xr.DataArray]]:
try:
ds = entities.Parameter.rename_else_drop_ds_vars(
ds=ds,
allowed_parameters=CEDAFTPModelRepository.model().expected_coordinates.variable,
allowed_parameters=CEDAFTPRawRepository.model().expected_coordinates.variable,
)
# Ignore datasets with no variables of interest
if len(ds.data_vars) == 0:
Expand All @@ -265,7 +265,7 @@ def _convert(path: pathlib.Path) -> ResultE[list[xr.DataArray]]:
step=slice(
np.timedelta64(0, "h"),
np.timedelta64(
CEDAFTPModelRepository.model().expected_coordinates.step[-1],
CEDAFTPRawRepository.model().expected_coordinates.step[-1],
"h",
),
))
Expand All @@ -274,11 +274,11 @@ def _convert(path: pathlib.Path) -> ResultE[list[xr.DataArray]]:
])
.rename(name_dict={"time": "init_time"})
.expand_dims(dim="init_time")
.to_dataarray(name=CEDAFTPModelRepository.model().name)
.to_dataarray(name=CEDAFTPRawRepository.model().name)
)
da = (
da
.transpose(*CEDAFTPModelRepository.model().expected_coordinates.dims)
.transpose(*CEDAFTPRawRepository.model().expected_coordinates.dims)
# Remove the last value of the longitude dimension as it overlaps with the next file
# Reverse the latitude dimension to be in descending order
.isel(longitude=slice(None, -1), latitude=slice(None, None, -1))
Expand Down
Loading

0 comments on commit 50c52c4

Please sign in to comment.