Skip to content

Commit

Permalink
feat: ProphetModelDataset (#720)
Browse files Browse the repository at this point in the history
* Added dataset and tests for Facebook's Prophet model

* Removed in-path example.

* Added prophet as optional dependency in toml

* Added prophet as testing dependency

* Update docstring to have a doctest example with example data

Signed-off-by: galenseilis <galen.seilis@seilis.ca>

* Try without assert

* Move ProphetModelDataset to experimental datasets

Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com>

* Move test fixtures + fix imports

Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com>

* Fix prophet docstring and tests

Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com>

* Fix lint

Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com>

* Fix docs

Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com>

* Fix docs

Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com>

* Bandit

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Add nosec instead

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

* Add to release notes

Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>

---------

Signed-off-by: galenseilis <galen.seilis@seilis.ca>
Signed-off-by: Merel Theisen <merel.theisen@quantumblack.com>
Signed-off-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com>
Co-authored-by: Merel Theisen <merel.theisen@quantumblack.com>
Co-authored-by: Ankita Katiyar <110245118+ankatiyar@users.noreply.github.com>
Co-authored-by: Ankita Katiyar <ankitakatiyar2401@gmail.com>
  • Loading branch information
5 people authored Sep 27, 2024
1 parent 0a3a381 commit 970862b
Show file tree
Hide file tree
Showing 9 changed files with 385 additions and 1 deletion.
3 changes: 3 additions & 0 deletions kedro-datasets/RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
| Type | Description | Location |
|-------------------------------------|-----------------------------------------------------------|-----------------------------------------|
| `pytorch.PyTorchDataset` | A dataset for securely saving and loading PyTorch models | `kedro_datasets_experimental.pytorch` |
| `prophet.ProphetModelDataset` | A dataset for Meta's Prophet model for time series forecasting | `kedro_datasets_experimental.prophet` |


* Added the following new core datasets:

Expand All @@ -24,6 +26,7 @@ Many thanks to the following Kedroids for contributing PRs to this release:
* [yury-fedotov](https://github.com/yury-fedotov)
* [gitgud5000](https://github.com/gitgud5000)
* [janickspirig](https://github.com/janickspirig)
* [Galen Seilis](https://github.com/galenseilis)


# Release 4.1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@ kedro_datasets_experimental
langchain.ChatOpenAIDataset
langchain.OpenAIEmbeddingsDataset
netcdf.NetCDFDataset
prophet.ProphetModelDataset
pytorch.PyTorchDataset
rioxarray.GeoTIFFDataset
2 changes: 2 additions & 0 deletions kedro-datasets/docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@
"xarray.core.dataset.Dataset",
"xarray.core.dataarray.DataArray",
"torch.nn.modules.module.Module",
"prophet.forecaster.Prophet",
"Prophet",
),
"py:data": (
"typing.Any",
Expand Down
11 changes: 11 additions & 0 deletions kedro-datasets/kedro_datasets_experimental/prophet/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""``ProphetModelDataset`` implementation to load/save Prophet models from/to a JSON file."""

from typing import Any

import lazy_loader as lazy

# Declared for static type checkers only: the class itself is exposed through
# the lazy ``__getattr__`` installed below, so the annotation must carry the
# same name that is listed in ``submod_attrs``.
ProphetModelDataset: Any

# Defer importing ``prophet_dataset`` (and its dependencies) until the exported
# attribute is actually requested from this package.
__getattr__, __dir__, __all__ = lazy.attach(
    __name__, submod_attrs={"prophet_dataset": ["ProphetModelDataset"]}
)
121 changes: 121 additions & 0 deletions kedro-datasets/kedro_datasets_experimental/prophet/prophet_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from __future__ import annotations

from typing import Any

from kedro.io.core import Version, get_filepath_str
from prophet import Prophet
from prophet.serialize import model_from_json, model_to_json

from kedro_datasets.json import JSONDataset


class ProphetModelDataset(JSONDataset):
    """``ProphetModelDataset`` loads/saves Facebook Prophet models to a JSON file using an
    underlying filesystem (e.g., local, S3, GCS). It uses Prophet's built-in
    serialization (``model_to_json`` / ``model_from_json``) to handle the JSON file.

    Example usage for the
    `YAML API <https://kedro.readthedocs.io/en/stable/data/\
    data_catalog_yaml_examples.html>`_:

    .. code-block:: yaml

        model:
          type: kedro_datasets_experimental.prophet.ProphetModelDataset
          filepath: gcs://your_bucket/model.json
          fs_args:
            project: my-project
          credentials: my_gcp_credentials

    Example usage for the
    `Python API <https://kedro.readthedocs.io/en/stable/data/\
    advanced_data_catalog_usage.html>`_:

    .. code-block:: pycon

        >>> from kedro_datasets_experimental.prophet import ProphetModelDataset
        >>> from prophet import Prophet
        >>> import pandas as pd
        >>>
        >>> df = pd.DataFrame(
        ...     {
        ...         "ds": ["2024-01-01", "2024-01-02", "2024-01-03"],
        ...         "y": [100, 200, 300],
        ...     }
        ... )
        >>>
        >>> model = Prophet()
        >>> model.fit(df)
        >>> dataset = ProphetModelDataset(filepath="path/to/model.json")
        >>> dataset.save(model)
        >>> reloaded_model = dataset.load()

    """

    def __init__(  # noqa: PLR0913
        self,
        *,
        filepath: str,
        save_args: dict[str, Any] | None = None,
        version: Version | None = None,
        credentials: dict[str, Any] | None = None,
        fs_args: dict[str, Any] | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Creates a new instance of ``ProphetModelDataset`` pointing to a concrete JSON file
        on a specific filesystem.

        Args:
            filepath: Filepath in POSIX format to a JSON file prefixed with a protocol like `s3://`.
                If prefix is not provided, `file` protocol (local filesystem) will be used.
                The prefix should be any protocol supported by ``fsspec``.
                Note: `http(s)` doesn't support versioning.
            save_args: Options forwarded unchanged to the ``JSONDataset`` constructor.
                Note that saving in this class writes the string produced by Prophet's
                ``model_to_json`` directly, so these options are not applied when the
                model is serialized.
            version: If specified, should be an instance of
                ``kedro.io.core.Version``. If its ``load`` attribute is
                None, the latest version will be loaded. If its ``save``
                attribute is None, save version will be autogenerated.
            credentials: Credentials required to get access to the underlying filesystem.
                E.g. for ``GCSFileSystem`` it should look like `{"token": None}`.
            fs_args: Extra arguments to pass into underlying filesystem class constructor
                (e.g. `{"project": "my-project"}` for ``GCSFileSystem``), as well as
                to pass to the filesystem's `open` method through nested keys
                `open_args_load` and `open_args_save`.
                Here you can find all available arguments for `open`:
                https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem.open
            metadata: Any arbitrary metadata.
                This is ignored by Kedro, but may be consumed by users or external plugins.
        """
        # All filesystem, versioning and credential handling is delegated to
        # ``JSONDataset``; only the (de)serialization steps differ.
        super().__init__(
            filepath=filepath,
            save_args=save_args,
            version=version,
            credentials=credentials,
            fs_args=fs_args,
            metadata=metadata,
        )

    def _load(self) -> Prophet:
        """Loads a Prophet model from a JSON file.

        The file contents are read in full and handed to Prophet's
        ``model_from_json``.

        Returns:
            Prophet: A deserialized Prophet model.
        """
        load_path = get_filepath_str(self._get_load_path(), self._protocol)

        with self._fs.open(load_path, **self._fs_open_args_load) as fs_file:
            return model_from_json(fs_file.read())

    def _save(self, data: Prophet) -> None:
        """Saves a Prophet model to a JSON file.

        The model is serialized with Prophet's ``model_to_json`` and the
        resulting string is written as-is.

        Args:
            data: The Prophet model instance to be serialized and saved.
        """
        save_path = get_filepath_str(self._get_save_path(), self._protocol)

        with self._fs.open(save_path, **self._fs_open_args_save) as fs_file:
            fs_file.write(model_to_json(data))

        # Inherited helper; presumably drops cached filesystem state so the
        # file just written is visible to later calls -- confirm in the parent class.
        self._invalidate_cache()
34 changes: 34 additions & 0 deletions kedro-datasets/kedro_datasets_experimental/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""
This file contains the fixtures that are reusable by any tests within
this directory. You don't need to import the fixtures as pytest will
discover them automatically. More info here:
https://docs.pytest.org/en/latest/fixture.html
"""

from kedro.io.core import generate_timestamp
from pytest import fixture


@fixture(params=[None])
def load_version(request):
    """Supply the dataset load version chosen by the test parametrization.

    The only built-in parameter is ``None``.
    """
    requested_version = request.param
    return requested_version


@fixture(params=[None])
def save_version(request):
    """Supply the dataset save version chosen by the test parametrization.

    When the parameter is falsy (the built-in parameter is ``None``), a
    freshly generated timestamp is returned instead.
    """
    explicit_version = request.param
    if explicit_version:
        return explicit_version
    return generate_timestamp()


@fixture(params=[None])
def load_args(request):
    """Supply the ``load_args`` value chosen by the test parametrization.

    The only built-in parameter is ``None``.
    """
    requested_load_args = request.param
    return requested_load_args


@fixture(params=[None])
def save_args(request):
    """Supply the ``save_args`` value chosen by the test parametrization.

    The only built-in parameter is ``None``.
    """
    requested_save_args = request.param
    return requested_save_args


@fixture(params=[None])
def fs_args(request):
    """Supply the ``fs_args`` value chosen by the test parametrization.

    The only built-in parameter is ``None``.
    """
    requested_fs_args = request.param
    return requested_fs_args
Empty file.
Loading

0 comments on commit 970862b

Please sign in to comment.