Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v0.40 release #191

Merged
merged 4 commits into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ jobs:
- name: Install dependencies
run: |
poetry env use ${{ matrix.python-version }}
poetry install --all-extras --with dev,torch
poetry install --all-extras --with dev
poetry run pip install "torch<3.0" -i https://download.pytorch.org/whl/cpu
poetry run pip install "pytorch-lightning<3.0"

- name: Test with pytest
run: make test
4 changes: 3 additions & 1 deletion .github/workflows/coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ jobs:
- name: Install dependencies
run: |
poetry env use ${{ matrix.python-version }}
poetry install --all-extras --with dev,torch
poetry install --all-extras --with dev
poetry run pip install "torch<3.0" -i https://download.pytorch.org/whl/cpu
poetry run pip install "pytorch-lightning<3.0"

- name: Run Coverage
run: |
Expand Down
2 changes: 0 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@ repos:
language_version: python3.9
args: [--config=pyproject.toml, --diff, --color ]
- repo: https://github.com/charliermarsh/ruff-pre-commit
# Ruff version.
rev: 'v0.0.264'
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
- repo: https://github.com/adamchainz/blacken-docs
rev: "1.13.0"
hooks:
Expand Down
33 changes: 21 additions & 12 deletions docs/ml-flow.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,21 @@ Numalogic provides `MLflowRegistry`, to save and load models to/from MLflow.

Here, `tracking_uri` is the uri where mlflow server is running. The `static_keys` and `dynamic_keys` are used to form a unique key for the model.

The `primary_artifact` would be the main model, and `secondary_artifacts` can be used to save any pre-processing models like scalers.

The `artifact` would be the model or transformer object that needs to be saved.
A dictionary of metadata can also be saved along with the artifact.
```python
from numalogic.registry import MLflowRegistry
from numalogic.models.autoencoder.variants import VanillaAE

model = VanillaAE(seq_len=10)

# static and dynamic keys are used to look up a model
static_keys = ["synthetic", "3ts"]
dynamic_keys = ["minmaxscaler", "sparseconv1d"]
static_keys = ["model", "autoencoder"]
dynamic_keys = ["vanilla", "seq10"]

registry = MLflowRegistry(tracking_uri="http://0.0.0.0:5000", artifact_type="pytorch")
registry = MLflowRegistry(tracking_uri="http://0.0.0.0:5000")
registry.save(
skeys=static_keys,
dkeys=dynamic_keys,
primary_artifact=model,
secondary_artifacts={"preproc": scaler},
skeys=static_keys, dkeys=dynamic_keys, artifact=model, seq_len=10, lr=0.001
)
```

Expand All @@ -46,10 +46,19 @@ registry.save(
Once, the models are save to MLflow, the `load` function of `MLflowRegistry` can be used to load the model.

```python
from numalogic.registry import MLflowRegistry

static_keys = ["model", "autoencoder"]
dynamic_keys = ["vanilla", "seq10"]

registry = MLflowRegistry(tracking_uri="http://0.0.0.0:8080")
artifact_dict = registry.load(skeys=static_keys, dkeys=dynamic_keys)
scaler = artifact_dict["secondary_artifacts"]["preproc"]
model = artifact_dict["primary_artifact"]
artifact_data = registry.load(
skeys=static_keys, dkeys=dynamic_keys, artifact_type="pytorch"
)

# get the model and metadata
model = artifact_data.artifact
model_metadata = artifact_data.metadata
```

For more details, please refer to [MLflow Model Registry](https://www.mlflow.org/docs/latest/model-registry.html#)
2 changes: 2 additions & 0 deletions numalogic/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
PreprocessFactory,
PostprocessFactory,
ThresholdFactory,
RegistryFactory,
)


Expand All @@ -28,4 +29,5 @@
"PreprocessFactory",
"PostprocessFactory",
"ThresholdFactory",
"RegistryFactory",
]
68 changes: 48 additions & 20 deletions numalogic/config/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,7 @@
# limitations under the License.
from typing import Union

from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler

from numalogic.config._config import ModelInfo, RegistryInfo
from numalogic.models.autoencoder.variants import (
VanillaAE,
SparseVanillaAE,
Conv1dAE,
SparseConv1dAE,
LSTMAE,
SparseLSTMAE,
TransformerAE,
SparseTransformerAE,
)
from numalogic.models.threshold import StdDevThreshold, StaticThreshold, SigmoidThreshold
from numalogic.postprocess import TanhNorm, ExpMovingAverage
from numalogic.preprocess import LogTransformer, StaticPowerTransformer, TanhScaler
from numalogic.registry import MLflowRegistry, RedisRegistry
from numalogic.tools.exceptions import UnknownConfigArgsError


Expand All @@ -52,6 +36,9 @@ def get_cls(self, object_info: Union[ModelInfo, RegistryInfo]):


class PreprocessFactory(_ObjectFactory):
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler
from numalogic.preprocess import LogTransformer, StaticPowerTransformer, TanhScaler

_CLS_MAP = {
"StandardScaler": StandardScaler,
"MinMaxScaler": MinMaxScaler,
Expand All @@ -64,10 +51,14 @@ class PreprocessFactory(_ObjectFactory):


class PostprocessFactory(_ObjectFactory):
from numalogic.postprocess import TanhNorm, ExpMovingAverage

_CLS_MAP = {"TanhNorm": TanhNorm, "ExpMovingAverage": ExpMovingAverage}


class ThresholdFactory(_ObjectFactory):
from numalogic.models.threshold import StdDevThreshold, StaticThreshold, SigmoidThreshold

_CLS_MAP = {
"StdDevThreshold": StdDevThreshold,
"StaticThreshold": StaticThreshold,
Expand All @@ -76,6 +67,17 @@ class ThresholdFactory(_ObjectFactory):


class ModelFactory(_ObjectFactory):
from numalogic.models.autoencoder.variants import (
VanillaAE,
SparseVanillaAE,
Conv1dAE,
SparseConv1dAE,
LSTMAE,
SparseLSTMAE,
TransformerAE,
SparseTransformerAE,
)

_CLS_MAP = {
"VanillaAE": VanillaAE,
"SparseVanillaAE": SparseVanillaAE,
Expand All @@ -89,7 +91,33 @@ class ModelFactory(_ObjectFactory):


class RegistryFactory(_ObjectFactory):
_CLS_MAP = {
"RedisRegistry": RedisRegistry,
"MLflowRegistry": MLflowRegistry,
}
_CLS_SET = {"RedisRegistry", "MLflowRegistry"}

def get_instance(self, object_info: Union[ModelInfo, RegistryInfo]):
import numalogic.registry as reg

try:
_cls = getattr(reg, object_info.name)
except AttributeError as err:
if object_info.name in self._CLS_SET:
raise ImportError(
"Please install the required dependencies for the registry you want to use."
) from err
raise UnknownConfigArgsError(
f"Invalid model info instance provided: {object_info}"
) from err
return _cls(**object_info.conf)

def get_cls(self, object_info: Union[ModelInfo, RegistryInfo]):
import numalogic.registry as reg

try:
return getattr(reg, object_info.name)
except AttributeError as err:
if object_info.name in self._CLS_SET:
raise ImportError(
"Please install the required dependencies for the registry you want to use."
) from err
raise UnknownConfigArgsError(
f"Invalid model info instance provided: {object_info}"
) from err
25 changes: 14 additions & 11 deletions numalogic/registry/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,20 @@
from numalogic.registry.artifact import ArtifactManager, ArtifactData, ArtifactCache
from numalogic.registry.localcache import LocalLRUCache


__all__ = ["ArtifactManager", "ArtifactData", "ArtifactCache", "LocalLRUCache"]


try:
from numalogic.registry.mlflow_registry import MLflowRegistry # noqa: F401
except ImportError:
pass
else:
__all__.append("MLflowRegistry")

try:
from numalogic.registry.mlflow_registry import MLflowRegistry
from numalogic.registry.redis_registry import RedisRegistry
from numalogic.registry.redis_registry import RedisRegistry # noqa: F401
except ImportError:
__all__ = ["ArtifactManager", "ArtifactData", "ArtifactCache", "LocalLRUCache"]
pass
else:
__all__ = [
"ArtifactManager",
"ArtifactData",
"MLflowRegistry",
"ArtifactCache",
"LocalLRUCache",
"RedisRegistry",
]
__all__.append("RedisRegistry")
22 changes: 20 additions & 2 deletions numalogic/registry/artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from dataclasses import dataclass
from typing import Any, Generic, TypeVar

from numalogic.tools.types import artifact_t, KEYS, META_T, EXTRA_T
from numalogic.tools.types import artifact_t, KEYS, META_T, META_VT, EXTRA_T


@dataclass
Expand Down Expand Up @@ -54,7 +54,7 @@ def load(
"""
raise NotImplementedError("Please implement this method!")

def save(self, skeys: KEYS, dkeys: KEYS, artifact: artifact_t, **metadata: META_T) -> Any:
def save(self, skeys: KEYS, dkeys: KEYS, artifact: artifact_t, **metadata: META_VT) -> Any:
r"""
Saves the artifact into mlflow registry and updates version.
Args:
Expand All @@ -75,6 +75,17 @@ def delete(self, skeys: KEYS, dkeys: KEYS, version: str) -> None:
"""
raise NotImplementedError("Please implement this method!")

@staticmethod
def is_artifact_stale(artifact_data: ArtifactData, freq_hr: int) -> bool:
"""
Returns whether the given artifact is stale or not, i.e. if
more time has elapsed since it was last retrained.
Args:
artifact_data: ArtifactData object to look into
freq_hr: Frequency of retraining in hours
"""
raise NotImplementedError("Please implement this method!")

@staticmethod
def construct_key(skeys: KEYS, dkeys: KEYS) -> str:
"""
Expand Down Expand Up @@ -135,3 +146,10 @@ def delete(self, key: str) -> None:
Implement this method for your custom cache.
"""
raise NotImplementedError("Please implement this method!")

def clear(self) -> None:
r"""
Clears the cache.
Implement this method for your custom cache.
"""
raise NotImplementedError("Please implement this method!")
3 changes: 3 additions & 0 deletions numalogic/registry/localcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,6 @@ def save(self, key: str, artifact: ArtifactData) -> None:

def delete(self, key: str) -> Optional[ArtifactData]:
return self.__cache.pop(key, default=None)

def clear(self) -> None:
self.__cache.clear()
Loading