numaproj · ab93 · May 11, 2023 · May 9, 2023 · May 10, 2023 · May 10, 2023
@@ -30,7 +30,9 @@ jobs:
     - name: Install dependencies
       run: |
         poetry env use ${{ matrix.python-version }}
-        poetry install --all-extras --with dev,torch
+        poetry install --all-extras --with dev
+        poetry run pip install "torch<3.0" -i https://download.pytorch.org/whl/cpu
+        poetry run pip install "pytorch-lightning<3.0"
 
     - name: Test with pytest
       run: make test
@@ -30,7 +30,9 @@ jobs:
     - name: Install dependencies
       run: |
         poetry env use ${{ matrix.python-version }}
-        poetry install --all-extras --with dev,torch
+        poetry install --all-extras --with dev
+        poetry run pip install "torch<3.0" -i https://download.pytorch.org/whl/cpu
+        poetry run pip install "pytorch-lightning<3.0"
 
     - name: Run Coverage
       run: |

@@ -8,11 +8,9 @@ repos:
       language_version: python3.9
       args: [--config=pyproject.toml, --diff, --color ]
 - repo: https://github.com/charliermarsh/ruff-pre-commit
-  # Ruff version.
   rev: 'v0.0.264'
   hooks:
     - id: ruff
-      args: [ --fix, --exit-non-zero-on-fix ]
 - repo: https://github.com/adamchainz/blacken-docs
   rev: "1.13.0"
   hooks:

@@ -23,21 +23,21 @@ Numalogic provides `MLflowRegistry`, to save and load models to/from MLflow.
 
 Here, `tracking_uri` is the uri where mlflow server is running. The `static_keys` and `dynamic_keys` are used to form a unique key for the model.
 
-The `primary_artifact` would be the main model, and `secondary_artifacts` can be used to save any pre-processing models like scalers.
-
+The `artifact` would be the model or transformer object that needs to be saved.
+A dictionary of metadata can also be saved along with the artifact.
 ```python
 from numalogic.registry import MLflowRegistry
+from numalogic.models.autoencoder.variants import VanillaAE
+
+model = VanillaAE(seq_len=10)
 
 # static and dynamic keys are used to look up a model
-static_keys = ["synthetic", "3ts"]
-dynamic_keys = ["minmaxscaler", "sparseconv1d"]
+static_keys = ["model", "autoencoder"]
+dynamic_keys = ["vanilla", "seq10"]
 
-registry = MLflowRegistry(tracking_uri="http://0.0.0.0:5000", artifact_type="pytorch")
+registry = MLflowRegistry(tracking_uri="http://0.0.0.0:5000")
 registry.save(
-    skeys=static_keys,
-    dkeys=dynamic_keys,
-    primary_artifact=model,
-    secondary_artifacts={"preproc": scaler},
+    skeys=static_keys, dkeys=dynamic_keys, artifact=model, seq_len=10, lr=0.001
 )
 ```
 
@@ -46,10 +46,19 @@ registry.save(
 Once, the models are save to MLflow, the `load` function of `MLflowRegistry` can be used to load the model.
 
 ```python
+from numalogic.registry import MLflowRegistry
+
+static_keys = ["model", "autoencoder"]
+dynamic_keys = ["vanilla", "seq10"]
+
 registry = MLflowRegistry(tracking_uri="http://0.0.0.0:8080")
-artifact_dict = registry.load(skeys=static_keys, dkeys=dynamic_keys)
-scaler = artifact_dict["secondary_artifacts"]["preproc"]
-model = artifact_dict["primary_artifact"]
+artifact_data = registry.load(
+    skeys=static_keys, dkeys=dynamic_keys, artifact_type="pytorch"
+)
+
+# get the model and metadata
+model = artifact_data.artifact
+model_metadata = artifact_data.metadata
 ```
 
 For more details, please refer to [MLflow Model Registry](https://www.mlflow.org/docs/latest/model-registry.html#)
@@ -16,6 +16,7 @@
     PreprocessFactory,
     PostprocessFactory,
     ThresholdFactory,
+    RegistryFactory,
 )
 
 
@@ -28,4 +29,5 @@
     "PreprocessFactory",
     "PostprocessFactory",
     "ThresholdFactory",
+    "RegistryFactory",
 ]
@@ -10,23 +10,7 @@
 # limitations under the License.
 from typing import Union
 
-from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler
-
 from numalogic.config._config import ModelInfo, RegistryInfo
-from numalogic.models.autoencoder.variants import (
-    VanillaAE,
-    SparseVanillaAE,
-    Conv1dAE,
-    SparseConv1dAE,
-    LSTMAE,
-    SparseLSTMAE,
-    TransformerAE,
-    SparseTransformerAE,
-)
-from numalogic.models.threshold import StdDevThreshold, StaticThreshold, SigmoidThreshold
-from numalogic.postprocess import TanhNorm, ExpMovingAverage
-from numalogic.preprocess import LogTransformer, StaticPowerTransformer, TanhScaler
-from numalogic.registry import MLflowRegistry, RedisRegistry
 from numalogic.tools.exceptions import UnknownConfigArgsError
 
 
@@ -52,6 +36,9 @@ def get_cls(self, object_info: Union[ModelInfo, RegistryInfo]):
 
 
 class PreprocessFactory(_ObjectFactory):
+    from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler
+    from numalogic.preprocess import LogTransformer, StaticPowerTransformer, TanhScaler
+
     _CLS_MAP = {
         "StandardScaler": StandardScaler,
         "MinMaxScaler": MinMaxScaler,
@@ -64,10 +51,14 @@ class PreprocessFactory(_ObjectFactory):
 
 
 class PostprocessFactory(_ObjectFactory):
+    from numalogic.postprocess import TanhNorm, ExpMovingAverage
+
     _CLS_MAP = {"TanhNorm": TanhNorm, "ExpMovingAverage": ExpMovingAverage}
 
 
 class ThresholdFactory(_ObjectFactory):
+    from numalogic.models.threshold import StdDevThreshold, StaticThreshold, SigmoidThreshold
+
     _CLS_MAP = {
         "StdDevThreshold": StdDevThreshold,
         "StaticThreshold": StaticThreshold,
@@ -76,6 +67,17 @@ class ThresholdFactory(_ObjectFactory):
 
 
 class ModelFactory(_ObjectFactory):
+    from numalogic.models.autoencoder.variants import (
+        VanillaAE,
+        SparseVanillaAE,
+        Conv1dAE,
+        SparseConv1dAE,
+        LSTMAE,
+        SparseLSTMAE,
+        TransformerAE,
+        SparseTransformerAE,
+    )
+
     _CLS_MAP = {
         "VanillaAE": VanillaAE,
         "SparseVanillaAE": SparseVanillaAE,
@@ -89,7 +91,33 @@ class ModelFactory(_ObjectFactory):
 
 
 class RegistryFactory(_ObjectFactory):
-    _CLS_MAP = {
-        "RedisRegistry": RedisRegistry,
-        "MLflowRegistry": MLflowRegistry,
-    }
+    _CLS_SET = {"RedisRegistry", "MLflowRegistry"}
+
+    def get_instance(self, object_info: Union[ModelInfo, RegistryInfo]):
+        import numalogic.registry as reg
+
+        try:
+            _cls = getattr(reg, object_info.name)
+        except AttributeError as err:
+            if object_info.name in self._CLS_SET:
+                raise ImportError(
+                    "Please install the required dependencies for the registry you want to use."
+                ) from err
+            raise UnknownConfigArgsError(
+                f"Invalid model info instance provided: {object_info}"
+            ) from err
+        return _cls(**object_info.conf)
+
+    def get_cls(self, object_info: Union[ModelInfo, RegistryInfo]):
+        import numalogic.registry as reg
+
+        try:
+            return getattr(reg, object_info.name)
+        except AttributeError as err:
+            if object_info.name in self._CLS_SET:
+                raise ImportError(
+                    "Please install the required dependencies for the registry you want to use."
+                ) from err
+            raise UnknownConfigArgsError(
+                f"Invalid model info instance provided: {object_info}"
+            ) from err
@@ -13,17 +13,20 @@
 from numalogic.registry.artifact import ArtifactManager, ArtifactData, ArtifactCache
 from numalogic.registry.localcache import LocalLRUCache
 
+
+__all__ = ["ArtifactManager", "ArtifactData", "ArtifactCache", "LocalLRUCache"]
+
+
+try:
+    from numalogic.registry.mlflow_registry import MLflowRegistry  # noqa: F401
+except ImportError:
+    pass
+else:
+    __all__.append("MLflowRegistry")
+
 try:
-    from numalogic.registry.mlflow_registry import MLflowRegistry
-    from numalogic.registry.redis_registry import RedisRegistry
+    from numalogic.registry.redis_registry import RedisRegistry  # noqa: F401
 except ImportError:
-    __all__ = ["ArtifactManager", "ArtifactData", "ArtifactCache", "LocalLRUCache"]
+    pass
 else:
-    __all__ = [
-        "ArtifactManager",
-        "ArtifactData",
-        "MLflowRegistry",
-        "ArtifactCache",
-        "LocalLRUCache",
-        "RedisRegistry",
-    ]
+    __all__.append("RedisRegistry")
@@ -13,7 +13,7 @@
 from dataclasses import dataclass
 from typing import Any, Generic, TypeVar
 
-from numalogic.tools.types import artifact_t, KEYS, META_T, EXTRA_T
+from numalogic.tools.types import artifact_t, KEYS, META_T, META_VT, EXTRA_T
 
 
 @dataclass
@@ -54,7 +54,7 @@ def load(
         """
         raise NotImplementedError("Please implement this method!")
 
-    def save(self, skeys: KEYS, dkeys: KEYS, artifact: artifact_t, **metadata: META_T) -> Any:
+    def save(self, skeys: KEYS, dkeys: KEYS, artifact: artifact_t, **metadata: META_VT) -> Any:
         r"""
         Saves the artifact into mlflow registry and updates version.
         Args:
@@ -75,6 +75,17 @@ def delete(self, skeys: KEYS, dkeys: KEYS, version: str) -> None:
         """
         raise NotImplementedError("Please implement this method!")
 
+    @staticmethod
+    def is_artifact_stale(artifact_data: ArtifactData, freq_hr: int) -> bool:
+        """
+        Returns whether the given artifact is stale or not, i.e. if
+        more time has elapsed since it was last retrained.
+        Args:
+            artifact_data: ArtifactData object to look into
+            freq_hr: Frequency of retraining in hours
+        """
+        raise NotImplementedError("Please implement this method!")
+
     @staticmethod
     def construct_key(skeys: KEYS, dkeys: KEYS) -> str:
         """
@@ -135,3 +146,10 @@ def delete(self, key: str) -> None:
         Implement this method for your custom cache.
         """
         raise NotImplementedError("Please implement this method!")
+
+    def clear(self) -> None:
+        r"""
+        Clears the cache.
+        Implement this method for your custom cache.
+        """
+        raise NotImplementedError("Please implement this method!")
@@ -41,3 +41,6 @@ def save(self, key: str, artifact: ArtifactData) -> None:
 
     def delete(self, key: str) -> Optional[ArtifactData]:
         return self.__cache.pop(key, default=None)
+
+    def clear(self) -> None:
+        self.__cache.clear()