Skip to content

Commit

Permalink
sklearn tests and tags rework (#7)
Browse files (browse the repository at this point in the history)
* actually change python version

* dummy classifiers and sklearn lower bound change

* test fix

* test fix

* dev

* early sklearn version fixes

* all interval classifiers

* dummy and conversion bugfix

* version

* test

* testing update

* version

* pandas

* stop all workflows failing

* copy check
  • Loading branch information
MatthewMiddlehurst authored Apr 20, 2023
1 parent 3542cc7 commit 1dad889
Show file tree
Hide file tree
Showing 17 changed files with 681 additions and 1,509 deletions.
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
runs-on: ${{ matrix.os }}

strategy:
fail-fast: false
matrix:
os: [ ubuntu-latest, macOS-latest, windows-latest ]
python-version: [ '3.8', '3.9', '3.10' ]
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "tsml"
version = "0.0.6"
version = "0.0.7"
description = "A toolkit for time series machine learning algorithms."
authors = [
{name = "Matthew Middlehurst", email = "m.middlehurst@uea.ac.uk"},
Expand Down Expand Up @@ -38,6 +38,7 @@ dependencies = [
"numba>=0.55",
"numpy>=1.21.0",
"scikit-learn>=1.0.2",
"pandas",
]

[project.optional-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion tsml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
"""tsml."""

__version__ = "0.0.6"
__version__ = "0.0.7"
94 changes: 59 additions & 35 deletions tsml/dummy/_dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from sklearn.dummy import DummyRegressor as SklearnDummyRegressor
from sklearn.utils import check_random_state
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import _num_samples, check_is_fitted

from tsml.base import BaseTimeSeriesEstimator

Expand Down Expand Up @@ -85,65 +85,77 @@ class prior probabilities.
0.5
"""

def __init__(self, strategy="prior", random_state=None, constant=None):
def __init__(
self, strategy="prior", validate=False, random_state=None, constant=None
):
self.strategy = strategy
self.validate = validate
self.random_state = random_state
self.constant = constant

super(DummyClassifier, self).__init__()

def fit(self, X, y):
""""""
X, y = self._validate_data(X=X, y=y, ensure_min_series_length=1)
if self.validate:
X, y = self._validate_data(X=X, y=y, ensure_min_series_length=1)

check_classification_targets(y)
check_classification_targets(y)

self.classes_ = np.unique(y)
self.n_classes_ = self.classes_.shape[0]
self.class_dictionary_ = {}
for index, classVal in enumerate(self.classes_):
self.class_dictionary_[classVal] = index
self.classes_ = np.unique(np.asarray(y))

if self.n_classes_ == 1:
return self
if self.validate:
self.n_classes_ = self.classes_.shape[0]
self.class_dictionary_ = {}
for index, classVal in enumerate(self.classes_):
self.class_dictionary_[classVal] = index

self._clf = SklearnDummyClassifier(
if self.n_classes_ == 1:
return self

self.clf_ = SklearnDummyClassifier(
strategy=self.strategy,
random_state=self.random_state,
constant=self.constant,
)
self._clf.fit(None, y)
self.clf_.fit(None, y)

return self

def predict(self, X) -> np.ndarray:
""""""
check_is_fitted(self)

X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
if self.validate:
# treat case of single class seen in fit
if self.n_classes_ == 1:
return np.repeat(
list(self.class_dictionary_.keys()), X.shape[0], axis=0
)

# treat case of single class seen in fit
if self.n_classes_ == 1:
return np.repeat(list(self.class_dictionary_.keys()), X.shape[0], axis=0)
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)

return self._clf.predict(np.zeros(X.shape))
return self.clf_.predict(np.zeros((_num_samples(X), 2)))

def predict_proba(self, X) -> np.ndarray:
""""""
check_is_fitted(self)

# treat case of single class seen in fit
if self.n_classes_ == 1:
return np.repeat([[1]], X.shape[0], axis=0)
if self.validate:
# treat case of single class seen in fit
if self.n_classes_ == 1:
return np.repeat([[1]], X.shape[0], axis=0)

X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)

return self._clf.predict_proba(np.zeros(X.shape))
return self.clf_.predict_proba(np.zeros((_num_samples(X), 2)))

def _more_tags(self):
return {
"X_types": ["3darray", "2darray", "np_list"],
"equal_length_only": False,
"no_validation": not self.validate,
"allow_nan": True,
}


Expand Down Expand Up @@ -199,36 +211,41 @@ class DummyRegressor(RegressorMixin, BaseTimeSeriesEstimator):
-0.07184048625633688
"""

def __init__(self, strategy="mean", constant=None, quantile=None):
def __init__(self, strategy="mean", validate=False, constant=None, quantile=None):
self.strategy = strategy
self.validate = validate
self.constant = constant
self.quantile = quantile

super(DummyRegressor, self).__init__()

def fit(self, X, y):
""""""
_, y = self._validate_data(X=X, y=y, ensure_min_series_length=1)
if self.validate:
_, y = self._validate_data(X=X, y=y, ensure_min_series_length=1)

self._reg = SklearnDummyRegressor(
self.reg_ = SklearnDummyRegressor(
strategy=self.strategy, constant=self.constant, quantile=self.quantile
)
self._reg.fit(None, y)
self.reg_.fit(None, y)

return self

def predict(self, X):
""""""
check_is_fitted(self)

X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
if self.validate:
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)

return self._reg.predict(np.zeros(X.shape))
return self.reg_.predict(np.zeros((_num_samples(X), 2)))

def _more_tags(self):
return {
"X_types": ["3darray", "2darray", "np_list"],
"equal_length_only": False,
"no_validation": not self.validate,
"allow_nan": True,
}


Expand Down Expand Up @@ -257,16 +274,20 @@ class DummyClusterer(ClusterMixin, BaseTimeSeriesEstimator):
0.2087729039422543
"""

def __init__(self, strategy="single", n_clusters=2, random_state=None):
def __init__(
self, strategy="single", validate=False, n_clusters=2, random_state=None
):
self.strategy = strategy
self.validate = validate
self.n_clusters = n_clusters
self.random_state = random_state

super(DummyClusterer, self).__init__()

def fit(self, X, y=None):
""""""
X = self._validate_data(X=X, ensure_min_series_length=1)
if self.validate:
X = self._validate_data(X=X, ensure_min_series_length=1)

if self.strategy == "single":
self.labels_ = np.zeros(len(X), dtype=np.int32)
Expand All @@ -284,20 +305,23 @@ def predict(self, X):
""""""
check_is_fitted(self)

X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)
if self.validate:
X = self._validate_data(X=X, reset=False, ensure_min_series_length=1)

if self.strategy == "single":
return np.zeros(len(X), dtype=np.int32)
return np.zeros(_num_samples(X), dtype=np.int32)
elif self.strategy == "unique":
return np.arange(len(X), dtype=np.int32)
return np.arange(_num_samples(X), dtype=np.int32)
elif self.strategy == "random":
rng = check_random_state(self.random_state)
return rng.randint(self.n_clusters, size=len(X), dtype=np.int32)
return rng.randint(self.n_clusters, size=_num_samples(X), dtype=np.int32)
else:
raise ValueError(f"Unknown strategy {self.strategy}")

def _more_tags(self):
return {
"X_types": ["3darray", "2darray", "np_list"],
"equal_length_only": False,
"no_validation": not self.validate,
"allow_nan": True,
}
14 changes: 8 additions & 6 deletions tsml/interval_based/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ def fit(self, X, y):
X, y = self._validate_data(X=X, y=y, ensure_min_samples=2)
X = self._convert_X(X)

rng = check_random_state(self.random_state)

self.n_instances_, self.n_dims_, self.series_length_ = X.shape
if is_classifier(self):
check_classification_targets(y)
Expand Down Expand Up @@ -260,9 +262,7 @@ def fit(self, X, y):
self._series_transformers = [None]
# clone series_transformers if it is a transformer and transform the input data
elif is_transformer(self.series_transformers):
t = _clone_estimator(
self.series_transformers, random_state=self.random_state
)
t = _clone_estimator(self.series_transformers, random_state=rng)
Xt = [t.fit_transform(X, y)]
self._series_transformers = [t]
# clone each series_transformers transformer and include the base series if None
Expand All @@ -276,7 +276,7 @@ def fit(self, X, y):
Xt.append(X)
self._series_transformers.append(None)
elif is_transformer(transformer):
t = _clone_estimator(transformer, random_state=self.random_state)
t = _clone_estimator(transformer, random_state=rng)
Xt.append(t.fit_transform(X, y))
self._series_transformers.append(t)
else:
Expand Down Expand Up @@ -458,7 +458,8 @@ def fit(self, X, y):
# single transformer or function for all series_transformers
if is_transformer(self.interval_features):
self._interval_transformer = [True] * len(Xt)
self._interval_features = [[self.interval_features]] * len(Xt)
transformer = _clone_estimator(self.interval_features, random_state=rng)
self._interval_features = [[transformer]] * len(Xt)
elif callable(self.interval_features):
self._interval_function = [True] * len(Xt)
self._interval_features = [[self.interval_features]] * len(Xt)
Expand Down Expand Up @@ -491,6 +492,7 @@ def fit(self, X, y):
for method in feature:
if is_transformer(method):
self._interval_transformer[i] = True
feature = _clone_estimator(feature, random_state=rng)
elif callable(method):
self._interval_function[i] = True
else:
Expand All @@ -503,6 +505,7 @@ def fit(self, X, y):
self._interval_features.append(feature)
elif is_transformer(feature):
self._interval_transformer[i] = True
feature = _clone_estimator(feature, random_state=rng)
self._interval_features.append([feature])
elif callable(feature):
self._interval_function[i] = True
Expand Down Expand Up @@ -1030,4 +1033,3 @@ def _predict_for_estimator(self, Xt, estimator, intervals, predict_proba=False):
return estimator.predict_proba(interval_features)
else:
return estimator.predict(interval_features)

8 changes: 8 additions & 0 deletions tsml/interval_based/_interval_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def fit(self, X, y):
X, y = self._validate_data(
X=X, y=y, ensure_min_samples=2, ensure_min_series_length=3
)
X = self._convert_X(X)

self.n_instances_, self.n_dims_, self.series_length_ = X.shape
self.classes_ = np.unique(y)
Expand Down Expand Up @@ -152,6 +153,7 @@ def predict(self, X) -> np.ndarray:
check_is_fitted(self)

X = self._validate_data(X=X, reset=False, ensure_min_series_length=3)
X = self._convert_X(X)

return self._estimator.predict(self._transformer.transform(X))

Expand All @@ -171,6 +173,7 @@ def predict_proba(self, X) -> np.ndarray:
check_is_fitted(self)

X = self._validate_data(X=X, reset=False, ensure_min_series_length=3)
X = self._convert_X(X)

m = getattr(self._estimator, "predict_proba", None)
if callable(m):
Expand Down Expand Up @@ -311,6 +314,7 @@ def fit(self, X, y):
X, y = self._validate_data(
X=X, y=y, ensure_min_samples=2, ensure_min_series_length=3
)
X = self._convert_X(X)

self.n_instances_, self.n_dims_, self.series_length_ = X.shape

Expand Down Expand Up @@ -359,6 +363,7 @@ def predict(self, X) -> np.ndarray:
check_is_fitted(self)

X = self._validate_data(X=X, reset=False, ensure_min_series_length=3)
X = self._convert_X(X)

return self._estimator.predict(self._transformer.transform(X))

Expand Down Expand Up @@ -489,6 +494,7 @@ def fit(self, X, y):
X, y = self._validate_data(
X=X, y=y, ensure_min_samples=2, ensure_min_series_length=7
)
X = self._convert_X(X)

self.n_instances_, self.n_dims_, self.series_length_ = X.shape
self.classes_ = np.unique(y)
Expand Down Expand Up @@ -542,6 +548,7 @@ def predict(self, X) -> np.ndarray:
check_is_fitted(self)

X = self._validate_data(X=X, reset=False, ensure_min_series_length=7)
X = self._convert_X(X)

return self._estimator.predict(self._transformer.transform(X))

Expand All @@ -561,6 +568,7 @@ def predict_proba(self, X) -> np.ndarray:
check_is_fitted(self)

X = self._validate_data(X=X, reset=False, ensure_min_series_length=7)
X = self._convert_X(X)

m = getattr(self._estimator, "predict_proba", None)
if callable(m):
Expand Down
Loading

0 comments on commit 1dad889

Please sign in to comment.