Skip to content

Commit

Permalink
Dummy and conversion bugfix (#6)
Browse files (browse the repository at this point in the history)
* actually change python version

* dummy classifiers and sklearn lower bound change

* test fix

* test fix

* dev

* early sklearn version fixes

* all interval classifiers

* dummy and conversion bugfix

* version

* test
  • Loading branch information
MatthewMiddlehurst authored Apr 17, 2023
1 parent fb4f7ab commit 3542cc7
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 41 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "tsml"
version = "0.0.5"
version = "0.0.6"
description = "A toolkit for time series machine learning algorithms."
authors = [
{name = "Matthew Middlehurst", email = "m.middlehurst@uea.ac.uk"},
Expand Down
2 changes: 1 addition & 1 deletion tsml/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
"""tsml."""

__version__ = "0.0.5"
__version__ = "0.0.6"
33 changes: 26 additions & 7 deletions tsml/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,10 @@ def _validate_data(
return out

def _convert_X(
self, X: Union[np.ndarray, List[np.ndarray]], concatenate_channels: bool = False
self,
X: Union[np.ndarray, List[np.ndarray]],
pad_unequal: bool = False,
concatenate_channels: bool = False,
) -> Union[np.ndarray, List[np.ndarray]]:
dtypes = self._get_tags()["X_types"]

Expand All @@ -123,9 +126,9 @@ def _convert_X(
return X.reshape((X.shape[0], -1))
else:
raise ValueError(
"Can only convert 3D numpy array with 1 channel to 2D numpy "
f"array if concatenate_channels is True, found {X.shape[1]} "
"channels."
"Can only convert 3D numpy array with more than 1 channel to "
"2D numpy array if concatenate_channels is True, found "
f"{X.shape[1]} channels."
)
elif dtypes[0] == "np_list":
return [x for x in X]
Expand All @@ -142,6 +145,13 @@ def _convert_X(
if "np_list" in dtypes:
return X
elif dtypes[0] == "3darray":
if not pad_unequal and not all(x.shape[1] == X[0].shape[1] for x in X):
raise ValueError(
"Can only convert list of 2D numpy arrays with unequal length "
"data to 3D numpy array if pad_unequal is True, found "
"different series lengths."
)

max_len = max(x.shape[1] for x in X)
arr = np.zeros((len(X), X[0].shape[0], max_len))

Expand All @@ -151,6 +161,15 @@ def _convert_X(
return arr
elif dtypes[0] == "2darray":
if X[0].shape[0] == 1 or concatenate_channels:
if not pad_unequal and not all(
x.shape[1] == X[0].shape[1] for x in X
):
raise ValueError(
"Can only convert list of 2D numpy arrays with unequal "
"length data to 2D numpy array if pad_unequal is True, "
"found different series lengths."
)

max_len = max(x.shape[1] for x in X)
arr = np.zeros((len(X), X[0].shape[0], max_len))

Expand All @@ -160,9 +179,9 @@ def _convert_X(
return arr.reshape((arr.shape[0], -1))
else:
raise ValueError(
"Can only convert list of 2D numpy arrays with 1 channel to 2D "
"numpy array if concatenate_channels is True, found "
f"{X[0].shape[0]} channels."
"Can only convert list of 2D numpy arrays with more than 1 "
"channel to 2D numpy array if concatenate_channels is True, "
f"found {X[0].shape[0]} channels."
)
else:
raise ValueError(
Expand Down
21 changes: 15 additions & 6 deletions tsml/dummy/_dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def fit(self, X, y):
random_state=self.random_state,
constant=self.constant,
)
self._clf.fit(np.zeros(X.shape), y)
self._clf.fit(None, y)

return self

Expand Down Expand Up @@ -141,7 +141,10 @@ def predict_proba(self, X) -> np.ndarray:
return self._clf.predict_proba(np.zeros(X.shape))

def _more_tags(self):
return {"X_types": ["3darray", "2darray", "np_list"]}
return {
"X_types": ["3darray", "2darray", "np_list"],
"equal_length_only": False,
}


class DummyRegressor(RegressorMixin, BaseTimeSeriesEstimator):
Expand Down Expand Up @@ -205,12 +208,12 @@ def __init__(self, strategy="mean", constant=None, quantile=None):

def fit(self, X, y):
""""""
X, y = self._validate_data(X=X, y=y, ensure_min_series_length=1)
_, y = self._validate_data(X=X, y=y, ensure_min_series_length=1)

self._reg = SklearnDummyRegressor(
strategy=self.strategy, constant=self.constant, quantile=self.quantile
)
self._reg.fit(np.zeros(X.shape), y)
self._reg.fit(None, y)

return self

Expand All @@ -223,7 +226,10 @@ def predict(self, X):
return self._reg.predict(np.zeros(X.shape))

def _more_tags(self):
return {"X_types": ["3darray", "2darray", "np_list"]}
return {
"X_types": ["3darray", "2darray", "np_list"],
"equal_length_only": False,
}


class DummyClusterer(ClusterMixin, BaseTimeSeriesEstimator):
Expand Down Expand Up @@ -291,4 +297,7 @@ def predict(self, X):
raise ValueError(f"Unknown strategy {self.strategy}")

def _more_tags(self):
return {"X_types": ["3darray", "2darray", "np_list"]}
return {
"X_types": ["3darray", "2darray", "np_list"],
"equal_length_only": False,
}
48 changes: 24 additions & 24 deletions tsml/tests/_sklearn_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1329,16 +1329,16 @@ def check_classifiers_train(
"fit."
)

if not tags["no_validation"]:
if tags["pairwise"]:
with raises(
ValueError,
err_msg=msg_pairwise.format(name, "predict"),
):
classifier.predict(X.reshape(-1, 1))
else:
with raises(ValueError, err_msg=msg.format(name, "predict")):
classifier.predict(X.T)
# if not tags["no_validation"]:
# if tags["pairwise"]:
# with raises(
# ValueError,
# err_msg=msg_pairwise.format(name, "predict"),
# ):
# classifier.predict(X.reshape(-1, 1))
# else:
# with raises(ValueError, err_msg=msg.format(name, "predict")):
# classifier.predict(X.T)
if hasattr(classifier, "decision_function"):
try:
# decision_function agrees with predict
Expand Down Expand Up @@ -1378,20 +1378,20 @@ def check_classifiers_train(
assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
# check that probas for all classes sum to one
assert_array_almost_equal(np.sum(y_prob, axis=1), np.ones(n_samples))
if not tags["no_validation"]:
# raises error on malformed input for predict_proba
if tags["pairwise"]:
with raises(
ValueError,
err_msg=msg_pairwise.format(name, "predict_proba"),
):
classifier.predict_proba(X.reshape(-1, 1))
else:
with raises(
ValueError,
err_msg=msg.format(name, "predict_proba"),
):
classifier.predict_proba(X.T)
# if not tags["no_validation"]:
# # raises error on malformed input for predict_proba
# if tags["pairwise"]:
# with raises(
# ValueError,
# err_msg=msg_pairwise.format(name, "predict_proba"),
# ):
# classifier.predict_proba(X.reshape(-1, 1))
# else:
# with raises(
# ValueError,
# err_msg=msg.format(name, "predict_proba"),
# ):
# classifier.predict_proba(X.T)
if hasattr(classifier, "predict_log_proba"):
# predict_log_proba is a transformation of predict_proba
y_log_prob = classifier.predict_log_proba(X)
Expand Down
4 changes: 2 additions & 2 deletions tsml/tests/test_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def _generate_conversion_test_X(data_type):
def test_convert_X_to_3d_array(input_type):
est = _3dArrayDummy()
X, old_shape = _generate_conversion_test_X(input_type)
X = est._convert_X(X)
X = est._convert_X(X, pad_unequal=True)

assert isinstance(X, np.ndarray)
assert X.ndim == 3
Expand All @@ -45,7 +45,7 @@ def test_convert_X_to_3d_array(input_type):
def test_convert_X_to_2d_array(input_type):
est = _2dArrayDummy()
X, old_shape = _generate_conversion_test_X(input_type)
X = est._convert_X(X, concatenate_channels=True)
X = est._convert_X(X, concatenate_channels=True, pad_unequal=True)

assert isinstance(X, np.ndarray)
assert X.ndim == 2
Expand Down

0 comments on commit 3542cc7

Please sign in to comment.