From 666ca68790be90ae1153a6c355b7c1ad9921ef52 Mon Sep 17 00:00:00 2001 From: Neeratyoy Mallik Date: Mon, 3 Aug 2020 11:01:25 +0200 Subject: [PATCH] Adding support for scikit-learn > 0.22 (#936) * Preliminary changes * Updating unit tests for sklearn 0.22 and above * Triggering sklearn tests + fixes * Refactoring to inspect.signature in extensions --- .travis.yml | 6 +- openml/extensions/sklearn/extension.py | 18 +- .../test_sklearn_extension.py | 196 ++++++++++++------ tests/test_flows/test_flow.py | 77 +++++-- tests/test_runs/test_run_functions.py | 10 +- 5 files changed, 216 insertions(+), 91 deletions(-) diff --git a/.travis.yml b/.travis.yml index dcfda6d37..7360339ac 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,9 +15,13 @@ env: - TEST_DIR=/tmp/test_dir/ - MODULE=openml matrix: - - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" RUN_FLAKE8="true" SKIP_TESTS="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.21.2" COVERAGE="true" DOCPUSH="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.23.1" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.22.2" TEST_DIST="true" + - DISTRIB="conda" PYTHON_VERSION="3.6" SKLEARN_VERSION="0.21.2" TEST_DIST="true" - DISTRIB="conda" PYTHON_VERSION="3.7" SKLEARN_VERSION="0.20.2" # Checks for older scikit-learn versions (which also don't nicely work with # Python3.7) diff --git a/openml/extensions/sklearn/extension.py b/openml/extensions/sklearn/extension.py index af0b42144..fe9d029aa 100644 --- a/openml/extensions/sklearn/extension.py +++ b/openml/extensions/sklearn/extension.py @@ -994,12 +994,16 @@ def _get_fn_arguments_with_defaults(self, fn_name: Callable) -> Tuple[Dict, Set] a set with all parameters that do not have a default value """ # parameters with defaults are optional, all others are required. - signature = inspect.getfullargspec(fn_name) - if signature.defaults: - optional_params = dict(zip(reversed(signature.args), reversed(signature.defaults))) - else: - optional_params = dict() - required_params = {arg for arg in signature.args if arg not in optional_params} + parameters = inspect.signature(fn_name).parameters + required_params = set() + optional_params = dict() + for param in parameters.keys(): + parameter = parameters.get(param) + default_val = parameter.default # type: ignore + if default_val is inspect.Signature.empty: + required_params.add(param) + else: + optional_params[param] = default_val return optional_params, required_params def _deserialize_model( @@ -1346,7 +1350,7 @@ def _can_measure_cputime(self, model: Any) -> bool: # check the parameters for n_jobs n_jobs_vals = SklearnExtension._get_parameter_values_recursive(model.get_params(), "n_jobs") for val in n_jobs_vals: - if val is not None and val != 1: + if val is not None and val != 1 and val != "deprecated": return False return True diff --git a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py index 48832b58f..acc93b024 100644 --- a/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py +++ b/tests/test_extensions/test_sklearn_extension/test_sklearn_extension.py @@ -77,11 +77,14 @@ def test_serialize_model(self): criterion="entropy", max_features="auto", max_leaf_nodes=2000 ) - fixture_name = "sklearn.tree.tree.DecisionTreeClassifier" + tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" + fixture_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) fixture_short_name = "sklearn.DecisionTreeClassifier" # str obtained from self.extension._get_sklearn_description(model) fixture_description = "A decision tree classifier." version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ + + presort_val = "false" if LooseVersion(sklearn.__version__) < "0.22" else '"deprecated"' # min_impurity_decrease has been introduced in 0.20 # min_impurity_split has been deprecated in 0.20 if LooseVersion(sklearn.__version__) < "0.19": @@ -114,12 +117,16 @@ def test_serialize_model(self): ("min_samples_leaf", "1"), ("min_samples_split", "2"), ("min_weight_fraction_leaf", "0.0"), - ("presort", "false"), + ("presort", presort_val), ("random_state", "null"), ("splitter", '"best"'), ) ) - structure_fixture = {"sklearn.tree.tree.DecisionTreeClassifier": []} + if LooseVersion(sklearn.__version__) >= "0.22": + fixture_parameters.update({"ccp_alpha": "0.0"}) + fixture_parameters.move_to_end("ccp_alpha", last=False) + + structure_fixture = {"sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): []} serialization = self.extension.model_to_flow(model) structure = serialization.get_structure("name") @@ -161,11 +168,18 @@ def test_serialize_model_clustering(self): with mock.patch.object(self.extension, "_check_dependencies") as check_dependencies_mock: model = sklearn.cluster.KMeans() - fixture_name = "sklearn.cluster.k_means_.KMeans" + cluster_name = "k_means_" if LooseVersion(sklearn.__version__) < "0.22" else "_kmeans" + fixture_name = "sklearn.cluster.{}.KMeans".format(cluster_name) fixture_short_name = "sklearn.KMeans" # str obtained from self.extension._get_sklearn_description(model) - fixture_description = "K-Means clustering" + fixture_description = "K-Means clustering{}".format( + "" if LooseVersion(sklearn.__version__) < "0.22" else "." + ) version_fixture = "sklearn==%s\nnumpy>=1.6.1\nscipy>=0.9" % sklearn.__version__ + + n_jobs_val = "null" if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' + precomp_val = '"auto"' if LooseVersion(sklearn.__version__) < "0.23" else '"deprecated"' + # n_jobs default has changed to None in 0.20 if LooseVersion(sklearn.__version__) < "0.20": fixture_parameters = OrderedDict( @@ -192,14 +206,14 @@ def test_serialize_model_clustering(self): ("max_iter", "300"), ("n_clusters", "8"), ("n_init", "10"), - ("n_jobs", "null"), - ("precompute_distances", '"auto"'), + ("n_jobs", n_jobs_val), + ("precompute_distances", precomp_val), ("random_state", "null"), ("tol", "0.0001"), ("verbose", "0"), ) ) - fixture_structure = {"sklearn.cluster.k_means_.KMeans": []} + fixture_structure = {"sklearn.cluster.{}.KMeans".format(cluster_name): []} serialization = self.extension.model_to_flow(model) structure = serialization.get_structure("name") @@ -230,11 +244,15 @@ def test_serialize_model_with_subcomponent(self): n_estimators=100, base_estimator=sklearn.tree.DecisionTreeClassifier() ) + weight_name = "{}weight_boosting".format( + "" if LooseVersion(sklearn.__version__) < "0.22" else "_" + ) + tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" fixture_name = ( - "sklearn.ensemble.weight_boosting.AdaBoostClassifier" - "(base_estimator=sklearn.tree.tree.DecisionTreeClassifier)" + "sklearn.ensemble.{}.AdaBoostClassifier" + "(base_estimator=sklearn.tree.{}.DecisionTreeClassifier)".format(weight_name, tree_name) ) - fixture_class_name = "sklearn.ensemble.weight_boosting.AdaBoostClassifier" + fixture_class_name = "sklearn.ensemble.{}.AdaBoostClassifier".format(weight_name) fixture_short_name = "sklearn.AdaBoostClassifier" # str obtained from self.extension._get_sklearn_description(model) fixture_description = ( @@ -246,13 +264,13 @@ def test_serialize_model_with_subcomponent(self): " on difficult cases.\n\nThis class implements the algorithm known " "as AdaBoost-SAMME [2]." ) - fixture_subcomponent_name = "sklearn.tree.tree.DecisionTreeClassifier" - fixture_subcomponent_class_name = "sklearn.tree.tree.DecisionTreeClassifier" + fixture_subcomponent_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) + fixture_subcomponent_class_name = "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name) # str obtained from self.extension._get_sklearn_description(model.base_estimator) fixture_subcomponent_description = "A decision tree classifier." fixture_structure = { fixture_name: [], - "sklearn.tree.tree.DecisionTreeClassifier": ["base_estimator"], + "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): ["base_estimator"], } serialization = self.extension.model_to_flow(model) @@ -298,10 +316,11 @@ def test_serialize_pipeline(self): dummy = sklearn.dummy.DummyClassifier(strategy="prior") model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("dummy", dummy)]) + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" fixture_name = ( "sklearn.pipeline.Pipeline(" - "scaler=sklearn.preprocessing.data.StandardScaler," - "dummy=sklearn.dummy.DummyClassifier)" + "scaler=sklearn.preprocessing.{}.StandardScaler," + "dummy=sklearn.dummy.DummyClassifier)".format(scaler_name) ) fixture_short_name = "sklearn.Pipeline(StandardScaler,DummyClassifier)" @@ -327,7 +346,7 @@ def test_serialize_pipeline(self): fixture_structure = { fixture_name: [], - "sklearn.preprocessing.data.StandardScaler": ["scaler"], + "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], "sklearn.dummy.DummyClassifier": ["dummy"], } @@ -402,10 +421,12 @@ def test_serialize_pipeline_clustering(self): km = sklearn.cluster.KMeans() model = sklearn.pipeline.Pipeline(steps=[("scaler", scaler), ("clusterer", km)]) + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" + cluster_name = "k_means_" if LooseVersion(sklearn.__version__) < "0.22" else "_kmeans" fixture_name = ( "sklearn.pipeline.Pipeline(" - "scaler=sklearn.preprocessing.data.StandardScaler," - "clusterer=sklearn.cluster.k_means_.KMeans)" + "scaler=sklearn.preprocessing.{}.StandardScaler," + "clusterer=sklearn.cluster.{}.KMeans)".format(scaler_name, cluster_name) ) fixture_short_name = "sklearn.Pipeline(StandardScaler,KMeans)" @@ -430,10 +451,9 @@ def test_serialize_pipeline_clustering(self): fixture_description = self.extension._get_sklearn_description(model) fixture_structure = { fixture_name: [], - "sklearn.preprocessing.data.StandardScaler": ["scaler"], - "sklearn.cluster.k_means_.KMeans": ["clusterer"], + "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], + "sklearn.cluster.{}.KMeans".format(cluster_name): ["clusterer"], } - serialization = self.extension.model_to_flow(model) structure = serialization.get_structure("name") @@ -519,10 +539,12 @@ def test_serialize_column_transformer(self): ], remainder="passthrough", ) + + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" fixture = ( "sklearn.compose._column_transformer.ColumnTransformer(" - "numeric=sklearn.preprocessing.data.StandardScaler," - "nominal=sklearn.preprocessing._encoders.OneHotEncoder)" + "numeric=sklearn.preprocessing.{}.StandardScaler," + "nominal=sklearn.preprocessing._encoders.OneHotEncoder)".format(scaler_name) ) fixture_short_name = "sklearn.ColumnTransformer" @@ -543,7 +565,7 @@ def test_serialize_column_transformer(self): fixture_structure = { fixture: [], - "sklearn.preprocessing.data.StandardScaler": ["numeric"], + "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["numeric"], "sklearn.preprocessing._encoders.OneHotEncoder": ["nominal"], } @@ -587,21 +609,26 @@ def test_serialize_column_transformer_pipeline(self): model = sklearn.pipeline.Pipeline( steps=[("transformer", inner), ("classifier", sklearn.tree.DecisionTreeClassifier())] ) + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" + tree_name = "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" fixture_name = ( "sklearn.pipeline.Pipeline(" "transformer=sklearn.compose._column_transformer." "ColumnTransformer(" - "numeric=sklearn.preprocessing.data.StandardScaler," + "numeric=sklearn.preprocessing.{}.StandardScaler," "nominal=sklearn.preprocessing._encoders.OneHotEncoder)," - "classifier=sklearn.tree.tree.DecisionTreeClassifier)" + "classifier=sklearn.tree.{}.DecisionTreeClassifier)".format(scaler_name, tree_name) ) fixture_structure = { - "sklearn.preprocessing.data.StandardScaler": ["transformer", "numeric"], + "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): [ + "transformer", + "numeric", + ], "sklearn.preprocessing._encoders.OneHotEncoder": ["transformer", "nominal"], "sklearn.compose._column_transformer.ColumnTransformer(numeric=" - "sklearn.preprocessing.data.StandardScaler,nominal=sklearn." - "preprocessing._encoders.OneHotEncoder)": ["transformer"], - "sklearn.tree.tree.DecisionTreeClassifier": ["classifier"], + "sklearn.preprocessing.{}.StandardScaler,nominal=sklearn." + "preprocessing._encoders.OneHotEncoder)".format(scaler_name): ["transformer"], + "sklearn.tree.{}.DecisionTreeClassifier".format(tree_name): ["classifier"], fixture_name: [], } @@ -630,6 +657,7 @@ def test_serialize_column_transformer_pipeline(self): structure = serialization.get_structure("name") self.assertEqual(serialization.name, fixture_name) self.assertEqual(serialization.description, fixture_description) + self.assertDictEqual(structure, fixture_structure) # del serialization.model new_model = self.extension.flow_to_model(serialization) @@ -656,15 +684,18 @@ def test_serialize_feature_union(self): structure = serialization.get_structure("name") # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" fixture_name = ( "sklearn.pipeline.FeatureUnion(" "ohe=sklearn.preprocessing.{}.OneHotEncoder," - "scaler=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder) + "scaler=sklearn.preprocessing.{}.StandardScaler)".format( + module_name_encoder, scaler_name + ) ) fixture_structure = { fixture_name: [], "sklearn.preprocessing.{}." "OneHotEncoder".format(module_name_encoder): ["ohe"], - "sklearn.preprocessing.data.StandardScaler": ["scaler"], + "sklearn.preprocessing.{}.StandardScaler".format(scaler_name): ["scaler"], } self.assertEqual(serialization.name, fixture_name) self.assertDictEqual(structure, fixture_structure) @@ -728,17 +759,20 @@ def test_serialize_feature_union_switched_names(self): fu2_serialization = self.extension.model_to_flow(fu2) # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" + scaler_name = "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" self.assertEqual( fu1_serialization.name, "sklearn.pipeline.FeatureUnion(" "ohe=sklearn.preprocessing.{}.OneHotEncoder," - "scaler=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder), + "scaler=sklearn.preprocessing.{}.StandardScaler)".format( + module_name_encoder, scaler_name + ), ) self.assertEqual( fu2_serialization.name, "sklearn.pipeline.FeatureUnion(" "scaler=sklearn.preprocessing.{}.OneHotEncoder," - "ohe=sklearn.preprocessing.data.StandardScaler)".format(module_name_encoder), + "ohe=sklearn.preprocessing.{}.StandardScaler)".format(module_name_encoder, scaler_name), ) def test_serialize_complex_flow(self): @@ -766,10 +800,15 @@ def test_serialize_complex_flow(self): # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" ohe_name = "sklearn.preprocessing.%s.OneHotEncoder" % module_name_encoder - scaler_name = "sklearn.preprocessing.data.StandardScaler" - tree_name = "sklearn.tree.tree.DecisionTreeClassifier" - boosting_name = ( - "sklearn.ensemble.weight_boosting.AdaBoostClassifier" "(base_estimator=%s)" % tree_name + scaler_name = "sklearn.preprocessing.{}.StandardScaler".format( + "data" if LooseVersion(sklearn.__version__) < "0.22" else "_data" + ) + tree_name = "sklearn.tree.{}.DecisionTreeClassifier".format( + "tree" if LooseVersion(sklearn.__version__) < "0.22" else "_classes" + ) + weight_name = "weight" if LooseVersion(sklearn.__version__) < "0.22" else "_weight" + boosting_name = "sklearn.ensemble.{}_boosting.AdaBoostClassifier(base_estimator={})".format( + weight_name, tree_name ) pipeline_name = "sklearn.pipeline.Pipeline(ohe=%s,scaler=%s," "boosting=%s)" % ( ohe_name, @@ -1195,12 +1234,24 @@ def test__get_fn_arguments_with_defaults(self): (sklearn.tree.DecisionTreeClassifier.__init__, 13), (sklearn.pipeline.Pipeline.__init__, 1), ] - else: + elif sklearn_version < "0.22": fns = [ (sklearn.ensemble.RandomForestRegressor.__init__, 16), (sklearn.tree.DecisionTreeClassifier.__init__, 13), (sklearn.pipeline.Pipeline.__init__, 2), ] + elif sklearn_version < "0.23": + fns = [ + (sklearn.ensemble.RandomForestRegressor.__init__, 18), + (sklearn.tree.DecisionTreeClassifier.__init__, 14), + (sklearn.pipeline.Pipeline.__init__, 2), + ] + else: + fns = [ + (sklearn.ensemble.RandomForestRegressor.__init__, 18), + (sklearn.tree.DecisionTreeClassifier.__init__, 14), + (sklearn.pipeline.Pipeline.__init__, 2), + ] for fn, num_params_with_defaults in fns: defaults, defaultless = self.extension._get_fn_arguments_with_defaults(fn) @@ -1225,11 +1276,18 @@ def test_deserialize_with_defaults(self): pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - params = { - "Imputer__strategy": "median", - "OneHotEncoder__sparse": False, - "Estimator__min_samples_leaf": 42, - } + if LooseVersion(sklearn.__version__) < "0.23": + params = { + "Imputer__strategy": "median", + "OneHotEncoder__sparse": False, + "Estimator__min_samples_leaf": 42, + } + else: + params = { + "Imputer__strategy": "mean", + "OneHotEncoder__sparse": True, + "Estimator__min_samples_leaf": 1, + } pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) @@ -1256,11 +1314,18 @@ def test_deserialize_adaboost_with_defaults(self): pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - params = { - "Imputer__strategy": "median", - "OneHotEncoder__sparse": False, - "Estimator__n_estimators": 10, - } + if LooseVersion(sklearn.__version__) < "0.22": + params = { + "Imputer__strategy": "median", + "OneHotEncoder__sparse": False, + "Estimator__n_estimators": 10, + } + else: + params = { + "Imputer__strategy": "mean", + "OneHotEncoder__sparse": True, + "Estimator__n_estimators": 50, + } pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) @@ -1293,14 +1358,24 @@ def test_deserialize_complex_with_defaults(self): pipe_orig = sklearn.pipeline.Pipeline(steps=steps) pipe_adjusted = sklearn.clone(pipe_orig) - params = { - "Imputer__strategy": "median", - "OneHotEncoder__sparse": False, - "Estimator__n_estimators": 10, - "Estimator__base_estimator__n_estimators": 10, - "Estimator__base_estimator__base_estimator__learning_rate": 0.1, - "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13, - } + if LooseVersion(sklearn.__version__) < "0.23": + params = { + "Imputer__strategy": "median", + "OneHotEncoder__sparse": False, + "Estimator__n_estimators": 10, + "Estimator__base_estimator__n_estimators": 10, + "Estimator__base_estimator__base_estimator__learning_rate": 0.1, + "Estimator__base_estimator__base_estimator__loss__n_neighbors": 13, + } + else: + params = { + "Imputer__strategy": "mean", + "OneHotEncoder__sparse": True, + "Estimator__n_estimators": 50, + "Estimator__base_estimator__n_estimators": 10, + "Estimator__base_estimator__base_estimator__learning_rate": 0.1, + "Estimator__base_estimator__base_estimator__loss__n_neighbors": 5, + } pipe_adjusted.set_params(**params) flow = self.extension.model_to_flow(pipe_adjusted) pipe_deserialized = self.extension.flow_to_model(flow, initialize_with_defaults=True) @@ -1349,7 +1424,10 @@ def test_openml_param_name_to_sklearn(self): def test_obtain_parameter_values_flow_not_from_server(self): model = sklearn.linear_model.LogisticRegression(solver="lbfgs") flow = self.extension.model_to_flow(model) - msg = "Flow sklearn.linear_model.logistic.LogisticRegression has no " "flow_id!" + logistic_name = "logistic" if LooseVersion(sklearn.__version__) < "0.22" else "_logistic" + msg = "Flow sklearn.linear_model.{}.LogisticRegression has no flow_id!".format( + logistic_name + ) with self.assertRaisesRegex(ValueError, msg): self.extension.obtain_parameter_values(flow) diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index 9f289870e..8d08f4eaf 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -305,15 +305,27 @@ def test_publish_error(self, api_call_mock, flow_exists_mock, get_flow_mock): "collected from {}: {}".format(__file__.split("/")[-1], flow.flow_id) ) - fixture = ( - "The flow on the server is inconsistent with the local flow. " - "The server flow ID is 1. Please check manually and remove " - "the flow if necessary! Error is:\n" - "'Flow sklearn.ensemble.forest.RandomForestClassifier: " - "values for attribute 'name' differ: " - "'sklearn.ensemble.forest.RandomForestClassifier'" - "\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'" - ) + if LooseVersion(sklearn.__version__) < "0.22": + fixture = ( + "The flow on the server is inconsistent with the local flow. " + "The server flow ID is 1. Please check manually and remove " + "the flow if necessary! Error is:\n" + "'Flow sklearn.ensemble.forest.RandomForestClassifier: " + "values for attribute 'name' differ: " + "'sklearn.ensemble.forest.RandomForestClassifier'" + "\nvs\n'sklearn.ensemble.forest.RandomForestClassifie'.'" + ) + else: + # sklearn.ensemble.forest -> sklearn.ensemble._forest + fixture = ( + "The flow on the server is inconsistent with the local flow. " + "The server flow ID is 1. Please check manually and remove " + "the flow if necessary! Error is:\n" + "'Flow sklearn.ensemble._forest.RandomForestClassifier: " + "values for attribute 'name' differ: " + "'sklearn.ensemble._forest.RandomForestClassifier'" + "\nvs\n'sklearn.ensemble._forest.RandomForestClassifie'.'" + ) self.assertEqual(context_manager.exception.args[0], fixture) self.assertEqual(get_flow_mock.call_count, 2) @@ -463,19 +475,40 @@ def test_sklearn_to_upload_to_flow(self): # OneHotEncoder was moved to _encoders module in 0.20 module_name_encoder = "_encoders" if LooseVersion(sklearn.__version__) >= "0.20" else "data" - fixture_name = ( - "%ssklearn.model_selection._search.RandomizedSearchCV(" - "estimator=sklearn.pipeline.Pipeline(" - "ohe=sklearn.preprocessing.%s.OneHotEncoder," - "scaler=sklearn.preprocessing.data.StandardScaler," - "fu=sklearn.pipeline.FeatureUnion(" - "pca=sklearn.decomposition.truncated_svd.TruncatedSVD," - "fs=" - "sklearn.feature_selection.univariate_selection.SelectPercentile)," - "boosting=sklearn.ensemble.weight_boosting.AdaBoostClassifier(" - "base_estimator=sklearn.tree.tree.DecisionTreeClassifier)))" - % (sentinel, module_name_encoder) - ) + if LooseVersion(sklearn.__version__) < "0.22": + fixture_name = ( + "%ssklearn.model_selection._search.RandomizedSearchCV(" + "estimator=sklearn.pipeline.Pipeline(" + "ohe=sklearn.preprocessing.%s.OneHotEncoder," + "scaler=sklearn.preprocessing.data.StandardScaler," + "fu=sklearn.pipeline.FeatureUnion(" + "pca=sklearn.decomposition.truncated_svd.TruncatedSVD," + "fs=" + "sklearn.feature_selection.univariate_selection.SelectPercentile)," + "boosting=sklearn.ensemble.weight_boosting.AdaBoostClassifier(" + "base_estimator=sklearn.tree.tree.DecisionTreeClassifier)))" + % (sentinel, module_name_encoder) + ) + else: + # sklearn.sklearn.preprocessing.data -> sklearn.sklearn.preprocessing._data + # sklearn.sklearn.decomposition.truncated_svd -> sklearn.decomposition._truncated_svd + # sklearn.feature_selection.univariate_selection -> + # sklearn.feature_selection._univariate_selection + # sklearn.ensemble.weight_boosting -> sklearn.ensemble._weight_boosting + # sklearn.tree.tree.DecisionTree... -> sklearn.tree._classes.DecisionTree... + fixture_name = ( + "%ssklearn.model_selection._search.RandomizedSearchCV(" + "estimator=sklearn.pipeline.Pipeline(" + "ohe=sklearn.preprocessing.%s.OneHotEncoder," + "scaler=sklearn.preprocessing._data.StandardScaler," + "fu=sklearn.pipeline.FeatureUnion(" + "pca=sklearn.decomposition._truncated_svd.TruncatedSVD," + "fs=" + "sklearn.feature_selection._univariate_selection.SelectPercentile)," + "boosting=sklearn.ensemble._weight_boosting.AdaBoostClassifier(" + "base_estimator=sklearn.tree._classes.DecisionTreeClassifier)))" + % (sentinel, module_name_encoder) + ) self.assertEqual(new_flow.name, fixture_name) new_flow.model.fit(X, y) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 74f011b7c..aca9580c9 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -199,8 +199,11 @@ def _perform_run( classes_without_random_state = [ "sklearn.model_selection._search.GridSearchCV", "sklearn.pipeline.Pipeline", - "sklearn.linear_model.base.LinearRegression", ] + if LooseVersion(sklearn.__version__) < "0.22": + classes_without_random_state.append("sklearn.linear_model.base.LinearRegression") + else: + classes_without_random_state.append("sklearn.linear_model._base.LinearRegression") def _remove_random_state(flow): if "random_state" in flow.parameters: @@ -779,10 +782,13 @@ def _test_local_evaluations(self, run): (sklearn.metrics.cohen_kappa_score, {"weights": None}), (sklearn.metrics.roc_auc_score, {}), (sklearn.metrics.average_precision_score, {}), - (sklearn.metrics.jaccard_similarity_score, {}), (sklearn.metrics.precision_score, {"average": "macro"}), (sklearn.metrics.brier_score_loss, {}), ] + if LooseVersion(sklearn.__version__) < "0.23": + tests.append((sklearn.metrics.jaccard_similarity_score, {})) + else: + tests.append((sklearn.metrics.jaccard_score, {})) for test_idx, test in enumerate(tests): alt_scores = run.get_metric_fn(sklearn_fn=test[0], kwargs=test[1],) self.assertEqual(len(alt_scores), 10)