From e754f23affbe27ae442a8152e0f81d9eda33edc1 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sat, 23 Jan 2021 19:40:16 +0300 Subject: [PATCH 1/6] [python][tests] transfer test_save_and_load_linear to test_engine (#3821) --- tests/python_package_test/test_basic.py | 25 ------------------------ tests/python_package_test/test_engine.py | 25 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 19e15e5d6869..7cc349ded449 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -110,31 +110,6 @@ def test_chunked_dataset_linear(): valid_data.construct() -def test_save_and_load_linear(tmp_path): - X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, - random_state=2) - X_train = np.concatenate([np.ones((X_train.shape[0], 1)), X_train], 1) - X_train[:X_train.shape[0] // 2, 0] = 0 - y_train[:X_train.shape[0] // 2] = 1 - params = {'linear_tree': True} - train_data_1 = lgb.Dataset(X_train, label=y_train, params=params) - est_1 = lgb.train(params, train_data_1, num_boost_round=10, categorical_feature=[0]) - pred_1 = est_1.predict(X_train) - - tmp_dataset = str(tmp_path / 'temp_dataset.bin') - train_data_1.save_binary(tmp_dataset) - train_data_2 = lgb.Dataset(tmp_dataset) - est_2 = lgb.train(params, train_data_2, num_boost_round=10) - pred_2 = est_2.predict(X_train) - np.testing.assert_allclose(pred_1, pred_2) - - model_file = str(tmp_path / 'model.txt') - est_2.save_model(model_file) - est_3 = lgb.Booster(model_file=model_file) - pred_3 = est_3.predict(X_train) - np.testing.assert_allclose(pred_2, pred_3) - - def test_subset_group(): X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../examples/lambdarank/rank.train')) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index b3cb47126e1e..a8254bf07f23 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2566,6 +2566,31 @@ def test_linear_trees(tmp_path): est = lgb.train(params, train_data, num_boost_round=10, categorical_feature=[0]) +def test_save_and_load_linear(tmp_path): + X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, + random_state=2) + X_train = np.concatenate([np.ones((X_train.shape[0], 1)), X_train], 1) + X_train[:X_train.shape[0] // 2, 0] = 0 + y_train[:X_train.shape[0] // 2] = 1 + params = {'linear_tree': True} + train_data_1 = lgb.Dataset(X_train, label=y_train, params=params) + est_1 = lgb.train(params, train_data_1, num_boost_round=10, categorical_feature=[0]) + pred_1 = est_1.predict(X_train) + + tmp_dataset = str(tmp_path / 'temp_dataset.bin') + train_data_1.save_binary(tmp_dataset) + train_data_2 = lgb.Dataset(tmp_dataset) + est_2 = lgb.train(params, train_data_2, num_boost_round=10) + pred_2 = est_2.predict(X_train) + np.testing.assert_allclose(pred_1, pred_2) + + model_file = str(tmp_path / 'model.txt') + est_2.save_model(model_file) + est_3 = lgb.Booster(model_file=model_file) + pred_3 = est_3.predict(X_train) + np.testing.assert_allclose(pred_2, pred_3) + + def test_predict_with_start_iteration(): def inner_test(X, y, params, early_stopping_rounds): X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42) From d951be99ec724ae7ec27d680463cdbf7a442e963 Mon Sep 17 00:00:00 2001 
From: Chip Kerchner <49959681+ChipKerchner@users.noreply.github.com> Date: Sat, 23 Jan 2021 11:46:16 -0500 Subject: [PATCH 2/6] Don't copy more than has been allocated to device_features. (#3752) --- src/treelearner/cuda_tree_learner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treelearner/cuda_tree_learner.cpp b/src/treelearner/cuda_tree_learner.cpp index 78735c07c6e5..449557833bf2 100644 --- a/src/treelearner/cuda_tree_learner.cpp +++ b/src/treelearner/cuda_tree_learner.cpp @@ -408,7 +408,7 @@ void CUDATreeLearner::copyDenseFeature() { // looking for dword_features_ non-sparse feature-groups if (!train_data_->IsMultiGroup(i)) { dense_feature_group_map_.push_back(i); - auto sizes_in_byte = train_data_->FeatureGroupSizesInByte(i); + auto sizes_in_byte = std::min(train_data_->FeatureGroupSizesInByte(i), static_cast<size_t>(num_data_)); void* tmp_data = train_data_->FeatureGroupData(i); Log::Debug("Started copying dense features from CPU to GPU - 2"); CUDASUCCESS_OR_FATAL(cudaMemcpyAsync(&device_features[copied_feature * num_data_], tmp_data, sizes_in_byte, cudaMemcpyHostToDevice, stream_[device_id])); From 736337897d08c8e61c7464482c555a8af1792c1d Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sun, 24 Jan 2021 03:15:03 +0300 Subject: [PATCH 3/6] make sure we use documented Xcode version for producing artifacts (#3824) --- .ci/setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/setup.sh b/.ci/setup.sh index abd64f218304..7b1763f8200a 100755 --- a/.ci/setup.sh +++ b/.ci/setup.sh @@ -4,7 +4,7 @@ if [[ $OS_NAME == "macos" ]]; then if [[ $COMPILER == "clang" ]]; then brew install libomp if [[ $AZURE == "true" ]]; then - sudo xcode-select -s /Applications/Xcode_9.4.1.app/Contents/Developer + sudo xcode-select -s /Applications/Xcode_9.4.1.app/Contents/Developer || exit -1 fi else # gcc if [[ $TASK != "mpi" ]]; then From da44387176316632f126114182f6754a68a1d557 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sun, 24 Jan 2021 03:15:20 +0300 Subject: [PATCH 4/6] [dask][tests] move make_ranking into utils (#3827) * move make_ranking into utils * do not cache --- tests/python_package_test/test_dask.py | 91 ++------------------------ tests/python_package_test/utils.py | 82 +++++++++++++++++++++++ 2 files changed, 88 insertions(+), 85 deletions(-) diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py index a80bb700f993..42b1f035a80e 100644 --- a/tests/python_package_test/test_dask.py +++ b/tests/python_package_test/test_dask.py @@ -25,6 +25,9 @@ import lightgbm import lightgbm.dask as dlgbm +from .utils import make_ranking + + data_output = ['array', 'scipy_csr_matrix', 'dataframe'] data_centers = [[[-4, -4], [4, 4]], [[-4, -4], [4, 4], [-4, 4]]] group_sizes = [5, 5, 5, 10, 10, 10, 20, 20, 20, 50, 50] @@ -44,92 +47,13 @@ def listen_port(): listen_port.port = 13000 -def _make_ranking(n_samples=100, n_features=20, n_informative=5, gmax=2, - group=None, random_gs=False, avg_gs=10, random_state=0): - """Generate a learning-to-rank dataset - feature vectors grouped together with - integer-valued graded relevance scores. Replace this with a sklearn.datasets function - if ranking objective becomes supported in sklearn.datasets module. - - Parameters - ---------- - n_samples : int, optional (default=100) - Total number of documents (records) in the dataset. - n_features : int, optional (default=20) - Total number of features in the dataset.
- n_informative : int, optional (default=5) - Number of features that are "informative" for ranking, as they are bias + beta * y - where bias and beta are standard normal variates. If this is greater than n_features, the dataset will have - n_features features, all will be informative. - group : array-like, optional (default=None) - 1-d array or list of group sizes. When `group` is specified, this overrides n_samples, random_gs, and - avg_gs by simply creating groups with sizes group[0], ..., group[-1]. - gmax : int, optional (default=2) - Maximum graded relevance value for creating relevance/target vector. If you set this to 2, for example, all - documents in a group will have relevance scores of either 0, 1, or 2. - random_gs : bool, optional (default=False) - True will make group sizes ~ Poisson(avg_gs), False will make group sizes == avg_gs. - avg_gs : int, optional (default=10) - Average number of documents (records) in each group. - - Returns - ------- - X : 2-d np.ndarray of shape = [n_samples (or np.sum(group)), n_features] - Input feature matrix for ranking objective. - y : 1-d np.array of shape = [n_samples (or np.sum(group))] - Integer-graded relevance scores. - group_ids : 1-d np.array of shape = [n_samples (or np.sum(group))] - Array of group ids, each value indicates to which group each record belongs. - """ - rnd_generator = check_random_state(random_state) - - y_vec, group_id_vec = np.empty((0,), dtype=int), np.empty((0,), dtype=int) - gid = 0 - - # build target, group ID vectors. - relvalues = range(gmax + 1) - - # build y/target and group-id vectors with user-specified group sizes. - if group is not None and hasattr(group, '__len__'): - n_samples = np.sum(group) - - for i, gsize in enumerate(group): - y_vec = np.concatenate((y_vec, rnd_generator.choice(relvalues, size=gsize, replace=True))) - group_id_vec = np.concatenate((group_id_vec, [i] * gsize)) - - # build y/target and group-id vectors according to n_samples, avg_gs, and random_gs. - else: - while len(y_vec) < n_samples: - gsize = avg_gs if not random_gs else rnd_generator.poisson(avg_gs) - - # groups should contain > 1 element for pairwise learning objective. - if gsize < 1: - continue - - y_vec = np.append(y_vec, rnd_generator.choice(relvalues, size=gsize, replace=True)) - group_id_vec = np.append(group_id_vec, [gid] * gsize) - gid += 1 - - y_vec, group_id_vec = y_vec[:n_samples], group_id_vec[:n_samples] - - # build feature data, X. Transform first few into informative features. - n_informative = max(min(n_features, n_informative), 0) - X = rnd_generator.uniform(size=(n_samples, n_features)) - - for j in range(n_informative): - bias, coef = rnd_generator.normal(size=2) - X[:, j] = bias + coef * y_vec - - return X, y_vec, group_id_vec - - def _create_ranking_data(n_samples=100, output='array', chunk_size=50, **kwargs): - X, y, g = _make_ranking(n_samples=n_samples, random_state=42, **kwargs) + X, y, g = make_ranking(n_samples=n_samples, random_state=42, **kwargs) rnd = np.random.RandomState(42) w = rnd.rand(X.shape[0]) * 0.01 g_rle = np.array([len(list(grp)) for _, grp in itertools.groupby(g)]) if output == 'dataframe': - # add target, weight, and group to DataFrame so that partitions abide by group boundaries. 
X_df = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(X.shape[1])]) X = X_df.copy() @@ -149,9 +73,7 @@ def _create_ranking_data(n_samples=100, output='array', chunk_size=50, **kwargs) # encode group identifiers into run-length encoding, the format LightGBMRanker is expecting # so that within each partition, sum(g) = n_samples. dg = dg.map_partitions(lambda p: p.groupby('g', sort=False).apply(lambda z: z.shape[0])) - elif output == 'array': - # ranking arrays: one chunk per group. Each chunk must include all columns. p = X.shape[1] dX, dy, dw, dg = [], [], [], [] @@ -166,7 +88,6 @@ def _create_ranking_data(n_samples=100, output='array', chunk_size=50, **kwargs) dy = da.concatenate(dy, axis=0) dw = da.concatenate(dw, axis=0) dg = da.concatenate(dg, axis=0) - else: raise ValueError('Ranking data creation only supported for Dask arrays and dataframes') @@ -179,7 +100,7 @@ def _create_data(objective, n_samples=100, centers=2, output='array', chunk_size elif objective == 'regression': X, y = make_regression(n_samples=n_samples, random_state=42) else: - raise ValueError(objective) + raise ValueError("Unknown objective '%s'" % objective) rnd = np.random.RandomState(42) weights = rnd.random(X.shape[0]) * 0.01 @@ -198,7 +119,7 @@ def _create_data(objective, n_samples=100, centers=2, output='array', chunk_size dy = da.from_array(y, chunks=chunk_size) dw = da.from_array(weights, chunk_size) else: - raise ValueError("Unknown output type %s" % output) + raise ValueError("Unknown output type '%s'" % output) return X, y, weights, dX, dy, dw diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py index 758f34d6e76f..320b8e204aa3 100644 --- a/tests/python_package_test/utils.py +++ b/tests/python_package_test/utils.py @@ -1,7 +1,9 @@ # coding: utf-8 from functools import lru_cache +import numpy as np import sklearn.datasets +from sklearn.utils import check_random_state @lru_cache(maxsize=None) @@ -27,3 +29,83 @@ def load_iris(**kwargs): @lru_cache(maxsize=None) def load_linnerud(**kwargs): return sklearn.datasets.load_linnerud(**kwargs) + + +def make_ranking(n_samples=100, n_features=20, n_informative=5, gmax=2, + group=None, random_gs=False, avg_gs=10, random_state=0): + """Generate a learning-to-rank dataset - feature vectors grouped together with + integer-valued graded relevance scores. Replace this with a sklearn.datasets function + if ranking objective becomes supported in sklearn.datasets module. + + Parameters + ---------- + n_samples : int, optional (default=100) + Total number of documents (records) in the dataset. + n_features : int, optional (default=20) + Total number of features in the dataset. + n_informative : int, optional (default=5) + Number of features that are "informative" for ranking, as they are bias + beta * y + where bias and beta are standard normal variates. If this is greater than n_features, the dataset will have + n_features features, all will be informative. + gmax : int, optional (default=2) + Maximum graded relevance value for creating relevance/target vector. If you set this to 2, for example, all + documents in a group will have relevance scores of either 0, 1, or 2. + group : array-like, optional (default=None) + 1-d array or list of group sizes. When `group` is specified, this overrides n_samples, random_gs, and + avg_gs by simply creating groups with sizes group[0], ..., group[-1]. + random_gs : bool, optional (default=False) + True will make group sizes ~ Poisson(avg_gs), False will make group sizes == avg_gs. 
+ avg_gs : int, optional (default=10) + Average number of documents (records) in each group. + random_state : int, optional (default=0) + Random seed. + + Returns + ------- + X : 2-d np.ndarray of shape = [n_samples (or np.sum(group)), n_features] + Input feature matrix for ranking objective. + y : 1-d np.array of shape = [n_samples (or np.sum(group))] + Integer-graded relevance scores. + group_ids : 1-d np.array of shape = [n_samples (or np.sum(group))] + Array of group ids, each value indicates to which group each record belongs. + """ + rnd_generator = check_random_state(random_state) + + y_vec, group_id_vec = np.empty((0,), dtype=int), np.empty((0,), dtype=int) + gid = 0 + + # build target, group ID vectors. + relvalues = range(gmax + 1) + + # build y/target and group-id vectors with user-specified group sizes. + if group is not None and hasattr(group, '__len__'): + n_samples = np.sum(group) + + for i, gsize in enumerate(group): + y_vec = np.concatenate((y_vec, rnd_generator.choice(relvalues, size=gsize, replace=True))) + group_id_vec = np.concatenate((group_id_vec, [i] * gsize)) + + # build y/target and group-id vectors according to n_samples, avg_gs, and random_gs. + else: + while len(y_vec) < n_samples: + gsize = avg_gs if not random_gs else rnd_generator.poisson(avg_gs) + + # groups should contain > 1 element for pairwise learning objective. + if gsize < 1: + continue + + y_vec = np.append(y_vec, rnd_generator.choice(relvalues, size=gsize, replace=True)) + group_id_vec = np.append(group_id_vec, [gid] * gsize) + gid += 1 + + y_vec, group_id_vec = y_vec[:n_samples], group_id_vec[:n_samples] + + # build feature data, X. Transform first few into informative features. + n_informative = max(min(n_features, n_informative), 0) + X = rnd_generator.uniform(size=(n_samples, n_features)) + + for j in range(n_informative): + bias, coef = rnd_generator.normal(size=2) + X[:, j] = bias + coef * y_vec + + return X, y_vec, group_id_vec From 5a4fec6d11555fd24997e1b838192d2829aa37c0 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sun, 24 Jan 2021 04:48:28 +0300 Subject: [PATCH 5/6] [ci] improve optional checks (#3826) --- .github/workflows/triggering_comments.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/triggering_comments.yml b/.github/workflows/triggering_comments.yml index f20b397449e8..7ad5f82b53b7 100644 --- a/.github/workflows/triggering_comments.yml +++ b/.github/workflows/triggering_comments.yml @@ -6,7 +6,7 @@ on: jobs: triggering-tests: - if: github.event.issue.pull_request && contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) + if: github.event.issue.pull_request && contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) && startsWith(github.event.comment.body, '/gha run') runs-on: ubuntu-latest env: SECRETS_WORKFLOW: ${{ secrets.WORKFLOW }} From ac706e10e461d1c5987a3975cc48aa82fde33566 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sun, 24 Jan 2021 04:49:48 +0300 Subject: [PATCH 6/6] [dask][tests] reduce code duplication in Dask tests (#3828) --- tests/python_package_test/test_dask.py | 98 ++++++++++++++------------ 1 file changed, 51 insertions(+), 47 deletions(-) diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py index 42b1f035a80e..8273bdd4439f 100644 --- a/tests/python_package_test/test_dask.py +++ b/tests/python_package_test/test_dask.py @@ -133,11 +133,14 @@ def test_classifier(output, centers, client, listen_port): centers=centers ) + params = 
{ + "n_estimators": 10, + "num_leaves": 10 + } dask_classifier = dlgbm.DaskLGBMClassifier( time_out=5, local_listen_port=listen_port, - n_estimators=10, - num_leaves=10 + **params ) dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client) p1 = dask_classifier.predict(dX) @@ -145,7 +148,7 @@ def test_classifier(output, centers, client, listen_port): s1 = accuracy_score(dy, p1) p1 = p1.compute() - local_classifier = lightgbm.LGBMClassifier(n_estimators=10, num_leaves=10) + local_classifier = lightgbm.LGBMClassifier(**params) local_classifier.fit(X, y, sample_weight=w) p2 = local_classifier.predict(X) p2_proba = local_classifier.predict_proba(X) @@ -169,20 +172,20 @@ def test_classifier_pred_contrib(output, centers, client, listen_port): centers=centers ) + params = { + "n_estimators": 10, + "num_leaves": 10 + } dask_classifier = dlgbm.DaskLGBMClassifier( time_out=5, local_listen_port=listen_port, tree_learner='data', - n_estimators=10, - num_leaves=10 + **params ) dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client) preds_with_contrib = dask_classifier.predict(dX, pred_contrib=True).compute() - local_classifier = lightgbm.LGBMClassifier( - n_estimators=10, - num_leaves=10 - ) + local_classifier = lightgbm.LGBMClassifier(**params) local_classifier.fit(X, y, sample_weight=w) local_preds_with_contrib = local_classifier.predict(X, pred_contrib=True) @@ -243,16 +246,19 @@ def test_classifier_local_predict(client, listen_port): output='array' ) + params = { + "n_estimators": 10, + "num_leaves": 10 + } dask_classifier = dlgbm.DaskLGBMClassifier( time_out=5, local_port=listen_port, - n_estimators=10, - num_leaves=10 + **params ) dask_classifier = dask_classifier.fit(dX, dy, sample_weight=dw, client=client) p1 = dask_classifier.to_local().predict(dX) - local_classifier = lightgbm.LGBMClassifier(n_estimators=10, num_leaves=10) + local_classifier = lightgbm.LGBMClassifier(**params) local_classifier.fit(X, y, sample_weight=w) p2 = local_classifier.predict(X) @@ -270,12 +276,15 @@ def test_regressor(output, client, listen_port): output=output ) + params = { + "random_state": 42, + "num_leaves": 10 + } dask_regressor = dlgbm.DaskLGBMRegressor( time_out=5, local_listen_port=listen_port, - seed=42, - num_leaves=10, - tree='data' + tree='data', + **params ) dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw) p1 = dask_regressor.predict(dX) @@ -283,7 +292,7 @@ def test_regressor(output, client, listen_port): s1 = r2_score(dy, p1) p1 = p1.compute() - local_regressor = lightgbm.LGBMRegressor(seed=42, num_leaves=10) + local_regressor = lightgbm.LGBMRegressor(**params) local_regressor.fit(X, y, sample_weight=w) s2 = local_regressor.score(X, y) p2 = local_regressor.predict(X) @@ -306,20 +315,20 @@ def test_regressor_pred_contrib(output, client, listen_port): output=output ) + params = { + "n_estimators": 10, + "num_leaves": 10 + } dask_regressor = dlgbm.DaskLGBMRegressor( time_out=5, local_listen_port=listen_port, tree_learner='data', - n_estimators=10, - num_leaves=10 + **params ) dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client) preds_with_contrib = dask_regressor.predict(dX, pred_contrib=True).compute() - local_regressor = lightgbm.LGBMRegressor( - n_estimators=10, - num_leaves=10 - ) + local_regressor = lightgbm.LGBMRegressor(**params) local_regressor.fit(X, y, sample_weight=w) local_preds_with_contrib = local_regressor.predict(X, pred_contrib=True) @@ -341,26 +350,23 @@ def test_regressor_quantile(output, 
client, listen_port, alpha): output=output ) + params = { + "objective": "quantile", + "alpha": alpha, + "random_state": 42, + "n_estimators": 10, + "num_leaves": 10 + } dask_regressor = dlgbm.DaskLGBMRegressor( local_listen_port=listen_port, - seed=42, - objective='quantile', - alpha=alpha, - n_estimators=10, - num_leaves=10, - tree_learner_type='data_parallel' + tree_learner_type='data_parallel', + **params ) dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw) p1 = dask_regressor.predict(dX).compute() q1 = np.count_nonzero(y < p1) / y.shape[0] - local_regressor = lightgbm.LGBMRegressor( - seed=42, - objective='quantile', - alpha=alpha, - n_estimatores=10, - num_leaves=10 - ) + local_regressor = lightgbm.LGBMRegressor(**params) local_regressor.fit(X, y, sample_weight=w) p2 = local_regressor.predict(X) q2 = np.count_nonzero(y < p2) / y.shape[0] @@ -377,7 +383,7 @@ def test_regressor_local_predict(client, listen_port): dask_regressor = dlgbm.DaskLGBMRegressor( local_listen_port=listen_port, - seed=42, + random_state=42, n_estimators=10, num_leaves=10, tree_type='data' @@ -407,25 +413,23 @@ def test_ranker(output, client, listen_port, group): # use many trees + leaves to overfit, help ensure that dask data-parallel strategy matches that of # serial learner. See https://github.com/microsoft/LightGBM/issues/3292#issuecomment-671288210. + params = { + "random_state": 42, + "n_estimators": 50, + "num_leaves": 20, + "min_child_samples": 1 + } dask_ranker = dlgbm.DaskLGBMRanker( time_out=5, local_listen_port=listen_port, tree_learner_type='data_parallel', - n_estimators=50, - num_leaves=20, - seed=42, - min_child_samples=1 + **params ) dask_ranker = dask_ranker.fit(dX, dy, sample_weight=dw, group=dg, client=client) rnkvec_dask = dask_ranker.predict(dX) rnkvec_dask = rnkvec_dask.compute() - local_ranker = lightgbm.LGBMRanker( - n_estimators=50, - num_leaves=20, - seed=42, - min_child_samples=1 - ) + local_ranker = lightgbm.LGBMRanker(**params) local_ranker.fit(X, y, sample_weight=w, group=g) rnkvec_local = local_ranker.predict(X) @@ -453,7 +457,7 @@ def test_ranker_local_predict(output, client, listen_port, group): tree_learner='data', n_estimators=10, num_leaves=10, - seed=42, + random_state=42, min_child_samples=1 ) dask_ranker = dask_ranker.fit(dX, dy, group=dg, client=client)