diff --git a/doc/source/ray-air/doc_code/air_key_concepts.py b/doc/source/ray-air/doc_code/air_key_concepts.py
index 966590dbcb3ca..ab637617b1e3b 100644
--- a/doc/source/ray-air/doc_code/air_key_concepts.py
+++ b/doc/source/ray-air/doc_code/air_key_concepts.py
@@ -35,6 +35,8 @@
     scaling_config=ScalingConfig(
         num_workers=num_workers,
         use_gpu=use_gpu,
+        # Make sure to leave some CPUs free for Ray Data operations.
+        _max_cpu_fraction_per_node=0.9,
     ),
     label_column="target",
     params=params,
diff --git a/doc/source/ray-air/examples/analyze_tuning_results.ipynb b/doc/source/ray-air/examples/analyze_tuning_results.ipynb
index 0296163a1b37d..e2fcecc5e53b2 100644
--- a/doc/source/ray-air/examples/analyze_tuning_results.ipynb
+++ b/doc/source/ray-air/examples/analyze_tuning_results.ipynb
@@ -164,13 +164,14 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "228ae052",
    "metadata": {},
    "source": [
    "With these parameters in place, we'll create a Ray AIR `XGBoostTrainer`.\n",
    "\n",
-    "Note a few things here. First, we pass in a `scaling_config` to configure the distributed training behavior of each individual XGBoost training job. Here, we want to distribute training across 2 workers.\n",
+    "Note that we pass in a `scaling_config` to configure the distributed training behavior of each individual XGBoost training job. We want to distribute training across 2 workers. We also keep some CPU resources free for Ray Data operations.\n",
    "\n",
    "The `label_column` specifies which columns in the dataset contains the target values. `params` are the XGBoost training params defined above - we can tune these later! The `datasets` dict contains the dataset we would like to train on. Lastly, we pass the number of boosting rounds to XGBoost."
    ]
   },
@@ -183,7 +184,7 @@
    "outputs": [],
    "source": [
    "trainer = XGBoostTrainer(\n",
-    "    scaling_config=ScalingConfig(num_workers=2),\n",
+    "    scaling_config=ScalingConfig(num_workers=2, _max_cpu_fraction_per_node=0.9),\n",
    "    label_column=\"target\",\n",
    "    params=params,\n",
    "    datasets={\"train\": train_dataset},\n",
diff --git a/doc/source/ray-air/examples/xgboost_starter.py b/doc/source/ray-air/examples/xgboost_starter.py
index a216c8e3f4490..7938072e5e2d3 100644
--- a/doc/source/ray-air/examples/xgboost_starter.py
+++ b/doc/source/ray-air/examples/xgboost_starter.py
@@ -31,6 +31,8 @@
         num_workers=2,
         # Whether to use GPU acceleration.
         use_gpu=False,
+        # Make sure to leave some CPUs free for Ray Data operations.
+        _max_cpu_fraction_per_node=0.9,
     ),
     label_column="target",
     num_boost_round=20,
diff --git a/python/ray/air/tests/test_resource_changing.py b/python/ray/air/tests/test_resource_changing.py
index 478d61602bd27..d2766734bb577 100644
--- a/python/ray/air/tests/test_resource_changing.py
+++ b/python/ray/air/tests/test_resource_changing.py
@@ -115,7 +115,9 @@ def test_gbdt_trainer(ray_start_8_cpus):
     trainer = AssertingXGBoostTrainer(
         datasets={TRAIN_DATASET_KEY: train_ds},
         label_column="target",
-        scaling_config=ScalingConfig(num_workers=2, placement_strategy="SPREAD"),
+        scaling_config=ScalingConfig(
+            num_workers=2, placement_strategy="SPREAD", _max_cpu_fraction_per_node=0.9
+        ),
         params={
             "objective": "binary:logistic",
             "eval_metric": ["logloss"],
diff --git a/python/ray/train/tests/test_lightgbm_trainer.py b/python/ray/train/tests/test_lightgbm_trainer.py
index 2affbb0eaba73..430a74ecc48e3 100644
--- a/python/ray/train/tests/test_lightgbm_trainer.py
+++ b/python/ray/train/tests/test_lightgbm_trainer.py
@@ -25,6 +25,14 @@ def ray_start_6_cpus():
     ray.shutdown()


+@pytest.fixture
+def ray_start_8_cpus():
+    address_info = ray.init(num_cpus=8)
+    yield address_info
+    # The code after the yield will run as teardown code.
+    ray.shutdown()
+
+
 scale_config = ScalingConfig(num_workers=2)

 data_raw = load_breast_cancer()
@@ -183,11 +191,11 @@ def _transform_pandas(self, df: "pd.DataFrame") -> "pd.DataFrame":
     assert preprocessor.fitted_


-def test_tune(ray_start_6_cpus):
+def test_tune(ray_start_8_cpus):
     train_dataset = ray.data.from_pandas(train_df)
     valid_dataset = ray.data.from_pandas(test_df)
     trainer = LightGBMTrainer(
-        scaling_config=scale_config,
+        scaling_config=ScalingConfig(num_workers=2, resources_per_worker={"CPU": 1}),
         label_column="target",
         params={**params, **{"max_depth": 1}},
         datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},
diff --git a/python/ray/train/tests/test_xgboost_trainer.py b/python/ray/train/tests/test_xgboost_trainer.py
index 931515e207adb..b61af80619fee 100644
--- a/python/ray/train/tests/test_xgboost_trainer.py
+++ b/python/ray/train/tests/test_xgboost_trainer.py
@@ -25,6 +25,14 @@ def ray_start_4_cpus():
     ray.shutdown()


+@pytest.fixture
+def ray_start_8_cpus():
+    address_info = ray.init(num_cpus=8)
+    yield address_info
+    # The code after the yield will run as teardown code.
+    ray.shutdown()
+
+
 scale_config = ScalingConfig(num_workers=2)

 data_raw = load_breast_cancer()
@@ -197,7 +205,7 @@ def _transform_pandas(self, df: "pd.DataFrame") -> "pd.DataFrame":
     assert preprocessor.fitted_


-def test_tune(ray_start_4_cpus):
+def test_tune(ray_start_8_cpus):
     train_dataset = ray.data.from_pandas(train_df)
     valid_dataset = ray.data.from_pandas(test_df)
     trainer = XGBoostTrainer(
diff --git a/python/requirements/ml/requirements_upstream.txt b/python/requirements/ml/requirements_upstream.txt
index 63350f2530067..415f02902f19b 100644
--- a/python/requirements/ml/requirements_upstream.txt
+++ b/python/requirements/ml/requirements_upstream.txt
@@ -4,6 +4,6 @@
 ray_lightning==0.3.0
 tune-sklearn==0.4.4
-xgboost_ray==0.1.10
-lightgbm_ray==0.1.5
+xgboost_ray==0.1.15
+lightgbm_ray==0.1.8
 modin==0.12.1; python_version >= '3.7'
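For reference, a minimal sketch of how the `_max_cpu_fraction_per_node` setting introduced above is used end to end (assuming Ray AIR 2.x with xgboost_ray installed; the toy dataset and params here are illustrative, not taken from this patch):

import ray
from ray.air.config import ScalingConfig
from ray.train.xgboost import XGBoostTrainer

ray.init(num_cpus=8)

# Illustrative toy dataset; any Ray Dataset with a "target" column works.
train_dataset = ray.data.from_items(
    [{"x": float(i), "target": i % 2} for i in range(100)]
)

trainer = XGBoostTrainer(
    scaling_config=ScalingConfig(
        num_workers=2,
        # Experimental setting: cap the fraction of each node's CPUs that the
        # training placement group may reserve, leaving the rest free for
        # Ray Data tasks (e.g. preprocessing) running alongside training.
        _max_cpu_fraction_per_node=0.9,
    ),
    label_column="target",
    params={"objective": "binary:logistic"},
    datasets={"train": train_dataset},
)
result = trainer.fit()

Without the cap, training workers can reserve every CPU on a node, which starves concurrent Ray Data operations; reserving at most 90% of each node's CPUs is what the doc examples and tests in this patch standardize on.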