ray-project · amogkam · Feb 15, 2023 · Feb 14, 2023 · Feb 14, 2023 · Feb 14, 2023
@@ -35,6 +35,8 @@
     scaling_config=ScalingConfig(
         num_workers=num_workers,
         use_gpu=use_gpu,
+        # Make sure to leave some CPUs free for Ray Data operations.
+        _max_cpu_fraction_per_node=0.9,
     ),
     label_column="target",
     params=params,

@@ -164,13 +164,14 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "228ae052",
    "metadata": {},
    "source": [
     "With these parameters in place, we'll create a Ray AIR `XGBoostTrainer`.\n",
     "\n",
-    "Note a few things here. First, we pass in a `scaling_config` to configure the distributed training behavior of each individual XGBoost training job. Here, we want to distribute training across 2 workers.\n",
+    "Note that we pass in a `scaling_config` to configure the distributed training behavior of each individual XGBoost training job. We want to distribute training across 2 workers. We also keep some CPU resources free for Ray Data operations.\n",
     "\n",
     "The `label_column` specifies which column in the dataset contains the target values. `params` are the XGBoost training params defined above - we can tune these later! The `datasets` dict contains the dataset we would like to train on. Lastly, we pass the number of boosting rounds to XGBoost."
    ]
@@ -183,7 +184,7 @@
    "outputs": [],
    "source": [
     "trainer = XGBoostTrainer(\n",
-    "    scaling_config=ScalingConfig(num_workers=2),\n",
+    "    scaling_config=ScalingConfig(num_workers=2, _max_cpu_fraction_per_node=0.9),\n",
     "    label_column=\"target\",\n",
     "    params=params,\n",
     "    datasets={\"train\": train_dataset},\n",

@@ -31,6 +31,8 @@
         num_workers=2,
         # Whether to use GPU acceleration.
         use_gpu=False,
+        # Make sure to leave some CPUs free for Ray Data operations.
+        _max_cpu_fraction_per_node=0.9,
     ),
     label_column="target",
     num_boost_round=20,

diff --git a/python/ray/air/tests/test_resource_changing.py b/python/ray/air/tests/test_resource_changing.py
@@ -115,7 +115,9 @@ def test_gbdt_trainer(ray_start_8_cpus):
     trainer = AssertingXGBoostTrainer(
         datasets={TRAIN_DATASET_KEY: train_ds},
         label_column="target",
-        scaling_config=ScalingConfig(num_workers=2, placement_strategy="SPREAD"),
+        scaling_config=ScalingConfig(
+            num_workers=2, placement_strategy="SPREAD", _max_cpu_fraction_per_node=0.9
+        ),
         params={
             "objective": "binary:logistic",
             "eval_metric": ["logloss"],

diff --git a/python/ray/train/tests/test_lightgbm_trainer.py b/python/ray/train/tests/test_lightgbm_trainer.py
@@ -25,6 +25,14 @@ def ray_start_6_cpus():
     ray.shutdown()
 
 
+@pytest.fixture
+def ray_start_8_cpus():
+    address_info = ray.init(num_cpus=8)
+    yield address_info
+    # The code after the yield will run as teardown code.
+    ray.shutdown()
+
+
 scale_config = ScalingConfig(num_workers=2)
 
 data_raw = load_breast_cancer()
@@ -183,11 +191,11 @@ def _transform_pandas(self, df: "pd.DataFrame") -> "pd.DataFrame":
     assert preprocessor.fitted_
 
 
-def test_tune(ray_start_6_cpus):
+def test_tune(ray_start_8_cpus):
     train_dataset = ray.data.from_pandas(train_df)
     valid_dataset = ray.data.from_pandas(test_df)
     trainer = LightGBMTrainer(
-        scaling_config=scale_config,
+        scaling_config=ScalingConfig(num_workers=2, resources_per_worker={"CPU": 1}),
         label_column="target",
         params={**params, **{"max_depth": 1}},
         datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},

diff --git a/python/ray/train/tests/test_xgboost_trainer.py b/python/ray/train/tests/test_xgboost_trainer.py
@@ -25,6 +25,14 @@ def ray_start_4_cpus():
     ray.shutdown()
 
 
+@pytest.fixture
+def ray_start_8_cpus():
+    address_info = ray.init(num_cpus=8)
+    yield address_info
+    # The code after the yield will run as teardown code.
+    ray.shutdown()
+
+
 scale_config = ScalingConfig(num_workers=2)
 
 data_raw = load_breast_cancer()
@@ -197,7 +205,7 @@ def _transform_pandas(self, df: "pd.DataFrame") -> "pd.DataFrame":
     assert preprocessor.fitted_
 
 
-def test_tune(ray_start_4_cpus):
+def test_tune(ray_start_8_cpus):
     train_dataset = ray.data.from_pandas(train_df)
     valid_dataset = ray.data.from_pandas(test_df)
     trainer = XGBoostTrainer(

diff --git a/python/requirements/ml/requirements_upstream.txt b/python/requirements/ml/requirements_upstream.txt
@@ -4,6 +4,6 @@
 
 ray_lightning==0.3.0
 tune-sklearn==0.4.4
-xgboost_ray==0.1.10
-lightgbm_ray==0.1.5
+xgboost_ray==0.1.15
+lightgbm_ray==0.1.8
 modin==0.12.1; python_version >= '3.7'