diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index c03a52c605c9..06badec5f2e2 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -8,7 +8,7 @@ updates:
- package-ecosystem: "maven"
directory: "/jvm-packages"
schedule:
- interval: "daily"
+ interval: "monthly"
- package-ecosystem: "maven"
directory: "/jvm-packages/xgboost4j"
schedule:
@@ -16,11 +16,11 @@ updates:
- package-ecosystem: "maven"
directory: "/jvm-packages/xgboost4j-gpu"
schedule:
- interval: "daily"
+ interval: "monthly"
- package-ecosystem: "maven"
directory: "/jvm-packages/xgboost4j-example"
schedule:
- interval: "daily"
+ interval: "monthly"
- package-ecosystem: "maven"
directory: "/jvm-packages/xgboost4j-spark"
schedule:
@@ -28,4 +28,8 @@ updates:
- package-ecosystem: "maven"
directory: "/jvm-packages/xgboost4j-spark-gpu"
schedule:
- interval: "daily"
+ interval: "monthly"
+ - package-ecosystem: "github-actions"
+ directory: /
+ schedule:
+ interval: "monthly"
diff --git a/.github/workflows/i386.yml b/.github/workflows/i386.yml
index 4a4d65b25b61..1c4e98010310 100644
--- a/.github/workflows/i386.yml
+++ b/.github/workflows/i386.yml
@@ -5,6 +5,10 @@ on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
jobs:
build-32bit:
name: Build 32-bit
@@ -15,7 +19,7 @@ jobs:
ports:
- 5000:5000
steps:
- - uses: actions/checkout@v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- name: Set up Docker Buildx
diff --git a/.github/workflows/jvm_tests.yml b/.github/workflows/jvm_tests.yml
index bbded088387f..9ef314ca5b0b 100644
--- a/.github/workflows/jvm_tests.yml
+++ b/.github/workflows/jvm_tests.yml
@@ -5,6 +5,10 @@ on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
jobs:
test-with-jvm:
name: Test JVM on OS ${{ matrix.os }}
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 133e151e5e4f..4755f9aaaad8 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -9,6 +9,10 @@ on: [push, pull_request]
permissions:
contents: read # to fetch code (actions/checkout)
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
gtest-cpu:
@@ -19,7 +23,7 @@ jobs:
matrix:
os: [macos-11]
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- name: Install system packages
@@ -45,7 +49,7 @@ jobs:
matrix:
os: [ubuntu-latest]
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- name: Install system packages
@@ -72,10 +76,10 @@ jobs:
os: [ubuntu-latest]
python-version: ["3.8"]
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
+ - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
with:
cache-downloads: true
cache-env: true
@@ -114,10 +118,10 @@ jobs:
os: ["ubuntu-latest"]
python-version: ["3.8"]
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
+ - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
with:
cache-downloads: true
cache-env: true
@@ -171,7 +175,7 @@ jobs:
runs-on: ubuntu-latest
name: Code linting for C++
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml
index 3fbcc7a01acf..f0cad6382d87 100644
--- a/.github/workflows/python_tests.yml
+++ b/.github/workflows/python_tests.yml
@@ -9,6 +9,10 @@ defaults:
run:
shell: bash -l {0}
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
jobs:
python-mypy-lint:
runs-on: ubuntu-latest
@@ -17,10 +21,10 @@ jobs:
matrix:
os: [ubuntu-latest]
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
+ - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
with:
cache-downloads: true
cache-env: true
@@ -48,10 +52,10 @@ jobs:
matrix:
os: [ubuntu-latest]
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
+ - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
with:
cache-downloads: true
cache-env: true
@@ -80,14 +84,14 @@ jobs:
os: [macos-11, windows-latest]
python-version: ["3.8"]
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- name: Install osx system dependencies
if: matrix.os == 'macos-11'
run: |
brew install ninja libomp
- - uses: conda-incubator/setup-miniconda@35d1405e78aa3f784fe3ce9a2eb378d5eeb62169 # v2.1.1
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
auto-update-conda: true
python-version: ${{ matrix.python-version }}
@@ -118,11 +122,11 @@ jobs:
- {os: macos-11}
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
+ - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
with:
cache-downloads: true
cache-env: true
@@ -170,11 +174,11 @@ jobs:
- {os: windows-latest, python-version: '3.8'}
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- - uses: conda-incubator/setup-miniconda@35d1405e78aa3f784fe3ce9a2eb378d5eeb62169 # v2.1.1
+ - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3.0.4
with:
auto-update-conda: true
python-version: ${{ matrix.config.python-version }}
@@ -214,11 +218,11 @@ jobs:
- {os: ubuntu-latest, python-version: "3.8"}
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
+ - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
with:
cache-downloads: true
cache-env: true
@@ -266,11 +270,11 @@ jobs:
- {os: ubuntu-latest, python-version: "3.8"}
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14
+ - uses: mamba-org/provision-with-micromamba@3c96c0c27676490c63c18bc81f5c51895ac3e0e6 # v16
with:
cache-downloads: true
cache-env: true
@@ -305,7 +309,7 @@ jobs:
os: [ubuntu-latest]
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml
index 129ab805f753..090b1f830213 100644
--- a/.github/workflows/python_wheels.yml
+++ b/.github/workflows/python_wheels.yml
@@ -25,7 +25,7 @@ jobs:
- os: macos-14
platform_id: macosx_arm64
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- uses: conda-incubator/setup-miniconda@v3.0.4
diff --git a/.github/workflows/r_nold.yml b/.github/workflows/r_nold.yml
index a014c9138493..887470190035 100644
--- a/.github/workflows/r_nold.yml
+++ b/.github/workflows/r_nold.yml
@@ -10,6 +10,10 @@ on:
permissions:
contents: read # to fetch code (actions/checkout)
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
jobs:
test-R-noLD:
if: github.event.comment.body == '/gha run r-nold-test' && contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association)
@@ -23,7 +27,7 @@ jobs:
run: |
apt update && apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev git -y
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml
index 1ed5ca20b777..f3d83b823aff 100644
--- a/.github/workflows/r_tests.yml
+++ b/.github/workflows/r_tests.yml
@@ -8,6 +8,10 @@ env:
permissions:
contents: read # to fetch code (actions/checkout)
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
jobs:
lintr:
runs-on: ${{ matrix.config.os }}
@@ -21,11 +25,11 @@ jobs:
RSPM: ${{ matrix.config.rspm }}
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- - uses: r-lib/actions/setup-r@b7e68d63e51bdf225997973e2add36d551f60f02 # v2.8.7
+ - uses: r-lib/actions/setup-r@929c772977a3a13c8733b363bf5a2f685c25dd91 # v2.9.0
with:
r-version: ${{ matrix.config.r }}
@@ -33,8 +37,8 @@ jobs:
uses: actions/cache@937d24475381cd9c75ae6db12cb4e79714b926ed # v3.0.11
with:
path: ${{ env.R_LIBS_USER }}
- key: ${{ runner.os }}-r-${{ matrix.config.r }}-6-${{ hashFiles('R-package/DESCRIPTION') }}
- restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-6-${{ hashFiles('R-package/DESCRIPTION') }}
+ key: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
+ restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
- name: Install dependencies
shell: Rscript {0}
@@ -46,7 +50,7 @@ jobs:
MAKEFLAGS="-j$(nproc)" R CMD INSTALL R-package/
Rscript tests/ci_build/lint_r.R $(pwd)
- test-R-on-Windows:
+ test-Rpkg:
runs-on: ${{ matrix.config.os }}
name: Test R on OS ${{ matrix.config.os }}, R ${{ matrix.config.r }}, Compiler ${{ matrix.config.compiler }}, Build ${{ matrix.config.build }}
strategy:
@@ -54,16 +58,22 @@ jobs:
matrix:
config:
- {os: windows-latest, r: 'release', compiler: 'mingw', build: 'autotools'}
+ - {os: ubuntu-latest, r: 'release', compiler: 'none', build: 'cmake'}
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
steps:
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - name: Install system dependencies
+ run: |
+ sudo apt update
+ sudo apt install libcurl4-openssl-dev libssl-dev libssh2-1-dev libgit2-dev libglpk-dev libxml2-dev libharfbuzz-dev libfribidi-dev
+ if: matrix.config.os == 'ubuntu-latest'
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- - uses: r-lib/actions/setup-r@b7e68d63e51bdf225997973e2add36d551f60f02 # v2.8.7
+ - uses: r-lib/actions/setup-r@929c772977a3a13c8733b363bf5a2f685c25dd91 # v2.9.0
with:
r-version: ${{ matrix.config.r }}
@@ -71,8 +81,8 @@ jobs:
uses: actions/cache@937d24475381cd9c75ae6db12cb4e79714b926ed # v3.0.11
with:
path: ${{ env.R_LIBS_USER }}
- key: ${{ runner.os }}-r-${{ matrix.config.r }}-6-${{ hashFiles('R-package/DESCRIPTION') }}
- restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-6-${{ hashFiles('R-package/DESCRIPTION') }}
+ key: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
+ restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}-7-${{ hashFiles('R-package/DESCRIPTION') }}
- uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0
with:
@@ -89,12 +99,18 @@ jobs:
- name: Test R
run: |
python tests/ci_build/test_r_package.py --compiler='${{ matrix.config.compiler }}' --build-tool="${{ matrix.config.build }}" --task=check
+ if: matrix.config.compiler != 'none'
+
+ - name: Test R
+ run: |
+ python tests/ci_build/test_r_package.py --build-tool="${{ matrix.config.build }}" --task=check
+ if: matrix.config.compiler == 'none'
test-R-on-Debian:
name: Test R package on Debian
runs-on: ubuntu-latest
container:
- image: rhub/debian-gcc-devel
+ image: rhub/debian-gcc-release
steps:
- name: Install system dependencies
@@ -107,21 +123,21 @@ jobs:
run: |
git config --global --add safe.directory "${GITHUB_WORKSPACE}"
- - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- name: Install dependencies
shell: bash -l {0}
run: |
- /tmp/R-devel/bin/Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
+ Rscript -e "source('./R-package/tests/helper_scripts/install_deps.R')"
- name: Test R
shell: bash -l {0}
run: |
- python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --build-tool=autotools --task=check
+ python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --build-tool=autotools --task=check
- - uses: dorny/paths-filter@v2
+ - uses: dorny/paths-filter@v3
id: changes
with:
filters: |
@@ -131,4 +147,4 @@ jobs:
- name: Run document check
if: steps.changes.outputs.r_package == 'true'
run: |
- python3 tests/ci_build/test_r_package.py --r=/tmp/R-devel/bin/R --task=doc
+ python3 tests/ci_build/test_r_package.py --r=/usr/bin/R --task=doc
diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml
index 78cde0a43cb2..4651e2ac0dff 100644
--- a/.github/workflows/scorecards.yml
+++ b/.github/workflows/scorecards.yml
@@ -22,12 +22,12 @@ jobs:
steps:
- name: "Checkout code"
- uses: actions/checkout@a12a3943b4bdde767164f792f33f40b04645d846 # tag=v3.0.0
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
persist-credentials: false
- name: "Run analysis"
- uses: ossf/scorecard-action@08b4669551908b1024bb425080c797723083c031 # tag=v2.2.0
+ uses: ossf/scorecard-action@dc50aa9510b46c811795eb24b2f1ba02a914e534 # v2.3.3
with:
results_file: results.sarif
results_format: sarif
@@ -41,7 +41,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
- uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce # tag=v3.1.2
+ uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1
with:
name: SARIF file
path: results.sarif
@@ -49,6 +49,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
- uses: github/codeql-action/upload-sarif@7b6664fa89524ee6e3c3e9749402d5afd69b3cd8 # tag=v2.14.1
+ uses: github/codeql-action/upload-sarif@83a02f7883b12e0e4e1a146174f5e2292a01e601 # v2.16.4
with:
sarif_file: results.sarif
diff --git a/.github/workflows/update_rapids.yml b/.github/workflows/update_rapids.yml
index 395a42148c23..9f9c85f62e28 100644
--- a/.github/workflows/update_rapids.yml
+++ b/.github/workflows/update_rapids.yml
@@ -3,7 +3,7 @@ name: update-rapids
on:
workflow_dispatch:
schedule:
- - cron: "0 20 * * *" # Run once daily
+ - cron: "0 20 * * 1" # Run once weekly
permissions:
pull-requests: write
@@ -25,14 +25,14 @@ jobs:
name: Check latest RAPIDS
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
submodules: 'true'
- name: Check latest RAPIDS and update conftest.sh
run: |
bash tests/buildkite/update-rapids.sh
- name: Create Pull Request
- uses: peter-evans/create-pull-request@v5
+ uses: peter-evans/create-pull-request@v6
if: github.ref == 'refs/heads/master'
with:
add-paths: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index dbfa1cdc225b..c69b0d2a3dc7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,7 +69,6 @@ option(USE_DMLC_GTEST "Use google tests bundled with dmlc-core submodule" OFF)
option(USE_DEVICE_DEBUG "Generate CUDA device debug info." OFF)
option(USE_NVTX "Build with cuda profiling annotations. Developers only." OFF)
set(NVTX_HEADER_DIR "" CACHE PATH "Path to the stand-alone nvtx header")
-option(RABIT_MOCK "Build rabit with mock" OFF)
option(HIDE_CXX_SYMBOLS "Build shared library and hide all C++ symbols" OFF)
option(KEEP_BUILD_ARTIFACTS_IN_BINARY_DIR "Output build artifacts in CMake binary dir" OFF)
## CUDA
@@ -282,9 +281,6 @@ if(MSVC)
endif()
endif()
-# rabit
-add_subdirectory(rabit)
-
# core xgboost
add_subdirectory(${xgboost_SOURCE_DIR}/src)
target_link_libraries(objxgboost PUBLIC dmlc)
diff --git a/NEWS.md b/NEWS.md
index 43019d877cd0..b067c8e3ca88 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2101,7 +2101,7 @@ This release marks a major milestone for the XGBoost project.
## v0.90 (2019.05.18)
### XGBoost Python package drops Python 2.x (#4379, #4381)
-Python 2.x is reaching its end-of-life at the end of this year. [Many scientific Python packages are now moving to drop Python 2.x](https://python3statement.org/).
+Python 2.x is reaching its end-of-life at the end of this year. [Many scientific Python packages are now moving to drop Python 2.x](https://python3statement.github.io/).
### XGBoost4J-Spark now requires Spark 2.4.x (#4377)
* Spark 2.3 is reaching its end-of-life soon. See discussion at #4389.
diff --git a/R-package/CMakeLists.txt b/R-package/CMakeLists.txt
index d3a69abc278e..37c5dbf4c1ed 100644
--- a/R-package/CMakeLists.txt
+++ b/R-package/CMakeLists.txt
@@ -26,7 +26,6 @@ endif()
target_compile_definitions(
xgboost-r PUBLIC
-DXGBOOST_STRICT_R_MODE=1
- -DXGBOOST_CUSTOMIZE_GLOBAL_PRNG=1
-DDMLC_LOG_BEFORE_THROW=0
-DDMLC_DISABLE_STDIN=1
-DDMLC_LOG_CUSTOMIZE=1
diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index 580d1f87325f..c9e085e77e0a 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -20,15 +20,9 @@ export("xgb.attr<-")
export("xgb.attributes<-")
export("xgb.config<-")
export("xgb.parameters<-")
-export(cb.cv.predict)
-export(cb.early.stop)
-export(cb.evaluation.log)
-export(cb.gblinear.history)
-export(cb.print.evaluation)
-export(cb.reset.parameters)
-export(cb.save.model)
export(getinfo)
export(setinfo)
+export(xgb.Callback)
export(xgb.DMatrix)
export(xgb.DMatrix.hasinfo)
export(xgb.DMatrix.save)
@@ -39,6 +33,13 @@ export(xgb.QuantileDMatrix)
export(xgb.QuantileDMatrix.from_iterator)
export(xgb.attr)
export(xgb.attributes)
+export(xgb.cb.cv.predict)
+export(xgb.cb.early.stop)
+export(xgb.cb.evaluation.log)
+export(xgb.cb.gblinear.history)
+export(xgb.cb.print.evaluation)
+export(xgb.cb.reset.parameters)
+export(xgb.cb.save.model)
export(xgb.config)
export(xgb.copy.Booster)
export(xgb.create.features)
@@ -72,14 +73,10 @@ export(xgb.slice.DMatrix)
export(xgb.train)
export(xgboost)
import(methods)
+importClassesFrom(Matrix,CsparseMatrix)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(Matrix,dgRMatrix)
-importClassesFrom(Matrix,dgeMatrix)
-importFrom(Matrix,colSums)
importFrom(Matrix,sparse.model.matrix)
-importFrom(Matrix,sparseMatrix)
-importFrom(Matrix,sparseVector)
-importFrom(Matrix,t)
importFrom(data.table,":=")
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
@@ -101,6 +98,7 @@ importFrom(methods,new)
importFrom(stats,coef)
importFrom(stats,median)
importFrom(stats,predict)
+importFrom(stats,sd)
importFrom(stats,variable.names)
importFrom(utils,head)
importFrom(utils,object.size)
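
The NAMESPACE changes reflect the renaming of the callback constructors from `cb.*` to `xgb.cb.*` and the new `xgb.Callback` export. A minimal usage sketch of the renamed constructors with `xgb.train()`, assuming the refactored interface documented in R-package/R/callbacks.R below; the dataset and parameter values are illustrative only:

library(xgboost)
data(mtcars)
dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
bst <- xgb.train(
  params = list(objective = "reg:squarederror", nthread = 1),
  data = dm,
  nrounds = 10,
  evals = list(train = dm),
  callbacks = list(
    xgb.cb.print.evaluation(period = 2),  # previously cb.print.evaluation()
    xgb.cb.evaluation.log()               # previously cb.evaluation.log()
  ),
  keep_extra_attributes = TRUE
)
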
diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R
index 02e0a7cd4b8e..39734ab092d3 100644
--- a/R-package/R/callbacks.R
+++ b/R-package/R/callbacks.R
@@ -1,478 +1,833 @@
-#' Callback closures for booster training.
+.reserved_cb_names <- c("names", "class", "call", "params", "niter", "nfeatures", "folds")
+
+#' @title XGBoost Callback Constructor
+#' @description Constructor for defining the structure of callback functions that can be executed
+#' at different stages of model training (before / after training, before / after each boosting
+#' iteration).
+#' @param cb_name Name for the callback.
#'
-#' These are used to perform various service tasks either during boosting iterations or at the end.
-#' This approach helps to modularize many of such tasks without bloating the main training methods,
-#' and it offers .
+#' If the callback produces some non-NULL result (from executing the function passed under
+#' `f_after_training`), that result will be added as an R attribute to the resulting booster
+#' (or as a named element in the result of CV), with the attribute name specified here.
#'
-#' @details
-#' By default, a callback function is run after each boosting iteration.
-#' An R-attribute \code{is_pre_iteration} could be set for a callback to define a pre-iteration function.
+#' Names of callbacks must be unique - i.e. there cannot be two callbacks with the same name.
+#' @param env An environment object that will be passed to the different functions in the callback.
+#' Note that this environment will not be shared with other callbacks.
+#' @param f_before_training A function that will be executed before the training has started.
#'
-#' When a callback function has \code{finalize} parameter, its finalizer part will also be run after
-#' the boosting is completed.
+#' If passing `NULL` for this or for the other function inputs, then no function will be executed.
#'
-#' WARNING: side-effects!!! Be aware that these callback functions access and modify things in
-#' the environment from which they are called from, which is a fairly uncommon thing to do in R.
+#' If passing a function, it will be called with parameters supplied as non-named arguments
+#' matching the function signatures that are shown in the default value for each function argument.
+#' @param f_before_iter A function that will be executed before each boosting round.
#'
-#' To write a custom callback closure, make sure you first understand the main concepts about R environments.
-#' Check either R documentation on \code{\link[base]{environment}} or the
-#' \href{http://adv-r.had.co.nz/Environments.html}{Environments chapter} from the "Advanced R"
-#' book by Hadley Wickham. Further, the best option is to read the code of some of the existing callbacks -
-#' choose ones that do something similar to what you want to achieve. Also, you would need to get familiar
-#' with the objects available inside of the \code{xgb.train} and \code{xgb.cv} internal environments.
+#' This function can signal whether the training should be finalized or not, by outputting
+#' a value that evaluates to `TRUE` - i.e. if the output from the function provided here at
+#' a given round is `TRUE`, then training will be stopped before the current iteration happens.
#'
-#' @seealso
-#' \code{\link{cb.print.evaluation}},
-#' \code{\link{cb.evaluation.log}},
-#' \code{\link{cb.reset.parameters}},
-#' \code{\link{cb.early.stop}},
-#' \code{\link{cb.save.model}},
-#' \code{\link{cb.cv.predict}},
-#' \code{\link{xgb.train}},
-#' \code{\link{xgb.cv}}
+#' Return values of `NULL` will be interpreted as `FALSE`.
+#' @param f_after_iter A function that will be executed after each boosting round.
#'
-#' @name callbacks
-NULL
-
-#
-# Callbacks -------------------------------------------------------------------
-#
-
-#' Callback closure for printing the result of evaluation
+#' This function can signal whether the training should be finalized or not, by outputting
+#' a value that evaluates to `TRUE` - i.e. if the output from the function provided here at
+#' a given round is `TRUE`, then training will be stopped at that round.
#'
-#' @param period results would be printed every number of periods
-#' @param showsd whether standard deviations should be printed (when available)
+#' Return values of `NULL` will be interpreted as `FALSE`.
+#' @param f_after_training A function that will be executed after training is finished.
#'
-#' @details
-#' The callback function prints the result of evaluation at every \code{period} iterations.
-#' The initial and the last iteration's evaluations are always printed.
+#' This function can optionally output something non-NULL, which will become part of the R
+#' attributes of the booster (assuming one passes `keep_extra_attributes=TRUE` to \link{xgb.train})
+#' under the name supplied for parameter `cb_name` in the case of \link{xgb.train}; or a part
+#' of the named elements in the result of \link{xgb.cv}.
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+#' @details Arguments that will be passed to the supplied functions are as follows:\itemize{
#'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{bst_evaluation} (also \code{bst_evaluation_err} when available),
-#' \code{iteration},
-#' \code{begin_iteration},
-#' \code{end_iteration}.
+#' \item env The same environment that is passed under argument `env`.
#'
-#' @seealso
-#' \code{\link{callbacks}}
+#' It may be modified by the functions in order to e.g. keep tracking of what happens
+#' across iterations or similar.
#'
-#' @export
-cb.print.evaluation <- function(period = 1, showsd = TRUE) {
-
- callback <- function(env = parent.frame()) {
- if (length(env$bst_evaluation) == 0 ||
- period == 0 ||
- NVL(env$rank, 0) != 0)
- return()
-
- i <- env$iteration
- if ((i - 1) %% period == 0 ||
- i == env$begin_iteration ||
- i == env$end_iteration) {
- stdev <- if (showsd) env$bst_evaluation_err else NULL
- msg <- .format_eval_string(i, env$bst_evaluation, stdev)
- cat(msg, '\n')
- }
- }
- attr(callback, 'call') <- match.call()
- attr(callback, 'name') <- 'cb.print.evaluation'
- callback
-}
-
-
-#' Callback closure for logging the evaluation history
+#' This environment is only used by the functions supplied to the callback, and will
+#' not be kept after the model fitting function terminates (see parameter `f_after_training`).
#'
-#' @details
-#' This callback function appends the current iteration evaluation results \code{bst_evaluation}
-#' available in the calling parent frame to the \code{evaluation_log} list in a calling frame.
+#' \item model The booster object when using \link{xgb.train}, or the folds when using
+#' \link{xgb.cv}.
+#'
+#' For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
+#' \item `dtrain`: The training data for the fold (as an `xgb.DMatrix` object).
+#' \item `bst`: The `xgb.Booster` object for the fold.
+#' \item `evals`: A list containing two DMatrices, with names `train` and `test`
+#' (`test` is the held-out data for the fold).
+#' \item `index`: The indices of the hold-out data for that fold (base-1 indexing),
+#' from which the `test` entry in `evals` was obtained.
+#' }
#'
-#' The finalizer callback (called with \code{finalize = TURE} in the end) converts
-#' the \code{evaluation_log} list into a final data.table.
+#' This object should \bold{not} be modified in-place in ways that conflict with the
+#' training (e.g. resetting the parameters for a training update in a way that resets
+#' the number of rounds to zero in order to overwrite rounds).
#'
-#' The iteration evaluation result \code{bst_evaluation} must be a named numeric vector.
+#' Note that any R attributes that are assigned to the booster during the callback functions,
+#' will not be kept thereafter as the booster object variable is not re-assigned during
+#' training. It is however possible to set C-level attributes of the booster through
+#' \link{xgb.attr} or \link{xgb.attributes}, which should remain available for the rest
+#' of the iterations and after the training is done.
#'
-#' Note: in the column names of the final data.table, the dash '-' character is replaced with
-#' the underscore '_' in order to make the column names more like regular R identifiers.
+#' For keeping variables across iterations, it's recommended to use `env` instead.
+#' \item data The data to which the model is being fit, as an `xgb.DMatrix` object.
+#'
+#' Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
+#' folds can be found in the `model` object.
+#'
+#' \item evals The evaluation data, as passed under argument `evals` to
+#' \link{xgb.train}.
+#'
+#' For \link{xgb.cv}, this will always be `NULL`.
+#'
+#' \item begin_iteration Index of the first boosting iteration that will be executed
+#' (base-1 indexing).
+#'
+#' This will typically be '1', but when using training continuation, depending on the
+#' parameters for updates, boosting rounds will be continued from where the previous
+#' model ended, in which case this will be larger than 1.
+#'
+#' \item end_iteration Index of the last boosting iteration that will be executed
+#' (base-1 indexing, inclusive of this end).
+#'
+#' It should match with argument `nrounds` passed to \link{xgb.train} or \link{xgb.cv}.
+#'
+#' Note that boosting might be interrupted before reaching this last iteration, for
+#' example by using the early stopping callback \link{xgb.cb.early.stop}.
+#'
+#' \item iteration Index of the iteration number that is being executed (first iteration
+#' will be the same as parameter `begin_iteration`, then next one will add +1, and so on).
+#'
+#' \item iter_feval Evaluation metrics for `evals` that were supplied, either
+#' determined by the objective, or by parameter `feval`.
+#'
+#' For \link{xgb.train}, this will be a named vector with one entry per element in
+#' `evals`, where the names are determined as 'evals name' + '-' + 'metric name' - for
+#' example, if `evals` contains an entry named "tr" and the metric is "rmse",
+#' this will be a one-element vector with name "tr-rmse".
+#'
+#' For \link{xgb.cv}, this will be a 2d matrix with dimensions `[length(evals), nfolds]`,
+#' where the row names will follow the same naming logic as the one-dimensional vector
+#' that is passed in \link{xgb.train}.
+#'
+#' Note that, internally, the built-in callbacks such as \link{xgb.cb.print.evaluation} summarize
+#' this table by calculating the row-wise means and standard deviations.
+#'
+#' \item final_feval The evaluation results after the last boosting round is executed
+#' (same format as `iter_feval`, and will be the exact same input as passed under
+#' `iter_feval` to the last round that is executed during model fitting).
+#'
+#' \item prev_cb_res Result from a previous run of a callback sharing the same name
+#' (as given by parameter `cb_name`) when conducting training continuation, if there
+#' was any in the booster R attributes.
+#'
+#' Sometimes, one might want to append the new results to the previous one, and this will
+#' be done automatically by the built-in callbacks such as \link{xgb.cb.evaluation.log},
+#' which will append the new rows to the previous table.
+#'
+#' If no such previous callback result is available (which will never be the case when fitting
+#' a model from scratch instead of updating an existing model), this will be `NULL`.
+#'
+#' For \link{xgb.cv}, which doesn't support training continuation, this will always be `NULL`.
+#' }
+#'
+#' The following names (`cb_name` values) are reserved for internal callbacks:\itemize{
+#' \item print_evaluation
+#' \item evaluation_log
+#' \item reset_parameters
+#' \item early_stop
+#' \item save_model
+#' \item cv_predict
+#' \item gblinear_history
+#' }
#'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{evaluation_log},
-#' \code{bst_evaluation},
-#' \code{iteration}.
+#' The following names are reserved for other non-callback attributes:\itemize{
+#' \item names
+#' \item class
+#' \item call
+#' \item params
+#' \item niter
+#' \item nfeatures
+#' \item folds
+#' }
+#'
+#' When using the built-in early stopping callback (\link{xgb.cb.early.stop}), said callback
+#' will always be executed before the others, as it sets some booster C-level attributes
+#' that other callbacks might also use. Otherwise, the order of execution will match with
+#' the order in which the callbacks are passed to the model fitting function.
+#' @seealso Built-in callbacks:\itemize{
+#' \item \link{xgb.cb.print.evaluation}
+#' \item \link{xgb.cb.evaluation.log}
+#' \item \link{xgb.cb.reset.parameters}
+#' \item \link{xgb.cb.early.stop}
+#' \item \link{xgb.cb.save.model}
+#' \item \link{xgb.cb.cv.predict}
+#' \item \link{xgb.cb.gblinear.history}
+#' }
+#' @examples
+#' # Example constructing a custom callback that calculates
+#' # squared error on the training data (no separate test set),
+#' # and outputs the per-iteration results.
+#' ssq_callback <- xgb.Callback(
+#' cb_name = "ssq",
+#' f_before_training = function(env, model, data, evals,
+#' begin_iteration, end_iteration) {
+#' # A vector to keep track of a number at each iteration
+#' env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
+#' },
+#' f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
+#' # This calculates the sum of squared errors on the training data.
+#' # Note that this can be better done by passing an 'evals' entry,
+#' # but this demonstrates a way in which callbacks can be structured.
+#' pred <- predict(model, data)
+#' err <- pred - getinfo(data, "label")
+#' sq_err <- sum(err^2)
+#' env$logs[iteration] <- sq_err
+#' cat(
+#' sprintf(
+#' "Squared error at iteration %d: %.2f\n",
+#' iteration, sq_err
+#' )
+#' )
+#'
+#' # A return value of 'TRUE' here would signal to finalize the training
+#' return(FALSE)
+#' },
+#' f_after_training = function(env, model, data, evals, iteration,
+#' final_feval, prev_cb_res) {
+#' return(env$logs)
+#' }
+#' )
#'
-#' @seealso
-#' \code{\link{callbacks}}
+#' data(mtcars)
+#' y <- mtcars$mpg
+#' x <- as.matrix(mtcars[, -1])
+#' dm <- xgb.DMatrix(x, label = y, nthread = 1)
+#' model <- xgb.train(
+#' data = dm,
+#' params = list(objective = "reg:squarederror", nthread = 1),
+#' nrounds = 5,
+#' callbacks = list(ssq_callback),
+#' keep_extra_attributes = TRUE
+#' )
#'
+#' # Result from 'f_after_iter' will be available as an attribute
+#' attributes(model)$ssq
#' @export
-cb.evaluation.log <- function() {
+xgb.Callback <- function(
+ cb_name = "custom_callback",
+ env = new.env(),
+ f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) NULL,
+ f_before_iter = function(env, model, data, evals, iteration) NULL,
+ f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
+ f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) NULL
+) {
+ stopifnot(is.null(f_before_training) || is.function(f_before_training))
+ stopifnot(is.null(f_before_iter) || is.function(f_before_iter))
+ stopifnot(is.null(f_after_iter) || is.function(f_after_iter))
+ stopifnot(is.null(f_after_training) || is.function(f_after_training))
+ stopifnot(is.character(cb_name) && length(cb_name) == 1)
+
+ if (cb_name %in% .reserved_cb_names) {
+ stop("Cannot use reserved callback name '", cb_name, "'.")
+ }
- mnames <- NULL
+ out <- list(
+ cb_name = cb_name,
+ env = env,
+ f_before_training = f_before_training,
+ f_before_iter = f_before_iter,
+ f_after_iter = f_after_iter,
+ f_after_training = f_after_training
+ )
+ class(out) <- "xgb.Callback"
+ return(out)
+}
- init <- function(env) {
- if (!is.list(env$evaluation_log))
- stop("'evaluation_log' has to be a list")
- mnames <<- names(env$bst_evaluation)
- if (is.null(mnames) || any(mnames == ""))
- stop("bst_evaluation must have non-empty names")
+.execute.cb.before.training <- function(
+ callbacks,
+ model,
+ data,
+ evals,
+ begin_iteration,
+ end_iteration
+) {
+ for (callback in callbacks) {
+ if (!is.null(callback$f_before_training)) {
+ callback$f_before_training(
+ callback$env,
+ model,
+ data,
+ evals,
+ begin_iteration,
+ end_iteration
+ )
+ }
+ }
+}
- mnames <<- gsub('-', '_', names(env$bst_evaluation), fixed = TRUE)
- if (!is.null(env$bst_evaluation_err))
- mnames <<- c(paste0(mnames, '_mean'), paste0(mnames, '_std'))
+.execute.cb.before.iter <- function(
+ callbacks,
+ model,
+ data,
+ evals,
+ iteration
+) {
+ if (!length(callbacks)) {
+ return(FALSE)
}
+ out <- sapply(callbacks, function(cb) {
+ if (is.null(cb$f_before_iter)) {
+ return(FALSE)
+ }
+ should_stop <- cb$f_before_iter(
+ cb$env,
+ model,
+ data,
+ evals,
+ iteration
+ )
+ if (!NROW(should_stop)) {
+ should_stop <- FALSE
+ } else if (NROW(should_stop) > 1) {
+ should_stop <- head(as.logical(should_stop), 1)
+ }
+ return(should_stop)
+ })
+ return(any(out))
+}
- finalizer <- function(env) {
- env$evaluation_log <- as.data.table(t(simplify2array(env$evaluation_log)))
- setnames(env$evaluation_log, c('iter', mnames))
-
- if (!is.null(env$bst_evaluation_err)) {
- # rearrange col order from _mean,_mean,...,_std,_std,...
- # to be _mean,_std,_mean,_std,...
- len <- length(mnames)
- means <- mnames[seq_len(len / 2)]
- stds <- mnames[(len / 2 + 1):len]
- cnames <- numeric(len)
- cnames[c(TRUE, FALSE)] <- means
- cnames[c(FALSE, TRUE)] <- stds
- env$evaluation_log <- env$evaluation_log[, c('iter', cnames), with = FALSE]
+.execute.cb.after.iter <- function(
+ callbacks,
+ model,
+ data,
+ evals,
+ iteration,
+ iter_feval
+) {
+ if (!length(callbacks)) {
+ return(FALSE)
+ }
+ out <- sapply(callbacks, function(cb) {
+ if (is.null(cb$f_after_iter)) {
+ return(FALSE)
}
+ should_stop <- cb$f_after_iter(
+ cb$env,
+ model,
+ data,
+ evals,
+ iteration,
+ iter_feval
+ )
+ if (!NROW(should_stop)) {
+ should_stop <- FALSE
+ } else if (NROW(should_stop) > 1) {
+ should_stop <- head(as.logical(should_stop), 1)
+ }
+ return(should_stop)
+ })
+ return(any(out))
+}
+
+.execute.cb.after.training <- function(
+ callbacks,
+ model,
+ data,
+ evals,
+ iteration,
+ final_feval,
+ prev_cb_res
+) {
+ if (!length(callbacks)) {
+ return(NULL)
+ }
+ old_cb_res <- attributes(model)
+ out <- lapply(callbacks, function(cb) {
+ if (is.null(cb$f_after_training)) {
+ return(NULL)
+ } else {
+ return(
+ cb$f_after_training(
+ cb$env,
+ model,
+ data,
+ evals,
+ iteration,
+ final_feval,
+ getElement(old_cb_res, cb$cb_name)
+ )
+ )
+ }
+ })
+ names(out) <- sapply(callbacks, function(cb) cb$cb_name)
+ if (NROW(out)) {
+ out <- out[!sapply(out, is.null)]
}
+ return(out)
+}
- callback <- function(env = parent.frame(), finalize = FALSE) {
- if (is.null(mnames))
- init(env)
+.summarize.feval <- function(iter_feval, showsd) {
+ if (NCOL(iter_feval) > 1L && showsd) {
+ stdev <- apply(iter_feval, 1, sd)
+ } else {
+ stdev <- NULL
+ }
+ if (NCOL(iter_feval) > 1L) {
+ iter_feval <- rowMeans(iter_feval)
+ }
+ return(list(feval = iter_feval, stdev = stdev))
+}
- if (finalize)
- return(finalizer(env))
+.print.evaluation <- function(iter_feval, showsd, iteration) {
+ tmp <- .summarize.feval(iter_feval, showsd)
+ msg <- .format_eval_string(iteration, tmp$feval, tmp$stdev)
+ cat(msg, '\n')
+}
- ev <- env$bst_evaluation
- if (!is.null(env$bst_evaluation_err))
- ev <- c(ev, env$bst_evaluation_err)
- env$evaluation_log <- c(env$evaluation_log,
- list(c(iter = env$iteration, ev)))
+# Format the evaluation metric string
+.format_eval_string <- function(iter, eval_res, eval_err = NULL) {
+ if (length(eval_res) == 0)
+ stop('no evaluation results')
+ enames <- names(eval_res)
+ if (is.null(enames))
+ stop('evaluation results must have names')
+ iter <- sprintf('[%d]\t', iter)
+ if (!is.null(eval_err)) {
+ if (length(eval_res) != length(eval_err))
+ stop('eval_res & eval_err lengths mismatch')
+ # Note: UTF-8 code for plus/minus sign is U+00B1
+ res <- paste0(sprintf("%s:%f\U00B1%f", enames, eval_res, eval_err), collapse = '\t')
+ } else {
+ res <- paste0(sprintf("%s:%f", enames, eval_res), collapse = '\t')
}
- attr(callback, 'call') <- match.call()
- attr(callback, 'name') <- 'cb.evaluation.log'
- callback
+ return(paste0(iter, res))
}
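
For reference, a small worked example of the string produced by the internal (unexported) `.format_eval_string()` helper above; the metric values are made up and the expected output is inferred from the sprintf templates:

# One metric with a standard deviation is rendered as "name:mean±sd",
# prefixed with the bracketed iteration number; multiple metrics would be
# joined by tab characters.
.format_eval_string(3, c("train-rmse" = 0.52), c("train-rmse" = 0.01))
# "[3]\ttrain-rmse:0.520000±0.010000"
.format_eval_string(3, c("train-rmse" = 0.52))
# "[3]\ttrain-rmse:0.520000"
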
-#' Callback closure for resetting the booster's parameters at each iteration.
+#' @title Callback for printing the result of evaluation
+#' @param period results are printed every \code{period} iterations
+#' @param showsd whether standard deviations should be printed (when available)
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+#' @description
+#' The callback function prints the result of evaluation at every \code{period} iterations.
+#' The initial and the last iteration's evaluations are always printed.
#'
+#' Does not leave any attribute in the booster (see \link{xgb.cb.evaluation.log} for that).
+#' @seealso \link{xgb.Callback}
+#' @export
+xgb.cb.print.evaluation <- function(period = 1, showsd = TRUE) {
+ if (length(period) != 1 || period != floor(period) || period < 1) {
+ stop("'period' must be a positive integer.")
+ }
+
+ xgb.Callback(
+ cb_name = "print_evaluation",
+ env = as.environment(list(period = period, showsd = showsd, is_first_call = TRUE)),
+ f_before_training = NULL,
+ f_before_iter = NULL,
+ f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
+ if (is.null(iter_feval)) {
+ return(FALSE)
+ }
+ if (env$is_first_call || (iteration - 1) %% env$period == 0) {
+ .print.evaluation(iter_feval, env$showsd, iteration)
+ env$last_printed_iter <- iteration
+ }
+ env$is_first_call <- FALSE
+ return(FALSE)
+ },
+ f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
+ if (is.null(final_feval)) {
+ return(NULL)
+ }
+ if (is.null(env$last_printed_iter) || iteration > env$last_printed_iter) {
+ .print.evaluation(final_feval, env$showsd, iteration)
+ }
+ }
+ )
+}
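
A hedged usage sketch for `xgb.cb.print.evaluation()`: in cross-validation the per-fold metrics are summarized row-wise, so `showsd = TRUE` appends a "±sd" term to each printed metric. The data, parameters, and the assumption that `verbose = FALSE` suppresses the default printing in `xgb.cv()` are illustrative:

data(mtcars)
dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
cv_res <- xgb.cv(
  params = list(objective = "reg:squarederror", nthread = 1),
  data = dm,
  nrounds = 20,
  nfold = 3,
  verbose = FALSE,  # assumed to disable the default per-iteration printing
  callbacks = list(xgb.cb.print.evaluation(period = 5, showsd = TRUE))
)
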
+
+#' @title Callback for logging the evaluation history
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+#' @details This callback creates a table with per-iteration evaluation metrics (see parameters
+#' `evals` and `feval` in \link{xgb.train}).
+#' @details
+#' Note: in the column names of the final data.table, the dash '-' character is replaced with
+#' the underscore '_' in order to make the column names more like regular R identifiers.
+#' @seealso \link{xgb.cb.print.evaluation}
+#' @export
+xgb.cb.evaluation.log <- function() {
+ xgb.Callback(
+ cb_name = "evaluation_log",
+ f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
+ env$evaluation_log <- vector("list", end_iteration - begin_iteration + 1)
+ env$next_log <- 1
+ },
+ f_before_iter = NULL,
+ f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
+ tmp <- .summarize.feval(iter_feval, TRUE)
+ env$evaluation_log[[env$next_log]] <- list(iter = iteration, metrics = tmp$feval, sds = tmp$stdev)
+ env$next_log <- env$next_log + 1
+ return(FALSE)
+ },
+ f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
+ if (!NROW(env$evaluation_log)) {
+ return(prev_cb_res)
+ }
+ # in case of early stopping
+ if (env$next_log <= length(env$evaluation_log)) {
+ env$evaluation_log <- head(env$evaluation_log, env$next_log - 1)
+ }
+
+ iters <- data.frame(iter = sapply(env$evaluation_log, function(x) x$iter))
+ metrics <- do.call(rbind, lapply(env$evaluation_log, function(x) x$metrics))
+ mnames <- gsub("-", "_", names(env$evaluation_log[[1]]$metrics), fixed = TRUE)
+ colnames(metrics) <- mnames
+ has_sds <- !is.null(env$evaluation_log[[1]]$sds)
+ if (has_sds) {
+ sds <- do.call(rbind, lapply(env$evaluation_log, function(x) x$sds))
+ colnames(sds) <- mnames
+ metrics <- lapply(
+ mnames,
+ function(metric) {
+ out <- cbind(metrics[, metric], sds[, metric])
+ colnames(out) <- paste0(metric, c("_mean", "_std"))
+ return(out)
+ }
+ )
+ metrics <- do.call(cbind, metrics)
+ }
+ evaluation_log <- cbind(iters, metrics)
+
+ if (!is.null(prev_cb_res)) {
+ if (!is.data.table(prev_cb_res)) {
+ prev_cb_res <- data.table::as.data.table(prev_cb_res)
+ }
+ prev_take <- prev_cb_res[prev_cb_res$iter < min(evaluation_log$iter)]
+ if (nrow(prev_take)) {
+ evaluation_log <- rbind(prev_cb_res, evaluation_log)
+ }
+ }
+ evaluation_log <- data.table::as.data.table(evaluation_log)
+ return(evaluation_log)
+ }
+ )
+}
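
A hedged sketch of retrieving the logged table: per the callback contract, the result of `f_after_training` is attached under the callback's name ("evaluation_log"), either as a booster attribute (when `keep_extra_attributes = TRUE` is passed to `xgb.train()`) or as a named element of the `xgb.cv()` result. Data and parameters are illustrative only:

data(mtcars)
dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
bst <- xgb.train(
  params = list(objective = "reg:squarederror", nthread = 1),
  data = dm,
  nrounds = 5,
  evals = list(train = dm),
  callbacks = list(xgb.cb.evaluation.log()),
  keep_extra_attributes = TRUE
)
# data.table with one row per iteration; '-' in metric names becomes '_',
# so the columns here would be 'iter' and 'train_rmse'.
attributes(bst)$evaluation_log
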
+
+#' @title Callback for resetting the booster's parameters at each iteration.
#' @param new_params a list where each element corresponds to a parameter that needs to be reset.
#' Each element's value must be either a vector of values of length \code{nrounds}
#' to be set at each iteration,
#' or a function of two parameters \code{learning_rates(iteration, nrounds)}
#' which returns a new parameter value by using the current iteration number
#' and the total number of boosting rounds.
-#'
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' @details
-#' This is a "pre-iteration" callback function used to reset booster's parameters
-#' at the beginning of each iteration.
-#'
#' Note that when training is resumed from some previous model, and a function is used to
#' reset a parameter value, the \code{nrounds} argument in this function would be the
#' the number of boosting rounds in the current training.
#'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{bst} or \code{bst_folds},
-#' \code{iteration},
-#' \code{begin_iteration},
-#' \code{end_iteration}.
-#'
-#' @seealso
-#' \code{\link{callbacks}}
-#'
+#' Does not leave any attribute in the booster.
#' @export
-cb.reset.parameters <- function(new_params) {
-
- if (typeof(new_params) != "list")
- stop("'new_params' must be a list")
+xgb.cb.reset.parameters <- function(new_params) {
+ stopifnot(is.list(new_params))
pnames <- gsub(".", "_", names(new_params), fixed = TRUE)
- nrounds <- NULL
-
- # run some checks in the beginning
- init <- function(env) {
- nrounds <<- env$end_iteration - env$begin_iteration + 1
-
- if (is.null(env$bst) && is.null(env$bst_folds))
- stop("Parent frame has neither 'bst' nor 'bst_folds'")
-
- # Some parameters are not allowed to be changed,
- # since changing them would simply wreck some chaos
- not_allowed <- pnames %in%
- c('num_class', 'num_output_group', 'size_leaf_vector', 'updater_seq')
- if (any(not_allowed))
- stop('Parameters ', paste(pnames[not_allowed]), " cannot be changed during boosting.")
-
- for (n in pnames) {
- p <- new_params[[n]]
- if (is.function(p)) {
- if (length(formals(p)) != 2)
- stop("Parameter '", n, "' is a function but not of two arguments")
- } else if (is.numeric(p) || is.character(p)) {
- if (length(p) != nrounds)
- stop("Length of '", n, "' has to be equal to 'nrounds'")
- } else {
- stop("Parameter '", n, "' is not a function or a vector")
+ not_allowed <- pnames %in%
+ c('num_class', 'num_output_group', 'size_leaf_vector', 'updater_seq')
+ if (any(not_allowed))
+ stop('Parameters ', paste(pnames[not_allowed]), " cannot be changed during boosting.")
+
+ xgb.Callback(
+ cb_name = "reset_parameters",
+ env = as.environment(list(new_params = new_params)),
+ f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
+ env$end_iteration <- end_iteration
+
+ pnames <- gsub(".", "_", names(env$new_params), fixed = TRUE)
+ for (n in pnames) {
+ p <- env$new_params[[n]]
+ if (is.function(p)) {
+ if (length(formals(p)) != 2)
+ stop("Parameter '", n, "' is a function but not of two arguments")
+ } else if (is.numeric(p) || is.character(p)) {
+ if (length(p) != env$end_iteration)
+ stop("Length of '", n, "' has to be equal to 'nrounds'")
+ } else {
+ stop("Parameter '", n, "' is not a function or a vector")
+ }
}
- }
- }
-
- callback <- function(env = parent.frame()) {
- if (is.null(nrounds))
- init(env)
-
- i <- env$iteration
- pars <- lapply(new_params, function(p) {
- if (is.function(p))
- return(p(i, nrounds))
- p[i]
- })
+ },
+ f_before_iter = function(env, model, data, evals, iteration) {
+ pars <- lapply(env$new_params, function(p) {
+ if (is.function(p)) {
+ return(p(iteration, env$end_iteration))
+ } else {
+ return(p[iteration])
+ }
+ })
- if (!is.null(env$bst)) {
- xgb.parameters(env$bst) <- pars
- } else {
- for (fd in env$bst_folds)
- xgb.parameters(fd$bst) <- pars
- }
- }
- attr(callback, 'is_pre_iteration') <- TRUE
- attr(callback, 'call') <- match.call()
- attr(callback, 'name') <- 'cb.reset.parameters'
- callback
+ if (inherits(model, "xgb.Booster")) {
+ xgb.parameters(model) <- pars
+ } else {
+ for (fd in model) {
+ xgb.parameters(fd$bst) <- pars
+ }
+ }
+ return(FALSE)
+ },
+ f_after_iter = NULL,
+ f_after_training = NULL
+ )
}
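
A hedged sketch of a per-iteration learning-rate schedule with `xgb.cb.reset.parameters()`: per the `new_params` contract above, `eta` can be supplied either as a function of `(iteration, nrounds)` or as a vector of length `nrounds`. Data and values are illustrative only:

data(mtcars)
dm <- xgb.DMatrix(as.matrix(mtcars[, -1]), label = mtcars$mpg, nthread = 1)
n_rounds <- 10
bst <- xgb.train(
  params = list(objective = "reg:squarederror", eta = 0.3, nthread = 1),
  data = dm,
  nrounds = n_rounds,
  callbacks = list(
    # geometric decay: eta starts at 0.3 and shrinks by 10% each round
    xgb.cb.reset.parameters(list(eta = function(i, n) 0.3 * 0.9^(i - 1)))
  )
)
# Equivalent with a pre-computed vector of length 'nrounds':
# xgb.cb.reset.parameters(list(eta = 0.3 * 0.9^(0:(n_rounds - 1))))
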
-
-#' Callback closure to activate the early stopping.
-#'
+#' @title Callback to activate early stopping
#' @param stopping_rounds The number of rounds with no improvement in
#' the evaluation metric in order to stop the training.
-#' @param maximize whether to maximize the evaluation metric
-#' @param metric_name the name of an evaluation column to use as a criteria for early
+#' @param maximize Whether to maximize the evaluation metric.
+#' @param metric_name The name of an evaluation column to use as a criterion for early
#' stopping. If not set, the last column would be used.
-#' Let's say the test data in \code{watchlist} was labelled as \code{dtest},
+#' Let's say the test data in \code{evals} was labelled as \code{dtest},
#' and one wants to use the AUC in test data for early stopping regardless of where
-#' it is in the \code{watchlist}, then one of the following would need to be set:
+#' it is in the \code{evals}, then one of the following would need to be set:
#' \code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
#' All dash '-' characters in metric names are considered equivalent to '_'.
-#' @param verbose whether to print the early stopping information.
+#' @param verbose Whether to print the early stopping information.
+#' @param keep_all_iter Whether to keep all of the boosting rounds that were produced
+#' in the resulting object. If passing `FALSE`, will only keep the boosting rounds
+#' up to the detected best iteration, discarding the ones that come after.
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+#' @description
+#' This callback function determines the condition for early stopping.
#'
-#' @details
-#' This callback function determines the condition for early stopping
-#' by setting the \code{stop_condition = TRUE} flag in its calling frame.
-#'
-#' The following additional fields are assigned to the model's R object:
+#' The following attributes are assigned to the booster's object:
#' \itemize{
#' \item \code{best_score} the evaluation score at the best iteration
-#' \item \code{best_iteration} at which boosting iteration the best score has occurred (1-based index)
+#' \item \code{best_iteration} at which boosting iteration the best score has occurred
+#' (0-based index for interoperability of binary models)
#' }
-#' The Same values are also stored as xgb-attributes:
-#' \itemize{
-#' \item \code{best_iteration} is stored as a 0-based iteration index (for interoperability of binary models)
-#' \item \code{best_msg} message string is also stored.
-#' }
-#'
-#' At least one data element is required in the evaluation watchlist for early stopping to work.
#'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{stop_condition},
-#' \code{bst_evaluation},
-#' \code{rank},
-#' \code{bst} (or \code{bst_folds} and \code{basket}),
-#' \code{iteration},
-#' \code{begin_iteration},
-#' \code{end_iteration},
-#'
-#' @seealso
-#' \code{\link{callbacks}},
-#' \code{\link{xgb.attr}}
+#' The same values are also stored as R attributes as a result of the callback, plus an additional
+#' attribute `stopped_by_max_rounds` which indicates whether an early stopping by the `stopping_rounds`
+#' condition occurred. Note that the `best_iteration` that is stored under R attributes will follow
+#' base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
+#' through \link{xgb.attr} or \link{xgb.attributes}.
#'
+#' At least one dataset is required in `evals` for early stopping to work.
#' @export
-cb.early.stop <- function(stopping_rounds, maximize = FALSE,
- metric_name = NULL, verbose = TRUE) {
- # state variables
- best_iteration <- -1
- best_score <- Inf
- best_msg <- NULL
- metric_idx <- 1
-
- init <- function(env) {
- if (length(env$bst_evaluation) == 0)
- stop("For early stopping, watchlist must have at least one element")
-
- eval_names <- gsub('-', '_', names(env$bst_evaluation), fixed = TRUE)
- if (!is.null(metric_name)) {
- metric_idx <<- which(gsub('-', '_', metric_name, fixed = TRUE) == eval_names)
- if (length(metric_idx) == 0)
- stop("'metric_name' for early stopping is not one of the following:\n",
- paste(eval_names, collapse = ' '), '\n')
- }
- if (is.null(metric_name) &&
- length(env$bst_evaluation) > 1) {
- metric_idx <<- length(eval_names)
- if (verbose)
- cat('Multiple eval metrics are present. Will use ',
- eval_names[metric_idx], ' for early stopping.\n', sep = '')
- }
-
- metric_name <<- eval_names[metric_idx]
+xgb.cb.early.stop <- function(
+ stopping_rounds,
+ maximize = FALSE,
+ metric_name = NULL,
+ verbose = TRUE,
+ keep_all_iter = TRUE
+) {
+ if (!is.null(metric_name)) {
+ stopifnot(is.character(metric_name))
+ stopifnot(length(metric_name) == 1L)
+ }
- # maximize is usually NULL when not set in xgb.train and built-in metrics
- if (is.null(maximize))
- maximize <<- grepl('(_auc|_map|_ndcg|_pre)', metric_name)
+ xgb.Callback(
+ cb_name = "early_stop",
+ env = as.environment(
+ list(
+ checked_evnames = FALSE,
+ stopping_rounds = stopping_rounds,
+ maximize = maximize,
+ metric_name = metric_name,
+ verbose = verbose,
+ keep_all_iter = keep_all_iter,
+ stopped_by_max_rounds = FALSE
+ )
+ ),
+ f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
+ if (inherits(model, "xgb.Booster") && !length(evals)) {
+ stop("For early stopping, 'evals' must have at least one element")
+ }
+ env$begin_iteration <- begin_iteration
+ return(NULL)
+ },
+ f_before_iter = function(env, model, data, evals, iteration) NULL,
+ f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
+ sds <- NULL
+ if (NCOL(iter_feval) > 1) {
+ tmp <- .summarize.feval(iter_feval, TRUE)
+ iter_feval <- tmp$feval
+ sds <- tmp$stdev
+ }
- if (verbose && NVL(env$rank, 0) == 0)
- cat("Will train until ", metric_name, " hasn't improved in ",
- stopping_rounds, " rounds.\n\n", sep = '')
+ if (!env$checked_evnames) {
- best_iteration <<- 1
- if (maximize) best_score <<- -Inf
+ eval_names <- gsub('-', '_', names(iter_feval), fixed = TRUE)
+ if (!is.null(env$metric_name)) {
+ env$metric_idx <- which(gsub('-', '_', env$metric_name, fixed = TRUE) == eval_names)
+ if (length(env$metric_idx) == 0)
+ stop("'metric_name' for early stopping is not one of the following:\n",
+ paste(eval_names, collapse = ' '), '\n')
+ }
- env$stop_condition <- FALSE
+ if (is.null(env$metric_name)) {
+ if (NROW(iter_feval) == 1) {
+ env$metric_idx <- 1L
+ } else {
+ env$metric_idx <- length(eval_names)
+ if (env$verbose)
+ cat('Multiple eval metrics are present. Will use ',
+ eval_names[env$metric_idx], ' for early stopping.\n', sep = '')
+ }
+ }
- if (!is.null(env$bst)) {
- if (!inherits(env$bst, 'xgb.Booster'))
- stop("'bst' in the parent frame must be an 'xgb.Booster'")
- if (!is.null(best_score <- xgb.attr(env$bst, 'best_score'))) {
- best_score <<- as.numeric(best_score)
- best_iteration <<- as.numeric(xgb.attr(env$bst, 'best_iteration')) + 1
- best_msg <<- as.numeric(xgb.attr(env$bst, 'best_msg'))
- } else {
- xgb.attributes(env$bst) <- list(best_iteration = best_iteration - 1,
- best_score = best_score)
- }
- } else if (is.null(env$bst_folds) || is.null(env$basket)) {
- stop("Parent frame has neither 'bst' nor ('bst_folds' and 'basket')")
- }
- }
+ env$metric_name <- eval_names[env$metric_idx]
- finalizer <- function(env) {
- if (!is.null(env$bst)) {
- attr_best_score <- as.numeric(xgb.attr(env$bst, 'best_score'))
- if (best_score != attr_best_score) {
- # If the difference is too big, throw an error
- if (abs(best_score - attr_best_score) >= 1e-14) {
- stop("Inconsistent 'best_score' values between the closure state: ", best_score,
- " and the xgb.attr: ", attr_best_score)
- }
- # If the difference is due to floating-point truncation, update best_score
- best_score <- attr_best_score
- }
- xgb.attr(env$bst, "best_iteration") <- best_iteration - 1
- xgb.attr(env$bst, "best_score") <- best_score
- } else {
- env$basket$best_iteration <- best_iteration
- }
- }
+ # maximize is usually NULL when not set in xgb.train and built-in metrics
+ if (is.null(env$maximize))
+ env$maximize <- grepl('(_auc|_aupr|_map|_ndcg|_pre)', env$metric_name)
- callback <- function(env = parent.frame(), finalize = FALSE) {
- if (best_iteration < 0)
- init(env)
+ if (env$verbose)
+ cat("Will train until ", env$metric_name, " hasn't improved in ",
+ env$stopping_rounds, " rounds.\n\n", sep = '')
- if (finalize)
- return(finalizer(env))
+ env$best_iteration <- env$begin_iteration
+ if (env$maximize) {
+ env$best_score <- -Inf
+ } else {
+ env$best_score <- Inf
+ }
- i <- env$iteration
- score <- env$bst_evaluation[metric_idx]
+ if (inherits(model, "xgb.Booster")) {
+ best_score <- xgb.attr(model, 'best_score')
+ if (NROW(best_score)) env$best_score <- as.numeric(best_score)
+ best_iteration <- xgb.attr(model, 'best_iteration')
+ if (NROW(best_iteration)) env$best_iteration <- as.numeric(best_iteration) + 1
+ }
- if ((maximize && score > best_score) ||
- (!maximize && score < best_score)) {
+ env$checked_evnames <- TRUE
+ }
- best_msg <<- .format_eval_string(
- i, env$bst_evaluation, env$bst_evaluation_err
- )
- best_score <<- score
- best_iteration <<- i
- # save the property to attributes, so they will occur in checkpoint
- if (!is.null(env$bst)) {
- xgb.attributes(env$bst) <- list(
- best_iteration = best_iteration - 1, # convert to 0-based index
- best_score = best_score,
- best_msg = best_msg
- )
+ score <- iter_feval[env$metric_idx]
+ if ((env$maximize && score > env$best_score) ||
+ (!env$maximize && score < env$best_score)) {
+
+ env$best_score <- score
+ env$best_iteration <- iteration
+ # save the property to attributes, so they will occur in checkpoint
+ if (inherits(model, "xgb.Booster")) {
+ xgb.attributes(model) <- list(
+ best_iteration = env$best_iteration - 1, # convert to 0-based index
+ best_score = env$best_score
+ )
+ }
+ } else if (iteration - env$best_iteration >= env$stopping_rounds) {
+ if (env$verbose) {
+ best_msg <- .format_eval_string(iteration, iter_feval, sds)
+ cat("Stopping. Best iteration:\n", best_msg, "\n\n", sep = '')
+ }
+ env$stopped_by_max_rounds <- TRUE
+ return(TRUE)
}
- } else if (i - best_iteration >= stopping_rounds) {
- env$stop_condition <- TRUE
- env$end_iteration <- i
- if (verbose && NVL(env$rank, 0) == 0)
- cat("Stopping. Best iteration:\n", best_msg, "\n\n", sep = '')
+ return(FALSE)
+ },
+ f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
+ if (inherits(model, "xgb.Booster") && !env$keep_all_iter && env$best_iteration < iteration) {
+ # Note: it loses the attributes after being sliced,
+ # so they have to be re-assigned afterwards.
+ prev_attr <- xgb.attributes(model)
+ if (NROW(prev_attr)) {
+ suppressWarnings({
+ prev_attr <- within(prev_attr, rm("best_score", "best_iteration"))
+ })
+ }
+ .Call(XGBoosterSliceAndReplace_R, xgb.get.handle(model), 0L, env$best_iteration, 1L)
+ if (NROW(prev_attr)) {
+ xgb.attributes(model) <- prev_attr
+ }
+ }
+ attrs_set <- list(best_iteration = env$best_iteration - 1, best_score = env$best_score)
+ if (inherits(model, "xgb.Booster")) {
+ xgb.attributes(model) <- attrs_set
+ } else {
+ for (fd in model) {
+ xgb.attributes(fd$bst) <- attrs_set # to use in the cv.predict callback
+ }
+ }
+ return(
+ list(
+ best_iteration = env$best_iteration,
+ best_score = env$best_score,
+ stopped_by_max_rounds = env$stopped_by_max_rounds
+ )
+ )
}
- }
- attr(callback, 'call') <- match.call()
- attr(callback, 'name') <- 'cb.early.stop'
- callback
+ )
}
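A minimal usage sketch of the rewritten callback (not part of the patch; `dtrain`, `dvalid` and all parameter values are illustrative assumptions):

# Attach xgb.cb.early.stop() explicitly instead of relying on 'early_stopping_rounds'.
bst <- xgb.train(
  params = list(objective = "binary:logistic", eta = 0.3),
  data = dtrain,
  nrounds = 100,
  evals = list(valid = dvalid),
  callbacks = list(
    xgb.cb.early.stop(stopping_rounds = 5, metric_name = "valid_logloss", keep_all_iter = FALSE)
  )
)
# f_after_training above records the result as booster attributes (0-based index):
xgb.attr(bst, "best_iteration")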
+.save.model.w.formatted.name <- function(model, save_name, iteration) {
+ # Note: this throws a warning if the name doesn't have anything to format through 'sprintf'
+ suppressWarnings({
+ save_name <- sprintf(save_name, iteration)
+ })
+ xgb.save(model, save_name)
+}
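A quick sketch of the sprintf() behaviour this helper relies on (assuming a recent R version, where unused sprintf arguments only trigger a warning):

sprintf("xgboost_%04d.ubj", 50)               # "xgboost_0050.ubj" - iteration number filled in
suppressWarnings(sprintf("xgboost.ubj", 50))  # "xgboost.ubj" - no specifier, warning suppressed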
-#' Callback closure for saving a model file.
-#'
-#' @param save_period save the model to disk after every
+#' @title Callback for saving a model file.
+#' @param save_period Save the model to disk after every
#' \code{save_period} iterations; 0 means save the model at the end.
-#' @param save_name the name or path for the saved model file.
-#'
-#' Note that the format of the model being saved is determined by the file
-#' extension specified here (see \link{xgb.save} for details about how it works).
-#'
+#' @param save_name The name or path for the saved model file.
#' It can contain a \code{\link[base]{sprintf}} formatting specifier
#' to include the integer iteration number in the file name.
-#' E.g., with \code{save_name} = 'xgboost_%04d.ubj',
-#' the file saved at iteration 50 would be named "xgboost_0050.ubj".
-#' @seealso \link{xgb.save}
-#' @details
-#' This callback function allows to save an xgb-model file, either periodically after each \code{save_period}'s or at the end.
-#'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{bst},
-#' \code{iteration},
-#' \code{begin_iteration},
-#' \code{end_iteration}.
-#'
-#' @seealso
-#' \code{\link{callbacks}}
+#' E.g., with \code{save_name} = 'xgboost_%04d.model',
+#' the file saved at iteration 50 would be named "xgboost_0050.model".
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train},
+#' but \bold{not} to \link{xgb.cv}.
+#' @description
+#' This callback function allows saving an XGBoost model file, either periodically
+#' after every \code{save_period} iterations or at the end of training.
#'
+#' This callback does not leave any attribute in the booster.
#' @export
-cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
-
- if (save_period < 0)
+xgb.cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
+ if (save_period < 0) {
stop("'save_period' cannot be negative")
+ }
+ if (!is.character(save_name) || length(save_name) != 1L) {
+ stop("'save_name' must be a single character refering to file name.")
+ }
- callback <- function(env = parent.frame()) {
- if (is.null(env$bst))
- stop("'save_model' callback requires the 'bst' booster object in its calling frame")
-
- if ((save_period > 0 && (env$iteration - env$begin_iteration) %% save_period == 0) ||
- (save_period == 0 && env$iteration == env$end_iteration)) {
- # Note: this throws a warning if the name doesn't have anything to format through 'sprintf'
- suppressWarnings({
- save_name <- sprintf(save_name, env$iteration)
- })
- xgb.save(env$bst, save_name)
+ xgb.Callback(
+ cb_name = "save_model",
+ env = as.environment(list(save_period = save_period, save_name = save_name, last_save = 0)),
+ f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
+ env$begin_iteration <- begin_iteration
+ },
+ f_before_iter = NULL,
+ f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
+ if (env$save_period > 0 && (iteration - env$begin_iteration) %% env$save_period == 0) {
+ .save.model.w.formatted.name(model, env$save_name, iteration)
+ env$last_save <- iteration
+ }
+ return(FALSE)
+ },
+ f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
+ if (env$save_period == 0 && iteration > env$last_save) {
+ .save.model.w.formatted.name(model, env$save_name, iteration)
+ }
}
- }
- attr(callback, 'call') <- match.call()
- attr(callback, 'name') <- 'cb.save.model'
- callback
+ )
}
-
-#' Callback closure for returning cross-validation based predictions.
-#'
-#' @param save_models a flag for whether to save the folds' models.
-#'
-#' @details
+#' @title Callback for returning cross-validation based predictions.
+#' @param save_models A flag for whether to save the folds' models.
+#' @param outputmargin Whether to save margin predictions (same effect as passing this
+#' parameter to \link{predict.xgb.Booster}).
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.cv},
+#' but \bold{not} to \link{xgb.train}.
+#' @description
#' This callback function saves predictions for all of the test folds,
#' and also allows to save the folds' models.
-#'
-#' It is a "finalizer" callback and it uses early stopping information whenever it is available,
-#' thus it must be run after the early stopping callback if the early stopping is used.
-#'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{bst_folds},
-#' \code{basket},
-#' \code{data},
-#' \code{end_iteration},
-#' \code{params},
-#'
-#' @return
-#' Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
+#' @details
+#' Predictions are saved inside of the \code{pred} element, which is either a vector or a matrix,
#' depending on the number of prediction outputs per data row. The order of predictions corresponds
#' to the order of rows in the original dataset. Note that when a custom \code{folds} list is
#' provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a
@@ -480,84 +835,107 @@ cb.save.model <- function(save_period = 0, save_name = "xgboost.ubj") {
#' meaningful when user-provided folds have overlapping indices as in, e.g., random sampling splits.
#' When some of the indices in the training dataset are not included into user-provided \code{folds},
#' their prediction value would be \code{NA}.
-#'
-#' @seealso
-#' \code{\link{callbacks}}
-#'
#' @export
-cb.cv.predict <- function(save_models = FALSE) {
-
- finalizer <- function(env) {
- if (is.null(env$basket) || is.null(env$bst_folds))
- stop("'cb.cv.predict' callback requires 'basket' and 'bst_folds' lists in its calling frame")
-
- N <- nrow(env$data)
- pred <- NULL
-
- iterationrange <- c(1, NVL(env$basket$best_iteration, env$end_iteration))
- if (NVL(env$params[['booster']], '') == 'gblinear') {
- iterationrange <- "all"
- }
- for (fd in env$bst_folds) {
- pr <- predict(fd$bst, fd$watchlist[[2]], iterationrange = iterationrange, reshape = TRUE)
- if (is.null(pred)) {
- if (NCOL(pr) > 1L) {
- pred <- matrix(NA_real_, N, ncol(pr))
+xgb.cb.cv.predict <- function(save_models = FALSE, outputmargin = FALSE) {
+ xgb.Callback(
+ cb_name = "cv_predict",
+ env = as.environment(list(save_models = save_models, outputmargin = outputmargin)),
+ f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
+ if (inherits(model, "xgb.Booster")) {
+ stop("'cv.predict' callback is only for 'xgb.cv'.")
+ }
+ },
+ f_before_iter = NULL,
+ f_after_iter = NULL,
+ f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
+ pred <- NULL
+ for (fd in model) {
+ pr <- predict(
+ fd$bst,
+ fd$evals[[2L]],
+ outputmargin = env$outputmargin,
+ reshape = TRUE
+ )
+ if (is.null(pred)) {
+ if (NCOL(pr) > 1L) {
+ pred <- matrix(NA_real_, nrow(data), ncol(pr))
+ } else {
+ pred <- matrix(NA_real_, nrow(data))
+ }
+ }
+ if (is.matrix(pred)) {
+ pred[fd$index, ] <- pr
} else {
- pred <- matrix(NA_real_, N)
+ pred[fd$index] <- pr
}
}
- if (is.matrix(pred)) {
- pred[fd$index, ] <- pr
- } else {
- pred[fd$index] <- pr
+ out <- list(pred = pred)
+ if (env$save_models) {
+ out$models <- lapply(model, function(fd) fd$bst)
}
+ return(out)
}
- env$basket$pred <- pred
- if (save_models) {
- env$basket$models <- lapply(env$bst_folds, function(fd) {
- return(fd$bst)
- })
- }
- }
+ )
+}
- callback <- function(env = parent.frame(), finalize = FALSE) {
- if (finalize)
- return(finalizer(env))
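An illustrative sketch (assumed objects and settings, not part of the patch) of how the list returned by f_after_training above surfaces in the xgb.cv() result under the callback's name:

cv <- xgb.cv(
  params = list(objective = "binary:logistic", max_depth = 2, nthread = 2),
  data = dtrain,   # assumed xgb.DMatrix with labels
  nrounds = 10,
  nfold = 5,
  callbacks = list(xgb.cb.cv.predict(save_models = TRUE))
)
str(cv$cv_predict$pred)       # out-of-fold predictions, ordered like the rows of 'dtrain'
length(cv$cv_predict$models)  # one booster per fold when save_models = TRUE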
+.list2mat <- function(coef_list, sparse) {
+ if (sparse) {
+ coef_mat <- methods::new("dgRMatrix")
+ coef_mat@p <- as.integer(c(0, cumsum(sapply(coef_list, function(x) length(x@x)))))
+ coef_mat@j <- as.integer(unlist(lapply(coef_list, slot, "i")) - 1L)
+ coef_mat@x <- unlist(lapply(coef_list, slot, "x"))
+ coef_mat@Dim <- as.integer(c(length(coef_list), length(coef_list[[1L]])))
+ # Note: function 'xgb.gblinear.history' might later on try to slice by columns
+ coef_mat <- methods::as(coef_mat, "CsparseMatrix")
+ return(coef_mat)
+ } else {
+ return(unname(do.call(rbind, coef_list)))
}
- attr(callback, 'call') <- match.call()
- attr(callback, 'name') <- 'cb.cv.predict'
- callback
}
+.extract.coef <- function(model, sparse) {
+ coefs <- .internal.coef.xgb.Booster(model, add_names = FALSE)
+ if (NCOL(coefs) > 1L) {
+ coefs <- as.vector(coefs)
+ }
+ if (sparse) {
+ coefs <- methods::as(coefs, "sparseVector")
+ }
+ return(coefs)
+}
-#' Callback closure for collecting the model coefficients history of a gblinear booster
-#' during its training.
-#'
-#' @param sparse when set to FALSE/TRUE, a dense/sparse matrix is used to store the result.
+#' @title Callback for collecting the coefficient history of a gblinear booster
+#' @param sparse when set to `FALSE`/`TRUE`, a dense/sparse matrix is used to store the result.
#' Sparse format is useful when one expects only a subset of coefficients to be non-zero,
#' when using the "thrifty" feature selector with fairly small number of top features
#' selected per iteration.
-#'
+#' @return An `xgb.Callback` object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
#' @details
#' To keep things fast and simple, gblinear booster does not internally store the history of linear
#' model coefficients at each boosting iteration. This callback provides a workaround for storing
#' the coefficients' path, by extracting them after each training iteration.
#'
-#' Callback function expects the following values to be set in its calling frame:
-#' \code{bst} (or \code{bst_folds}).
+#' This callback will construct a matrix where rows are boosting iterations and columns are
+#' feature coefficients (same order as when calling \link{coef.xgb.Booster}, with the intercept
+#' corresponding to the first column).
#'
-#' @return
-#' Results are stored in the \code{coefs} element of the closure.
-#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy
-#' way to access it.
-#' With \code{xgb.train}, it is either a dense of a sparse matrix.
-#' While with \code{xgb.cv}, it is a list (an element per each fold) of such
-#' matrices.
+#' When there is more than one coefficient per feature (e.g. multi-class classification),
+#' the result will be reshaped into a vector where coefficients are arranged first by features and
+#' then by class (e.g. first 1 through N coefficients will be for the first class, then
+#' coefficients N+1 through 2N for the second class, and so on).
+#'
+#' If the result has only one coefficient per feature in the data, then the resulting matrix
+#' will have column names matching with the feature names, otherwise (when there's more than
+#' one coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'
+#' (so e.g. column 'c1' for class '0' will be named 'c1:0').
#'
-#' @seealso
-#' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
+#' With \code{xgb.train}, the output is either a dense or a sparse matrix.
+#' With \code{xgb.cv}, it is a list (one element per fold) of such
+#' matrices.
#'
+#' The \link{xgb.gblinear.history} function provides an easy way to retrieve the
+#' outputs from this callback.
+#' @seealso \link{xgb.gblinear.history}, \link{coef.xgb.Booster}.
#' @examples
#' #### Binary classification:
#'
@@ -577,7 +955,7 @@ cb.cv.predict <- function(save_models = FALSE) {
#' # rate does not break the convergence, but allows us to illustrate the typical pattern of
#' # "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 1.,
-#' callbacks = list(cb.gblinear.history()))
+#' callbacks = list(xgb.cb.gblinear.history()))
#' # Extract the coefficients' path and plot them vs boosting iteration number:
#' coef_path <- xgb.gblinear.history(bst)
#' matplot(coef_path, type = 'l')
@@ -586,7 +964,7 @@ cb.cv.predict <- function(save_models = FALSE) {
#' # Will try the classical componentwise boosting which selects a single best feature per round:
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8,
#' updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
-#' callbacks = list(cb.gblinear.history()))
+#' callbacks = list(xgb.cb.gblinear.history()))
#' matplot(xgb.gblinear.history(bst), type = 'l')
#' # Componentwise boosting is known to have similar effect to Lasso regularization.
#' # Try experimenting with various values of top_k, eta, nrounds,
@@ -594,7 +972,7 @@ cb.cv.predict <- function(save_models = FALSE) {
#'
#' # For xgb.cv:
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
-#' callbacks = list(cb.gblinear.history()))
+#' callbacks = list(xgb.cb.gblinear.history()))
#' # coefficients in the CV fold #3
#' matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
#'
@@ -607,7 +985,7 @@ cb.cv.predict <- function(save_models = FALSE) {
#' # For the default linear updater 'shotgun' it sometimes is helpful
#' # to use smaller eta to reduce instability
#' bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
-#' callbacks = list(cb.gblinear.history()))
+#' callbacks = list(xgb.cb.gblinear.history()))
#' # Will plot the coefficient paths separately for each class:
#' matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l')
#' matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l')
@@ -615,104 +993,141 @@ cb.cv.predict <- function(save_models = FALSE) {
#'
#' # CV:
#' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5,
-#' callbacks = list(cb.gblinear.history(FALSE)))
+#' callbacks = list(xgb.cb.gblinear.history(FALSE)))
#' # 1st fold of 1st class
#' matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l')
#'
#' @export
-cb.gblinear.history <- function(sparse = FALSE) {
- coefs <- NULL
-
- init <- function(env) {
- # xgb.train(): bst will be present
- # xgb.cv(): bst_folds will be present
- if (is.null(env$bst) && is.null(env$bst_folds)) {
- stop("Parent frame has neither 'bst' nor 'bst_folds'")
- }
- }
-
- # convert from list to (sparse) matrix
- list2mat <- function(coef_list) {
- if (sparse) {
- coef_mat <- sparseMatrix(x = unlist(lapply(coef_list, slot, "x")),
- i = unlist(lapply(coef_list, slot, "i")),
- p = c(0, cumsum(sapply(coef_list, function(x) length(x@x)))),
- dims = c(length(coef_list[[1]]), length(coef_list)))
- return(t(coef_mat))
- } else {
- return(do.call(rbind, coef_list))
- }
- }
-
- finalizer <- function(env) {
- if (length(coefs) == 0)
- return()
- if (!is.null(env$bst)) { # # xgb.train:
- coefs <<- list2mat(coefs)
- } else { # xgb.cv:
- # second lapply transposes the list
- coefs <<- lapply(
- X = lapply(
- X = seq_along(coefs[[1]]),
- FUN = function(i) lapply(coefs, "[[", i)
- ),
- FUN = list2mat
- )
- }
- }
+xgb.cb.gblinear.history <- function(sparse = FALSE) {
+ xgb.Callback(
+ cb_name = "gblinear_history",
+ env = as.environment(list(sparse = sparse)),
+ f_before_training = function(env, model, data, evals, begin_iteration, end_iteration) {
+ if (!inherits(model, "xgb.Booster")) {
+ model <- model[[1L]]$bst
+ }
+ if (xgb.booster_type(model) != "gblinear") {
+ stop("Callback 'xgb.cb.gblinear.history' is only for booster='gblinear'.")
+ }
+ env$coef_hist <- vector("list", end_iteration - begin_iteration + 1)
+ env$next_idx <- 1
+ },
+ f_before_iter = NULL,
+ f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
+ if (inherits(model, "xgb.Booster")) {
+ coef_this <- .extract.coef(model, env$sparse)
+ } else {
+ coef_this <- lapply(model, function(fd) .extract.coef(fd$bst, env$sparse))
+ }
+ env$coef_hist[[env$next_idx]] <- coef_this
+ env$next_idx <- env$next_idx + 1
+ return(FALSE)
+ },
+ f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) {
+ # in case of early stopping
+ if (env$next_idx <= length(env$coef_hist)) {
+ env$coef_hist <- head(env$coef_hist, env$next_idx - 1)
+ }
- extract.coef <- function(env) {
- if (!is.null(env$bst)) { # # xgb.train:
- cf <- as.numeric(grep('(booster|bias|weigh)', xgb.dump(env$bst), invert = TRUE, value = TRUE))
- if (sparse) cf <- as(cf, "sparseVector")
- } else { # xgb.cv:
- cf <- vector("list", length(env$bst_folds))
- for (i in seq_along(env$bst_folds)) {
- dmp <- xgb.dump(env$bst_folds[[i]]$bst)
- cf[[i]] <- as.numeric(grep('(booster|bias|weigh)', dmp, invert = TRUE, value = TRUE))
- if (sparse) cf[[i]] <- as(cf[[i]], "sparseVector")
+ is_booster <- inherits(model, "xgb.Booster")
+ if (is_booster) {
+ out <- .list2mat(env$coef_hist, env$sparse)
+ } else {
+ out <- lapply(
+ X = lapply(
+ X = seq_along(env$coef_hist[[1]]),
+ FUN = function(i) lapply(env$coef_hist, "[[", i)
+ ),
+ FUN = .list2mat,
+ env$sparse
+ )
}
+ if (!is.null(prev_cb_res)) {
+ if (is_booster) {
+ out <- rbind(prev_cb_res, out)
+ } else {
+ # Note: this case should never be encountered, since training cannot
+ # be continued from the result of xgb.cv, but this code should in
+ # theory do the job if the situation were to be encountered.
+ out <- lapply(
+ out,
+ function(lst) {
+ lapply(
+ seq_along(lst),
+ function(i) rbind(prev_cb_res[[i]], lst[[i]])
+ )
+ }
+ )
+ }
+ }
+ feature_names <- getinfo(data, "feature_name")
+ if (!NROW(feature_names)) {
+ feature_names <- paste0("V", seq(1L, ncol(data)))
+ }
+ expected_ncols <- length(feature_names) + 1
+ if (is_booster) {
+ mat_ncols <- ncol(out)
+ } else {
+ mat_ncols <- ncol(out[[1L]])
+ }
+ if (mat_ncols %% expected_ncols == 0) {
+ feature_names <- c("(Intercept)", feature_names)
+ n_rep <- mat_ncols / expected_ncols
+ if (n_rep > 1) {
+ feature_names <- unlist(
+ lapply(
+ seq(1, n_rep),
+ function(cl) paste(feature_names, cl - 1, sep = ":")
+ )
+ )
+ }
+ if (is_booster) {
+ colnames(out) <- feature_names
+ } else {
+ out <- lapply(
+ out,
+ function(mat) {
+ colnames(mat) <- feature_names
+ return(mat)
+ }
+ )
+ }
+ }
+ return(out)
}
- cf
- }
-
- callback <- function(env = parent.frame(), finalize = FALSE) {
- if (is.null(coefs)) init(env)
- if (finalize) return(finalizer(env))
- cf <- extract.coef(env)
- coefs <<- c(coefs, list(cf))
- }
-
- attr(callback, 'call') <- match.call()
- attr(callback, 'name') <- 'cb.gblinear.history'
- callback
+ )
}
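To illustrate the column naming performed in f_after_training above (a sketch; `bst_multiclass` is an assumed multi-class gblinear model trained with this callback):

coef_path <- xgb.gblinear.history(bst_multiclass)
# Columns follow the coef.xgb.Booster ordering. For multi-class models the names are
# composed as 'feature:class', e.g. "(Intercept):0", "<feature>:0", ..., "(Intercept):1", ...
head(colnames(coef_path))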
#' @title Extract gblinear coefficients history.
#' @description A helper function to extract the matrix of linear coefficients' history
-#' from a gblinear model created while using the \code{cb.gblinear.history()}
-#' callback.
+#' from a gblinear model created while using the \link{xgb.cb.gblinear.history}
+#' callback (which must be added manually, as it is not enabled by default).
#' @details Note that this is an R-specific function that relies on R attributes that
#' are not saved when using xgboost's own serialization functions like \link{xgb.load}
#' or \link{xgb.load.raw}.
#'
-#' In order for a serialized model to be accepted by tgis function, one must use R
+#' In order for a serialized model to be accepted by this function, one must use R
#' serializers such as \link{saveRDS}.
#' @param model either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
-#' using the \code{cb.gblinear.history()} callback, but \bold{not} a booster
+#' using the \link{xgb.cb.gblinear.history} callback, but \bold{not} a booster
#' loaded from \link{xgb.load} or \link{xgb.load.raw}.
#' @param class_index zero-based class index to extract the coefficients for only that
#' specific class in a multinomial multiclass model. When it is NULL, all the
#' coefficients are returned. Has no effect in non-multiclass models.
#'
#' @return
-#' For an \code{xgb.train} result, a matrix (either dense or sparse) with the columns
-#' corresponding to iteration's coefficients (in the order as \code{xgb.dump()} would
-#' return) and the rows corresponding to boosting iterations.
+#' For an \link{xgb.train} result, a matrix (either dense or sparse) with the columns
+#' corresponding to iteration's coefficients and the rows corresponding to boosting iterations.
#'
-#' For an \code{xgb.cv} result, a list of such matrices is returned with the elements
+#' For an \link{xgb.cv} result, a list of such matrices is returned with the elements
#' corresponding to CV folds.
#'
+#' When there is more than one coefficient per feature (e.g. multi-class classification)
+#' and `class_index` is not provided,
+#' the result will be reshaped into a vector where coefficients are arranged first by features and
+#' then by class (e.g. first 1 through N coefficients will be for the first class, then
+#' coefficients N+1 through 2N for the second class, and so on).
+#' @seealso \link{xgb.cb.gblinear.history}, \link{coef.xgb.Booster}.
#' @export
xgb.gblinear.history <- function(model, class_index = NULL) {
@@ -721,14 +1136,14 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
stop("model must be an object of either xgb.Booster or xgb.cv.synchronous class")
is_cv <- inherits(model, "xgb.cv.synchronous")
- if (is_cv) {
- callbacks <- model$callbacks
+ if (!is_cv) {
+ coef_path <- getElement(attributes(model), "gblinear_history")
} else {
- callbacks <- attributes(model)$callbacks
+ coef_path <- getElement(model, "gblinear_history")
+ }
+ if (is.null(coef_path)) {
+ stop("model must be trained while using the xgb.cb.gblinear.history() callback")
}
-
- if (is.null(callbacks) || is.null(callbacks$cb.gblinear.history))
- stop("model must be trained while using the cb.gblinear.history() callback")
if (!is_cv) {
num_class <- xgb.num_class(model)
@@ -748,105 +1163,82 @@ xgb.gblinear.history <- function(model, class_index = NULL) {
(class_index[1] < 0 || class_index[1] >= num_class))
stop("class_index has to be within [0,", num_class - 1, "]")
- coef_path <- environment(callbacks$cb.gblinear.history)[["coefs"]]
if (!is.null(class_index) && num_class > 1) {
+ seq_take <- seq(1 + class_index * (num_feat + 1), (class_index + 1) * (num_feat + 1))
coef_path <- if (is.list(coef_path)) {
- lapply(coef_path,
- function(x) x[, seq(1 + class_index, by = num_class, length.out = num_feat)])
+ lapply(coef_path, function(x) x[, seq_take])
} else {
- coef_path <- coef_path[, seq(1 + class_index, by = num_class, length.out = num_feat)]
+ coef_path <- coef_path[, seq_take]
}
}
- coef_path
+ return(coef_path)
}
+.callbacks.only.train <- "save_model"
+.callbacks.only.cv <- "cv_predict"
-#
-# Internal utility functions for callbacks ------------------------------------
-#
-
-# Format the evaluation metric string
-.format_eval_string <- function(iter, eval_res, eval_err = NULL) {
- if (length(eval_res) == 0)
- stop('no evaluation results')
- enames <- names(eval_res)
- if (is.null(enames))
- stop('evaluation results must have names')
- iter <- sprintf('[%d]\t', iter)
- if (!is.null(eval_err)) {
- if (length(eval_res) != length(eval_err))
- stop('eval_res & eval_err lengths mismatch')
- # Note: UTF-8 code for plus/minus sign is U+00B1
- res <- paste0(sprintf("%s:%f\U00B1%f", enames, eval_res, eval_err), collapse = '\t')
- } else {
- res <- paste0(sprintf("%s:%f", enames, eval_res), collapse = '\t')
+.process.callbacks <- function(callbacks, is_cv) {
+ if (inherits(callbacks, "xgb.Callback")) {
+ callbacks <- list(callbacks)
}
- return(paste0(iter, res))
-}
-
-# Extract callback names from the list of callbacks
-callback.names <- function(cb_list) {
- unlist(lapply(cb_list, function(x) attr(x, 'name')))
-}
-
-# Extract callback calls from the list of callbacks
-callback.calls <- function(cb_list) {
- unlist(lapply(cb_list, function(x) attr(x, 'call')))
-}
-
-# Add a callback cb to the list and make sure that
-# cb.early.stop and cb.cv.predict are at the end of the list
-# with cb.cv.predict being the last (when present)
-add.cb <- function(cb_list, cb) {
- cb_list <- c(cb_list, cb)
- names(cb_list) <- callback.names(cb_list)
- if ('cb.early.stop' %in% names(cb_list)) {
- cb_list <- c(cb_list, cb_list['cb.early.stop'])
- # this removes only the first one
- cb_list['cb.early.stop'] <- NULL
+ if (!is.list(callbacks)) {
+ stop("'callbacks' must be a list.")
}
- if ('cb.cv.predict' %in% names(cb_list)) {
- cb_list <- c(cb_list, cb_list['cb.cv.predict'])
- cb_list['cb.cv.predict'] <- NULL
+ cb_names <- character()
+ if (length(callbacks)) {
+ is_callback <- sapply(callbacks, inherits, "xgb.Callback")
+ if (!all(is_callback)) {
+ stop("Entries in 'callbacks' must be 'xgb.Callback' objects.")
+ }
+ cb_names <- sapply(callbacks, function(cb) cb$cb_name)
+ if (length(cb_names) != length(callbacks)) {
+ stop("Passed invalid callback(s).")
+ }
+ if (anyDuplicated(cb_names) > 0) {
+ stop("Callbacks must have unique names.")
+ }
+ if (is_cv) {
+ if (any(.callbacks.only.train %in% cb_names)) {
+ stop(
+ "Passed callback(s) not supported for 'xgb.cv': ",
+ paste(intersect(.callbacks.only.train, cb_names), collapse = ", ")
+ )
+ }
+ } else {
+ if (any(.callbacks.only.cv %in% cb_names)) {
+ stop(
+ "Passed callback(s) not supported for 'xgb.train': ",
+ paste(intersect(.callbacks.only.cv, cb_names), collapse = ", ")
+ )
+ }
+ }
+ # Early stopping callback needs to be executed before the others
+ if ("early_stop" %in% cb_names) {
+ mask <- cb_names == "early_stop"
+ callbacks <- c(list(callbacks[[which(mask)]]), callbacks[!mask])
+ }
}
- cb_list
+ return(list(callbacks = callbacks, cb_names = cb_names))
}
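A small sketch of the reordering performed above (internal helper; the callback constructors are the ones defined in this file, with illustrative arguments):

cbs <- list(xgb.cb.evaluation.log(), xgb.cb.early.stop(stopping_rounds = 3))
processed <- .process.callbacks(cbs, is_cv = FALSE)
sapply(processed$callbacks, function(cb) cb$cb_name)
# "early_stop" is moved to the front, so it runs before the other callbacks on each iteration.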
-# Sort callbacks list into categories
-categorize.callbacks <- function(cb_list) {
- list(
- pre_iter = Filter(function(x) {
- pre <- attr(x, 'is_pre_iteration')
- !is.null(pre) && pre
- }, cb_list),
- post_iter = Filter(function(x) {
- pre <- attr(x, 'is_pre_iteration')
- is.null(pre) || !pre
- }, cb_list),
- finalize = Filter(function(x) {
- 'finalize' %in% names(formals(x))
- }, cb_list)
- )
+# Note: don't try to use functions like 'append', as they will
+# merge the elements of the different callbacks into a single list.
+add.callback <- function(callbacks, cb, as_first_elt = FALSE) {
+ if (!as_first_elt) {
+ callbacks[[length(callbacks) + 1]] <- cb
+ return(callbacks)
+ } else {
+ if (!length(callbacks)) {
+ return(list(cb))
+ }
+ new_cb <- vector("list", length(callbacks) + 1)
+ new_cb[[1]] <- cb
+ new_cb[seq(2, length(new_cb))] <- callbacks
+ return(new_cb)
+ }
}
-# Check whether all callback functions with names given by 'query_names' are present in the 'cb_list'.
-has.callbacks <- function(cb_list, query_names) {
- if (length(cb_list) < length(query_names))
- return(FALSE)
- if (!is.list(cb_list) ||
- any(sapply(cb_list, class) != 'function')) {
- stop('`cb_list` must be a list of callback functions')
- }
- cb_names <- callback.names(cb_list)
- if (!is.character(cb_names) ||
- length(cb_names) != length(cb_list) ||
- any(cb_names == "")) {
- stop('All callbacks in the `cb_list` must have a non-empty `name` attribute')
- }
- if (!is.character(query_names) ||
- length(query_names) == 0 ||
- any(query_names == "")) {
- stop('query_names must be a non-empty vector of non-empty character names')
- }
- return(all(query_names %in% cb_names))
+has.callbacks <- function(callbacks, cb_name) {
+ cb_names <- sapply(callbacks, function(cb) cb$cb_name)
+ return(cb_name %in% cb_names)
}
diff --git a/R-package/R/utils.R b/R-package/R/utils.R
index e8ae787fc722..7b6a20f704dd 100644
--- a/R-package/R/utils.R
+++ b/R-package/R/utils.R
@@ -26,6 +26,11 @@ NVL <- function(x, val) {
'multi:softprob', 'rank:pairwise', 'rank:ndcg', 'rank:map'))
}
+.RANKING_OBJECTIVES <- function() {
+ return(c('rank:pairwise', 'rank:ndcg', 'rank:map'))
+}
+
#
# Low-level functions for boosting --------------------------------------------
@@ -142,7 +147,7 @@ check.custom.eval <- function(env = parent.frame()) {
if (!is.null(env$feval) &&
is.null(env$maximize) && (
!is.null(env$early_stopping_rounds) ||
- has.callbacks(env$callbacks, 'cb.early.stop')))
+ has.callbacks(env$callbacks, "early_stop")))
stop("Please set 'maximize' to indicate whether the evaluation metric needs to be maximized or not")
}
@@ -193,20 +198,20 @@ xgb.iter.update <- function(bst, dtrain, iter, obj) {
# Evaluate one iteration.
# Returns a named vector of evaluation metrics
# with the names in a 'datasetname-metricname' format.
-xgb.iter.eval <- function(bst, watchlist, iter, feval) {
+xgb.iter.eval <- function(bst, evals, iter, feval) {
handle <- xgb.get.handle(bst)
- if (length(watchlist) == 0)
+ if (length(evals) == 0)
return(NULL)
- evnames <- names(watchlist)
+ evnames <- names(evals)
if (is.null(feval)) {
- msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), watchlist, as.list(evnames))
+ msg <- .Call(XGBoosterEvalOneIter_R, handle, as.integer(iter), evals, as.list(evnames))
mat <- matrix(strsplit(msg, '\\s+|:')[[1]][-1], nrow = 2)
res <- structure(as.numeric(mat[2, ]), names = mat[1, ])
} else {
- res <- sapply(seq_along(watchlist), function(j) {
- w <- watchlist[[j]]
+ res <- sapply(seq_along(evals), function(j) {
+ w <- evals[[j]]
## predict using all trees
preds <- predict(bst, w, outputmargin = TRUE, iterationrange = "all")
eval_res <- feval(preds, w)
@@ -235,33 +240,43 @@ convert.labels <- function(labels, objective_name) {
}
# Generates random (stratified if needed) CV folds
-generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
+generate.cv.folds <- function(nfold, nrows, stratified, label, group, params) {
+ if (NROW(group)) {
+ if (stratified) {
+ warning(
+ paste0(
+ "Stratified splitting is not supported when using 'group' attribute.",
+ " Will use unstratified splitting."
+ )
+ )
+ }
+ return(generate.group.folds(nfold, group))
+ }
+ objective <- params$objective
+ if (!is.character(objective)) {
+ warning("Will use unstratified splitting (custom objective used)")
+ stratified <- FALSE
+ }
+ # cannot stratify if label is NULL
+ if (stratified && is.null(label)) {
+ warning("Will use unstratified splitting (no 'labels' available)")
+ stratified <- FALSE
+ }
# cannot do it for rank
- objective <- params$objective
if (is.character(objective) && strtrim(objective, 5) == 'rank:') {
- stop("\n\tAutomatic generation of CV-folds is not implemented for ranking!\n",
+ stop("\n\tAutomatic generation of CV-folds is not implemented for ranking without 'group' field!\n",
"\tConsider providing pre-computed CV-folds through the 'folds=' parameter.\n")
}
# shuffle
rnd_idx <- sample.int(nrows)
- if (stratified &&
- length(label) == length(rnd_idx)) {
+ if (stratified && length(label) == length(rnd_idx)) {
y <- label[rnd_idx]
- # WARNING: some heuristic logic is employed to identify classification setting!
# - For classification, need to convert y labels to factor before making the folds,
# and then do stratification by factor levels.
# - For regression, leave y numeric and do stratification by quantiles.
if (is.character(objective)) {
- y <- convert.labels(y, params$objective)
- } else {
- # If no 'objective' given in params, it means that user either wants to
- # use the default 'reg:squarederror' objective or has provided a custom
- # obj function. Here, assume classification setting when y has 5 or less
- # unique values:
- if (length(unique(y)) <= 5) {
- y <- factor(y)
- }
+ y <- convert.labels(y, objective)
}
folds <- xgb.createFolds(y = y, k = nfold)
} else {
@@ -277,6 +292,29 @@ generate.cv.folds <- function(nfold, nrows, stratified, label, params) {
return(folds)
}
+generate.group.folds <- function(nfold, group) {
+ ngroups <- length(group) - 1
+ if (ngroups < nfold) {
+ stop("DMatrix has fewer groups than folds.")
+ }
+ seq_groups <- seq_len(ngroups)
+ indices <- lapply(seq_groups, function(gr) seq(group[gr] + 1, group[gr + 1]))
+ assignments <- base::split(seq_groups, as.integer(seq_groups %% nfold))
+ assignments <- unname(assignments)
+
+ out <- vector("list", nfold)
+ randomized_groups <- sample(ngroups)
+ for (idx in seq_len(nfold)) {
+ groups_idx_test <- randomized_groups[assignments[[idx]]]
+ groups_test <- indices[groups_idx_test]
+ idx_test <- unlist(groups_test)
+ attributes(idx_test)$group_test <- lengths(groups_test)
+ attributes(idx_test)$group_train <- lengths(indices[-groups_idx_test])
+ out[[idx]] <- idx_test
+ }
+ return(out)
+}
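An illustrative sketch of the helper (the `group` argument is the cumulative group pointer in the form consumed above; the sizes are made up):

group_ptr <- c(0L, 3L, 5L, 9L, 10L)   # 4 groups of sizes 3, 2, 4, 1
folds <- generate.group.folds(nfold = 2, group = group_ptr)
folds[[1]]                            # row indices covering whole groups only
attributes(folds[[1]])$group_test     # group sizes to assign to the test DMatrix
attributes(folds[[1]])$group_train    # group sizes to assign to the train DMatrix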
+
# Creates CV folds stratified by the values of y.
# It was borrowed from caret::createFolds and simplified
# by always returning an unnamed list of fold indices.
@@ -454,7 +492,8 @@ depr_par_lut <- matrix(c(
'plot.height', 'plot_height',
'plot.width', 'plot_width',
'n_first_tree', 'trees',
- 'dummy', 'DUMMY'
+ 'dummy', 'DUMMY',
+ 'watchlist', 'evals'
), ncol = 2, byrow = TRUE)
colnames(depr_par_lut) <- c('old', 'new')
diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R
index 8a5d66198834..77d75fa9c2a5 100644
--- a/R-package/R/xgb.Booster.R
+++ b/R-package/R/xgb.Booster.R
@@ -1071,6 +1071,10 @@ xgb.best_iteration <- function(bst) {
#' coef(model)
#' @export
coef.xgb.Booster <- function(object, ...) {
+ return(.internal.coef.xgb.Booster(object, add_names = TRUE))
+}
+
+.internal.coef.xgb.Booster <- function(object, add_names = TRUE) {
booster_type <- xgb.booster_type(object)
if (booster_type != "gblinear") {
stop("Coefficients are not defined for Booster type ", booster_type)
@@ -1089,21 +1093,27 @@ coef.xgb.Booster <- function(object, ...) {
intercepts <- weights[seq(sep + 1, length(weights))]
intercepts <- intercepts + as.numeric(base_score)
- feature_names <- xgb.feature_names(object)
- if (!NROW(feature_names)) {
- # This mimics the default naming in R which names columns as "V1..N"
- # when names are needed but not available
- feature_names <- paste0("V", seq(1L, num_feature))
+ if (add_names) {
+ feature_names <- xgb.feature_names(object)
+ if (!NROW(feature_names)) {
+ # This mimics the default naming in R which names columns as "V1..N"
+ # when names are needed but not available
+ feature_names <- paste0("V", seq(1L, num_feature))
+ }
+ feature_names <- c("(Intercept)", feature_names)
}
- feature_names <- c("(Intercept)", feature_names)
if (n_cols == 1L) {
out <- c(intercepts, coefs)
- names(out) <- feature_names
+ if (add_names) {
+ names(out) <- feature_names
+ }
} else {
coefs <- matrix(coefs, nrow = num_feature, byrow = TRUE)
dim(intercepts) <- c(1L, n_cols)
out <- rbind(intercepts, coefs)
- row.names(out) <- feature_names
+ if (add_names) {
+ row.names(out) <- feature_names
+ }
# TODO: if a class names attributes is added,
# should use those names here.
}
@@ -1255,12 +1265,9 @@ print.xgb.Booster <- function(x, ...) {
cat(" ", paste(attr_names, collapse = ", "), "\n")
}
- if (!is.null(R_attrs$callbacks) && length(R_attrs$callbacks) > 0) {
- cat('callbacks:\n')
- lapply(callback.calls(R_attrs$callbacks), function(x) {
- cat(' ')
- print(x)
- })
+ additional_attr <- setdiff(names(R_attrs), .reserved_cb_names)
+ if (NROW(additional_attr)) {
+ cat("callbacks:\n ", paste(additional_attr, collapse = ", "), "\n")
}
if (!is.null(R_attrs$evaluation_log)) {
diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R
index edbc267c1067..15f6faed0ba0 100644
--- a/R-package/R/xgb.DMatrix.R
+++ b/R-package/R/xgb.DMatrix.R
@@ -1259,8 +1259,11 @@ xgb.get.DMatrix.data <- function(dmat) {
#' Get a new DMatrix containing the specified rows of
#' original xgb.DMatrix object
#'
-#' @param object Object of class "xgb.DMatrix"
-#' @param idxset a integer vector of indices of rows needed
+#' @param object Object of class "xgb.DMatrix".
+#' @param idxset An integer vector of indices of rows needed (base-1 indexing).
+#' @param allow_groups Whether to allow slicing an `xgb.DMatrix` with `group` (or
+#' equivalently `qid`) field. Note that, in that case, the result will not have
+#' the groups anymore - they need to be set again manually through `setinfo`.
#' @param colset currently not used (columns subsetting is not available)
#'
#' @examples
@@ -1275,11 +1278,11 @@ xgb.get.DMatrix.data <- function(dmat) {
#'
#' @rdname xgb.slice.DMatrix
#' @export
-xgb.slice.DMatrix <- function(object, idxset) {
+xgb.slice.DMatrix <- function(object, idxset, allow_groups = FALSE) {
if (!inherits(object, "xgb.DMatrix")) {
stop("object must be xgb.DMatrix")
}
- ret <- .Call(XGDMatrixSliceDMatrix_R, object, idxset)
+ ret <- .Call(XGDMatrixSliceDMatrix_R, object, idxset, allow_groups)
attr_list <- attributes(object)
nr <- nrow(object)
@@ -1296,7 +1299,15 @@ xgb.slice.DMatrix <- function(object, idxset) {
}
}
}
- return(structure(ret, class = "xgb.DMatrix"))
+
+ out <- structure(ret, class = "xgb.DMatrix")
+ parent_fields <- as.list(attributes(object)$fields)
+ if (NROW(parent_fields)) {
+ child_fields <- parent_fields[!(names(parent_fields) %in% c("group", "qid"))]
+ child_fields <- as.environment(child_fields)
+ attributes(out)$fields <- child_fields
+ }
+ return(out)
}
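A sketch of how the new `allow_groups` argument is meant to be used (this mirrors the fold construction in xgb.cv() later in this patch; `dall` and `idx_test` are assumed objects):

dtest <- xgb.slice.DMatrix(dall, idx_test, allow_groups = TRUE)
# Slicing drops the 'group'/'qid' field, so it has to be re-assigned manually:
setinfo(dtest, "group", attributes(idx_test)$group_test)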
#' @rdname xgb.slice.DMatrix
@@ -1340,11 +1351,11 @@ print.xgb.DMatrix <- function(x, verbose = FALSE, ...) {
}
cat(class_print, ' dim:', nrow(x), 'x', ncol(x), ' info: ')
- infos <- character(0)
- if (xgb.DMatrix.hasinfo(x, 'label')) infos <- 'label'
- if (xgb.DMatrix.hasinfo(x, 'weight')) infos <- c(infos, 'weight')
- if (xgb.DMatrix.hasinfo(x, 'base_margin')) infos <- c(infos, 'base_margin')
- if (length(infos) == 0) infos <- 'NA'
+ infos <- names(attributes(x)$fields)
+ infos <- infos[infos != "feature_name"]
+ if (!NROW(infos)) infos <- "NA"
+ infos <- infos[order(infos)]
+ infos <- paste(infos, collapse = ", ")
cat(infos)
cnames <- colnames(x)
cat(' colnames:')
diff --git a/R-package/R/xgb.create.features.R b/R-package/R/xgb.create.features.R
index baef3bb03e28..27f8a0975ae7 100644
--- a/R-package/R/xgb.create.features.R
+++ b/R-package/R/xgb.create.features.R
@@ -71,7 +71,6 @@
#' new.dtest <- xgb.DMatrix(
#' data = new.features.test, label = agaricus.test$label, nthread = 2
#' )
-#' watchlist <- list(train = new.dtrain)
#' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
#'
#' # Model accuracy with new features
diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R
index 29bddb57f3e2..880fd56974bc 100644
--- a/R-package/R/xgb.cv.R
+++ b/R-package/R/xgb.cv.R
@@ -1,6 +1,6 @@
#' Cross Validation
#'
-#' The cross validation function of xgboost
+#' The cross validation function of xgboost.
#'
#' @param params the list of parameters. The complete list of parameters is
#' available in the \href{http://xgboost.readthedocs.io/en/latest/parameter.html}{online documentation}. Below
@@ -19,15 +19,19 @@
#'
#' See \code{\link{xgb.train}} for further details.
#' See also demo/ for walkthrough example in R.
-#' @param data takes an \code{xgb.DMatrix}, \code{matrix}, or \code{dgCMatrix} as the input.
+#'
+#' Note that, while `params` accepts a `seed` entry and will use that parameter for model training
+#' if supplied, this seed is not used for the creation of train-test splits, which instead rely on R's
+#' own RNG system. Thus, for reproducible results, one needs to call `set.seed` beforehand.
+#' @param data An `xgb.DMatrix` object, with corresponding fields like `label` or bounds as required
+#' for model training by the objective.
+#'
+#' Note that only the basic `xgb.DMatrix` class is supported - variants such as `xgb.QuantileDMatrix`
+#' or `xgb.ExternalDMatrix` are not supported here.
#' @param nrounds the max number of iterations
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
-#' @param label vector of response values. Should be provided only when data is an R-matrix.
-#' @param missing is only used when input is a dense matrix. By default is set to NA, which means
-#' that NA values should be considered as 'missing' by the algorithm.
-#' Sometimes, 0 or other extreme value might be used to represent missing values.
#' @param prediction A logical value indicating whether to return the test fold predictions
-#' from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.
+#' from each CV model. This parameter engages the \code{\link{xgb.cb.cv.predict}} callback.
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param metrics, list of evaluation metrics to be used in cross validation,
#' when it is not specified, the evaluation metric is chosen according to objective function.
@@ -47,27 +51,44 @@
#' @param feval customized evaluation function. Returns
#' \code{list(metric='metric-name', value='metric-value')} with given
#' prediction and dtrain.
-#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
-#' by the values of outcome labels.
+#' @param stratified A \code{boolean} indicating whether sampling of folds should be stratified
+#' by the values of outcome labels. For real-valued labels in regression objectives,
+#' stratification will be done by discretizing the labels into up to 5 buckets beforehand.
+#'
+#' If passing "auto", will be set to `TRUE` if the objective in `params` is a classification
+#' objective (from XGBoost's built-in objectives, doesn't apply to custom ones), and to
+#' `FALSE` otherwise.
+#'
+#' This parameter is ignored when `data` has a `group` field - in such case, the splitting
+#' will be based on whole groups (note that this might make the folds have different sizes).
+#'
+#' Value `TRUE` here is \bold{not} supported for custom objectives.
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
+#'
+#' If `data` has a `group` field and the objective requires this field, each fold (list element)
+#' must additionally have two attributes (retrievable through \link{attributes}) named `group_test`
+#' and `group_train`, which should hold the `group` to assign through \link{setinfo.xgb.DMatrix} to
+#' the resulting DMatrices.
#' @param train_folds \code{list} list specifying which indices to use for training. If \code{NULL}
#' (the default) all indices not specified in \code{folds} will be used for training.
+#'
+#' This is not supported when `data` has a `group` field.
#' @param verbose \code{boolean}, print the statistics during the process
#' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
#' Default is 1 which means all messages are printed. This parameter is passed to the
-#' \code{\link{cb.print.evaluation}} callback.
+#' \code{\link{xgb.cb.print.evaluation}} callback.
#' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
#' If set to an integer \code{k}, training with a validation set will stop if the performance
#' doesn't improve for \code{k} rounds.
-#' Setting this parameter engages the \code{\link{cb.early.stop}} callback.
+#' Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.
#' @param maximize If \code{feval} and \code{early_stopping_rounds} are set,
#' then this parameter must be set as well.
#' When it is \code{TRUE}, it means the larger the evaluation score the better.
-#' This parameter is passed to the \code{\link{cb.early.stop}} callback.
+#' This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.
#' @param callbacks a list of callback functions to perform various task during boosting.
-#' See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+#' See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
#' parameters' values. User can provide either existing or their own callback methods in order
#' to customize the training process.
#' @param ... other parameters to pass to \code{params}.
@@ -90,25 +111,25 @@
#' \itemize{
#' \item \code{call} a function call.
#' \item \code{params} parameters that were passed to the xgboost library. Note that it does not
-#' capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
-#' \item \code{callbacks} callback functions that were either automatically assigned or
-#' explicitly passed.
+#' capture parameters changed by the \code{\link{xgb.cb.reset.parameters}} callback.
#' \item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
#' first column corresponding to iteration number and the rest corresponding to the
#' CV-based evaluation means and standard deviations for the training and test CV-sets.
-#' It is created by the \code{\link{cb.evaluation.log}} callback.
+#' It is created by the \code{\link{xgb.cb.evaluation.log}} callback.
#' \item \code{niter} number of boosting iterations.
#' \item \code{nfeatures} number of features in training data.
#' \item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
#' parameter or randomly generated.
#' \item \code{best_iteration} iteration number with the best evaluation metric value
#' (only available with early stopping).
-#' \item \code{pred} CV prediction values available when \code{prediction} is set.
-#' It is either vector or matrix (see \code{\link{cb.cv.predict}}).
-#' \item \code{models} a list of the CV folds' models. It is only available with the explicit
-#' setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
#' }
#'
+#' Plus other potential elements that are the result of callbacks, such as a list `cv_predict` with
+#' a sub-element `pred` when passing `prediction = TRUE`, which is added by the \link{xgb.cb.cv.predict}
+#' callback (note that one can also pass it manually under `callbacks` with different settings,
+#' such as saving also the models created during cross validation); or a list `early_stop` which
+#' will contain elements such as `best_iteration` when using the early stopping callback (\link{xgb.cb.early.stop}).
+#'
#' @examples
#' data(agaricus.train, package='xgboost')
#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2))
@@ -118,13 +139,14 @@
#' print(cv, verbose=TRUE)
#'
#' @export
-xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing = NA,
+xgb.cv <- function(params = list(), data, nrounds, nfold,
prediction = FALSE, showsd = TRUE, metrics = list(),
- obj = NULL, feval = NULL, stratified = TRUE, folds = NULL, train_folds = NULL,
+ obj = NULL, feval = NULL, stratified = "auto", folds = NULL, train_folds = NULL,
verbose = TRUE, print_every_n = 1L,
early_stopping_rounds = NULL, maximize = NULL, callbacks = list(), ...) {
check.deprecation(...)
+ stopifnot(inherits(data, "xgb.DMatrix"))
if (inherits(data, "xgb.DMatrix") && .Call(XGCheckNullPtr_R, data)) {
stop("'data' is an invalid 'xgb.DMatrix' object. Must be constructed again.")
}
@@ -137,16 +159,22 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
check.custom.obj()
check.custom.eval()
- # Check the labels
- if ((inherits(data, 'xgb.DMatrix') && !xgb.DMatrix.hasinfo(data, 'label')) ||
- (!inherits(data, 'xgb.DMatrix') && is.null(label))) {
- stop("Labels must be provided for CV either through xgb.DMatrix, or through 'label=' when 'data' is matrix")
- } else if (inherits(data, 'xgb.DMatrix')) {
- if (!is.null(label))
- warning("xgb.cv: label will be ignored, since data is of type xgb.DMatrix")
- cv_label <- getinfo(data, 'label')
- } else {
- cv_label <- label
+ if (stratified == "auto") {
+ if (is.character(params$objective)) {
+ stratified <- (
+ (params$objective %in% .CLASSIFICATION_OBJECTIVES())
+ && !(params$objective %in% .RANKING_OBJECTIVES())
+ )
+ } else {
+ stratified <- FALSE
+ }
+ }
+
+ # Check the labels and groups
+ cv_label <- getinfo(data, "label")
+ cv_group <- getinfo(data, "group")
+ if (!is.null(train_folds) && NROW(cv_group)) {
+ stop("'train_folds' is not supported for DMatrix object with 'group' field.")
}
# CV folds
@@ -157,63 +185,64 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
} else {
if (nfold <= 1)
stop("'nfold' must be > 1")
- folds <- generate.cv.folds(nfold, nrow(data), stratified, cv_label, params)
+ folds <- generate.cv.folds(nfold, nrow(data), stratified, cv_label, cv_group, params)
}
+ # Callbacks
+ tmp <- .process.callbacks(callbacks, is_cv = TRUE)
+ callbacks <- tmp$callbacks
+ cb_names <- tmp$cb_names
+ rm(tmp)
+
+ # Early stopping callback
+ if (!is.null(early_stopping_rounds) && !("early_stop" %in% cb_names)) {
+ callbacks <- add.callback(
+ callbacks,
+ xgb.cb.early.stop(
+ early_stopping_rounds,
+ maximize = maximize,
+ verbose = verbose
+ ),
+ as_first_elt = TRUE
+ )
+ }
# verbosity & evaluation printing callback:
params <- c(params, list(silent = 1))
print_every_n <- max(as.integer(print_every_n), 1L)
- if (!has.callbacks(callbacks, 'cb.print.evaluation') && verbose) {
- callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n, showsd = showsd))
+ if (verbose && !("print_evaluation" %in% cb_names)) {
+ callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n, showsd = showsd))
}
# evaluation log callback: always is on in CV
- evaluation_log <- list()
- if (!has.callbacks(callbacks, 'cb.evaluation.log')) {
- callbacks <- add.cb(callbacks, cb.evaluation.log())
- }
- # Early stopping callback
- stop_condition <- FALSE
- if (!is.null(early_stopping_rounds) &&
- !has.callbacks(callbacks, 'cb.early.stop')) {
- callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
- maximize = maximize, verbose = verbose))
+ if (!("evaluation_log" %in% cb_names)) {
+ callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
}
# CV-predictions callback
- if (prediction &&
- !has.callbacks(callbacks, 'cb.cv.predict')) {
- callbacks <- add.cb(callbacks, cb.cv.predict(save_models = FALSE))
+ if (prediction && !("cv_predict" %in% cb_names)) {
+ callbacks <- add.callback(callbacks, xgb.cb.cv.predict(save_models = FALSE))
}
- # Sort the callbacks into categories
- cb <- categorize.callbacks(callbacks)
-
# create the booster-folds
# train_folds
- dall <- xgb.get.DMatrix(
- data = data,
- label = label,
- missing = missing,
- weight = NULL,
- nthread = params$nthread
- )
+ dall <- data
bst_folds <- lapply(seq_along(folds), function(k) {
- dtest <- xgb.slice.DMatrix(dall, folds[[k]])
+ dtest <- xgb.slice.DMatrix(dall, folds[[k]], allow_groups = TRUE)
# code originally contributed by @RolandASc on stackoverflow
if (is.null(train_folds))
- dtrain <- xgb.slice.DMatrix(dall, unlist(folds[-k]))
+ dtrain <- xgb.slice.DMatrix(dall, unlist(folds[-k]), allow_groups = TRUE)
else
- dtrain <- xgb.slice.DMatrix(dall, train_folds[[k]])
+ dtrain <- xgb.slice.DMatrix(dall, train_folds[[k]], allow_groups = TRUE)
+ if (!is.null(attributes(folds[[k]])$group_test)) {
+ setinfo(dtest, "group", attributes(folds[[k]])$group_test)
+ setinfo(dtrain, "group", attributes(folds[[k]])$group_train)
+ }
bst <- xgb.Booster(
params = params,
cachelist = list(dtrain, dtest),
modelfile = NULL
)
bst <- bst$bst
- list(dtrain = dtrain, bst = bst, watchlist = list(train = dtrain, test = dtest), index = folds[[k]])
+ list(dtrain = dtrain, bst = bst, evals = list(train = dtrain, test = dtest), index = folds[[k]])
})
- rm(dall)
- # a "basket" to collect some results from callbacks
- basket <- list()
# extract parameters that can affect the relationship b/w #trees and #iterations
num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
@@ -222,10 +251,25 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
begin_iteration <- 1
end_iteration <- nrounds
+ .execute.cb.before.training(
+ callbacks,
+ bst_folds,
+ dall,
+ NULL,
+ begin_iteration,
+ end_iteration
+ )
+
# synchronous CV boosting: run CV folds' models within each iteration
for (iteration in begin_iteration:end_iteration) {
- for (f in cb$pre_iter) f()
+ .execute.cb.before.iter(
+ callbacks,
+ bst_folds,
+ dall,
+ NULL,
+ iteration
+ )
msg <- lapply(bst_folds, function(fd) {
xgb.iter.update(
@@ -236,33 +280,42 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
)
xgb.iter.eval(
bst = fd$bst,
- watchlist = fd$watchlist,
+ evals = fd$evals,
iter = iteration - 1,
feval = feval
)
})
msg <- simplify2array(msg)
- # Note: these variables might look unused here, but they are used in the callbacks
- bst_evaluation <- rowMeans(msg) # nolint
- bst_evaluation_err <- apply(msg, 1, sd) # nolint
- for (f in cb$post_iter) f()
+ should_stop <- .execute.cb.after.iter(
+ callbacks,
+ bst_folds,
+ dall,
+ NULL,
+ iteration,
+ msg
+ )
- if (stop_condition) break
+ if (should_stop) break
}
- for (f in cb$finalize) f(finalize = TRUE)
+ cb_outputs <- .execute.cb.after.training(
+ callbacks,
+ bst_folds,
+ dall,
+ NULL,
+ iteration,
+ msg
+ )
# the CV result
ret <- list(
call = match.call(),
params = params,
- callbacks = callbacks,
- evaluation_log = evaluation_log,
- niter = end_iteration,
- nfeatures = ncol(data),
+ niter = iteration,
+ nfeatures = ncol(dall),
folds = folds
)
- ret <- c(ret, basket)
+ ret <- c(ret, cb_outputs)
class(ret) <- 'xgb.cv.synchronous'
return(invisible(ret))
@@ -285,8 +338,8 @@ xgb.cv <- function(params = list(), data, nrounds, nfold, label = NULL, missing
#' @examples
#' data(agaricus.train, package='xgboost')
#' train <- agaricus.train
-#' cv <- xgb.cv(data = train$data, label = train$label, nfold = 5, max_depth = 2,
-#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+#' cv <- xgb.cv(data = xgb.DMatrix(train$data, label = train$label), nfold = 5, max_depth = 2,
+#' eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
#' print(cv)
#' print(cv, verbose=TRUE)
#'
@@ -308,23 +361,16 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
paste0('"', unlist(x$params), '"'),
sep = ' = ', collapse = ', '), '\n', sep = '')
}
- if (!is.null(x$callbacks) && length(x$callbacks) > 0) {
- cat('callbacks:\n')
- lapply(callback.calls(x$callbacks), function(x) {
- cat(' ')
- print(x)
- })
- }
for (n in c('niter', 'best_iteration')) {
- if (is.null(x[[n]]))
+ if (is.null(x$early_stop[[n]]))
next
- cat(n, ': ', x[[n]], '\n', sep = '')
+ cat(n, ': ', x$early_stop[[n]], '\n', sep = '')
}
- if (!is.null(x$pred)) {
+ if (!is.null(x$cv_predict$pred)) {
cat('pred:\n')
- str(x$pred)
+ str(x$cv_predict$pred)
}
}
@@ -332,9 +378,9 @@ print.xgb.cv.synchronous <- function(x, verbose = FALSE, ...) {
cat('evaluation_log:\n')
print(x$evaluation_log, row.names = FALSE, ...)
- if (!is.null(x$best_iteration)) {
+ if (!is.null(x$early_stop$best_iteration)) {
cat('Best iteration:\n')
- print(x$evaluation_log[x$best_iteration], row.names = FALSE, ...)
+ print(x$evaluation_log[x$early_stop$best_iteration], row.names = FALSE, ...)
}
invisible(x)
}
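With the xgb.cv refactor above, callback outputs are no longer flat fields of the result but named elements keyed by each callback's name. A minimal sketch of the new interface, using only element names that appear in this patch (evaluation_log, cv_predict, early_stop); the toy data and parameter values are illustrative only:

library(xgboost)
data(agaricus.train, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
cv <- xgb.cv(
  params = list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2),
  data = dtrain,             # xgb.cv now expects an xgb.DMatrix directly
  nrounds = 10,
  nfold = 5,
  prediction = TRUE,         # adds xgb.cb.cv.predict() automatically
  early_stopping_rounds = 3  # adds xgb.cb.early.stop() automatically
)
cv$evaluation_log             # from xgb.cb.evaluation.log()
str(cv$cv_predict$pred)       # from xgb.cb.cv.predict()
cv$early_stop$best_iteration  # from xgb.cb.early.stop()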
diff --git a/R-package/R/xgb.load.R b/R-package/R/xgb.load.R
index 4985f74b56c6..d5b192bcb6fa 100644
--- a/R-package/R/xgb.load.R
+++ b/R-package/R/xgb.load.R
@@ -6,7 +6,7 @@
#'
#' @details
#' The input file is expected to contain a model saved in an xgboost model format
-#' using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
+#' using either \code{\link{xgb.save}} or \code{\link{xgb.cb.save.model}} in R, or using some
#' appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
#' saved from there in xgboost format, could be loaded from R.
#'
diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R
index 44cde2e7a843..4cea088e0e45 100644
--- a/R-package/R/xgb.train.R
+++ b/R-package/R/xgb.train.R
@@ -114,13 +114,13 @@
#' @param data training dataset. \code{xgb.train} accepts only an \code{xgb.DMatrix} as the input.
#' \code{xgboost}, in addition, also accepts \code{matrix}, \code{dgCMatrix}, or name of a local data file.
#' @param nrounds max number of boosting iterations.
-#' @param watchlist named list of xgb.DMatrix datasets to use for evaluating model performance.
+#' @param evals Named list of `xgb.DMatrix` datasets to use for evaluating model performance.
#' Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
#' of these datasets during each boosting iteration, and stored in the end as a field named
#' \code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
-#' \code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
+#' \code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
#' printed out during the training.
-#' E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
+#' E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
#' the performance of each round's model on mat1 and mat2.
#' @param obj customized objective function. Returns gradient and second order
#' gradient with given prediction and dtrain.
@@ -130,31 +130,32 @@
#' @param verbose If 0, xgboost will stay silent. If 1, it will print information about performance.
#' If 2, some additional information will be printed out.
#' Note that setting \code{verbose > 0} automatically engages the
-#' \code{cb.print.evaluation(period=1)} callback function.
+#' \code{xgb.cb.print.evaluation(period=1)} callback function.
#' @param print_every_n Print each n-th iteration evaluation messages when \code{verbose>0}.
#' Default is 1 which means all messages are printed. This parameter is passed to the
-#' \code{\link{cb.print.evaluation}} callback.
+#' \code{\link{xgb.cb.print.evaluation}} callback.
#' @param early_stopping_rounds If \code{NULL}, the early stopping function is not triggered.
#' If set to an integer \code{k}, training with a validation set will stop if the performance
#' doesn't improve for \code{k} rounds.
-#' Setting this parameter engages the \code{\link{cb.early.stop}} callback.
+#' Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.
#' @param maximize If \code{feval} and \code{early_stopping_rounds} are set,
#' then this parameter must be set as well.
#' When it is \code{TRUE}, it means the larger the evaluation score the better.
-#' This parameter is passed to the \code{\link{cb.early.stop}} callback.
+#' This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.
#' @param save_period when it is non-NULL, model is saved to disk after every \code{save_period} rounds,
-#' 0 means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.
+#' 0 means save at the end. The saving is handled by the \code{\link{xgb.cb.save.model}} callback.
#' @param save_name the name or path for periodically saved model file.
#' @param xgb_model a previously built model to continue the training from.
#' Could be either an object of class \code{xgb.Booster}, or its raw data, or the name of a
#' file with a previously saved model.
#' @param callbacks a list of callback functions to perform various task during boosting.
-#' See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+#' See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
#' parameters' values. User can provide either existing or their own callback methods in order
#' to customize the training process.
#'
-#' Note that some callbacks might try to set an evaluation log - be aware that these evaluation logs
-#' are kept as R attributes, and thus do not get saved when using non-R serializaters like
+#' Note that some callbacks might try to leave attributes in the resulting model object,
+#' such as an evaluation log (a `data.table` object) - be aware that these objects are kept
+#' as R attributes, and thus do not get saved when using XGBoost's own serializers like
#' \link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
#' @param ... other parameters to pass to \code{params}.
#' @param label vector of response values. Should not be provided when data is
@@ -170,7 +171,7 @@
#' @details
#' These are the training functions for \code{xgboost}.
#'
-#' The \code{xgb.train} interface supports advanced features such as \code{watchlist},
+#' The \code{xgb.train} interface supports advanced features such as \code{evals},
#' customized objective and evaluation metric functions, therefore it is more flexible
#' than the \code{xgboost} interface.
#'
@@ -206,18 +207,19 @@
#'
#' The following callbacks are automatically created when certain parameters are set:
#' \itemize{
-#' \item \code{cb.print.evaluation} is turned on when \code{verbose > 0};
+#' \item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
#' and the \code{print_every_n} parameter is passed to it.
-#' \item \code{cb.evaluation.log} is on when \code{watchlist} is present.
-#' \item \code{cb.early.stop}: when \code{early_stopping_rounds} is set.
-#' \item \code{cb.save.model}: when \code{save_period > 0} is set.
+#' \item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
+#' \item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
+#' \item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
#' }
#'
#' Note that objects of type `xgb.Booster` as returned by this function behave a bit differently
#' from typical R objects (it's an 'altrep' list class), and it makes a separation between
#' internal booster attributes (restricted to jsonifyable data), accessed through \link{xgb.attr}
#' and shared between interfaces through serialization functions like \link{xgb.save}; and
-#' R-specific attributes, accessed through \link{attributes} and \link{attr}, which are otherwise
+#' R-specific attributes (typically the result from a callback), accessed through \link{attributes}
+#' and \link{attr}, which are otherwise
#' only used in the R interface, only kept when using R's serializers like \link{saveRDS}, and
#' not anyhow used by functions like \link{predict.xgb.Booster}.
#'
@@ -229,7 +231,7 @@
#' effect elsewhere.
#'
#' @seealso
-#' \code{\link{callbacks}},
+#' \code{\link{xgb.Callback}},
#' \code{\link{predict.xgb.Booster}},
#' \code{\link{xgb.cv}}
#'
@@ -252,12 +254,12 @@
#' dtest <- with(
#' agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
#' )
-#' watchlist <- list(train = dtrain, eval = dtest)
+#' evals <- list(train = dtrain, eval = dtest)
#'
#' ## A simple xgb.train example:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
#'
#' ## An xgb.train example where custom objective and evaluation metric are
#' ## used:
@@ -278,15 +280,15 @@
#' # as 'objective' and 'eval_metric' parameters in the params list:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = logregobj, eval_metric = evalerror)
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
#'
#' # or through the ... arguments:
#' param <- list(max_depth = 2, eta = 1, nthread = nthread)
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
#' objective = logregobj, eval_metric = evalerror)
#'
#' # or as dedicated 'obj' and 'feval' parameters of xgb.train:
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
#' obj = logregobj, feval = evalerror)
#'
#'
@@ -294,11 +296,11 @@
#' param <- list(max_depth = 2, eta = 1, nthread = nthread,
#' objective = "binary:logistic", eval_metric = "auc")
#' my_etas <- list(eta = c(0.5, 0.1))
-#' bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
-#' callbacks = list(cb.reset.parameters(my_etas)))
+#' bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+#' callbacks = list(xgb.cb.reset.parameters(my_etas)))
#'
#' ## Early stopping:
-#' bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
+#' bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
#' early_stopping_rounds = 3)
#'
#' ## An 'xgboost' interface example:
@@ -309,7 +311,7 @@
#'
#' @rdname xgb.train
#' @export
-xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
+xgb.train <- function(params = list(), data, nrounds, evals = list(),
obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
early_stopping_rounds = NULL, maximize = NULL,
save_period = NULL, save_name = "xgboost.model",
@@ -322,68 +324,68 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
check.custom.obj()
check.custom.eval()
- # data & watchlist checks
+ # data & evals checks
dtrain <- data
if (!inherits(dtrain, "xgb.DMatrix"))
stop("second argument dtrain must be xgb.DMatrix")
- if (length(watchlist) > 0) {
- if (typeof(watchlist) != "list" ||
- !all(vapply(watchlist, inherits, logical(1), what = 'xgb.DMatrix')))
- stop("watchlist must be a list of xgb.DMatrix elements")
- evnames <- names(watchlist)
+ if (length(evals) > 0) {
+ if (typeof(evals) != "list" ||
+ !all(vapply(evals, inherits, logical(1), what = 'xgb.DMatrix')))
+ stop("'evals' must be a list of xgb.DMatrix elements")
+ evnames <- names(evals)
if (is.null(evnames) || any(evnames == ""))
- stop("each element of the watchlist must have a name tag")
+ stop("each element of 'evals' must have a name tag")
}
# Handle multiple evaluation metrics given as a list
for (m in params$eval_metric) {
params <- c(params, list(eval_metric = m))
}
- # evaluation printing callback
params <- c(params)
- print_every_n <- max(as.integer(print_every_n), 1L)
- if (!has.callbacks(callbacks, 'cb.print.evaluation') &&
- verbose) {
- callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n))
+ params['validate_parameters'] <- TRUE
+ if (!("seed" %in% names(params))) {
+ params[["seed"]] <- sample(.Machine$integer.max, size = 1)
}
- # evaluation log callback: it is automatically enabled when watchlist is provided
- evaluation_log <- list()
- if (!has.callbacks(callbacks, 'cb.evaluation.log') &&
- length(watchlist) > 0) {
- callbacks <- add.cb(callbacks, cb.evaluation.log())
+
+ # callbacks
+ tmp <- .process.callbacks(callbacks, is_cv = FALSE)
+ callbacks <- tmp$callbacks
+ cb_names <- tmp$cb_names
+ rm(tmp)
+
+ # Early stopping callback (should always come first)
+ if (!is.null(early_stopping_rounds) && !("early_stop" %in% cb_names)) {
+ callbacks <- add.callback(
+ callbacks,
+ xgb.cb.early.stop(
+ early_stopping_rounds,
+ maximize = maximize,
+ verbose = verbose
+ ),
+ as_first_elt = TRUE
+ )
}
- # Model saving callback
- if (!is.null(save_period) &&
- !has.callbacks(callbacks, 'cb.save.model')) {
- callbacks <- add.cb(callbacks, cb.save.model(save_period, save_name))
+ # evaluation printing callback
+ print_every_n <- max(as.integer(print_every_n), 1L)
+ if (verbose && !("print_evaluation" %in% cb_names)) {
+ callbacks <- add.callback(callbacks, xgb.cb.print.evaluation(print_every_n))
}
- # Early stopping callback
- stop_condition <- FALSE
- if (!is.null(early_stopping_rounds) &&
- !has.callbacks(callbacks, 'cb.early.stop')) {
- callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
- maximize = maximize, verbose = verbose))
+ # evaluation log callback: it is automatically enabled when 'evals' is provided
+ if (length(evals) && !("evaluation_log" %in% cb_names)) {
+ callbacks <- add.callback(callbacks, xgb.cb.evaluation.log())
}
-
- # Sort the callbacks into categories
- cb <- categorize.callbacks(callbacks)
- params['validate_parameters'] <- TRUE
- if (!("seed" %in% names(params))) {
- params[["seed"]] <- sample(.Machine$integer.max, size = 1)
+ # Model saving callback
+ if (!is.null(save_period) && !("save_model" %in% cb_names)) {
+ callbacks <- add.callback(callbacks, xgb.cb.save.model(save_period, save_name))
}
# The tree updating process would need slightly different handling
is_update <- NVL(params[['process_type']], '.') == 'update'
- past_evaluation_log <- NULL
- if (inherits(xgb_model, "xgb.Booster")) {
- past_evaluation_log <- attributes(xgb_model)$evaluation_log
- }
-
# Construct a booster (either a new one or load from xgb_model)
bst <- xgb.Booster(
params = params,
- cachelist = append(watchlist, dtrain),
+ cachelist = append(evals, dtrain),
modelfile = xgb_model
)
niter_init <- bst$niter
@@ -394,11 +396,6 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
dtrain
)
- # extract parameters that can affect the relationship b/w #trees and #iterations
- # Note: it might look like these aren't used, but they need to be defined in this
- # environment for the callbacks for work correctly.
- num_class <- max(as.numeric(NVL(params[['num_class']], 1)), 1) # nolint
-
if (is_update && nrounds > niter_init)
stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
@@ -406,57 +403,83 @@ xgb.train <- function(params = list(), data, nrounds, watchlist = list(),
begin_iteration <- niter_skip + 1
end_iteration <- niter_skip + nrounds
+ .execute.cb.before.training(
+ callbacks,
+ bst,
+ dtrain,
+ evals,
+ begin_iteration,
+ end_iteration
+ )
+
# the main loop for boosting iterations
for (iteration in begin_iteration:end_iteration) {
- for (f in cb$pre_iter) f()
+ .execute.cb.before.iter(
+ callbacks,
+ bst,
+ dtrain,
+ evals,
+ iteration
+ )
xgb.iter.update(
- bst = bst,
- dtrain = dtrain,
- iter = iteration - 1,
- obj = obj
+ bst = bst,
+ dtrain = dtrain,
+ iter = iteration - 1,
+ obj = obj
)
- if (length(watchlist) > 0) {
- bst_evaluation <- xgb.iter.eval( # nolint: object_usage_linter
+ bst_evaluation <- NULL
+ if (length(evals) > 0) {
+ bst_evaluation <- xgb.iter.eval(
bst = bst,
- watchlist = watchlist,
+ evals = evals,
iter = iteration - 1,
feval = feval
)
}
- for (f in cb$post_iter) f()
+ should_stop <- .execute.cb.after.iter(
+ callbacks,
+ bst,
+ dtrain,
+ evals,
+ iteration,
+ bst_evaluation
+ )
- if (stop_condition) break
+ if (should_stop) break
}
- for (f in cb$finalize) f(finalize = TRUE)
- # store the evaluation results
- keep_evaluation_log <- FALSE
- if (length(evaluation_log) > 0 && nrow(evaluation_log) > 0) {
- keep_evaluation_log <- TRUE
- # include the previous compatible history when available
- if (inherits(xgb_model, 'xgb.Booster') &&
- !is_update &&
- !is.null(past_evaluation_log) &&
- isTRUE(all.equal(colnames(evaluation_log),
- colnames(past_evaluation_log)))) {
- evaluation_log <- rbindlist(list(past_evaluation_log, evaluation_log))
- }
- }
+ cb_outputs <- .execute.cb.after.training(
+ callbacks,
+ bst,
+ dtrain,
+ evals,
+ iteration,
+ bst_evaluation
+ )
extra_attrs <- list(
call = match.call(),
- params = params,
- callbacks = callbacks
+ params = params
)
- if (keep_evaluation_log) {
- extra_attrs$evaluation_log <- evaluation_log
- }
+
curr_attrs <- attributes(bst)
- attributes(bst) <- c(curr_attrs, extra_attrs)
+ if (NROW(curr_attrs)) {
+ curr_attrs <- curr_attrs[
+ setdiff(
+ names(curr_attrs),
+ c(names(extra_attrs), names(cb_outputs))
+ )
+ ]
+ }
+ curr_attrs <- c(extra_attrs, curr_attrs)
+ if (NROW(cb_outputs)) {
+ curr_attrs <- c(curr_attrs, cb_outputs)
+ }
+ attributes(bst) <- curr_attrs
return(bst)
}
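As a quick illustration of the renamed 'evals' argument and the attribute-based storage of callback outputs that this hunk introduces, a hedged sketch (toy data only; the attribute layout follows the documentation changes above):

library(xgboost)
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
bst <- xgb.train(
  params = list(objective = "binary:logistic", max_depth = 2, eta = 1, nthread = 2),
  data = dtrain,
  nrounds = 5,
  evals = list(train = dtrain, eval = dtest)  # was 'watchlist'
)
# The log produced by xgb.cb.evaluation.log() is kept as an R attribute,
# preserved by saveRDS() but not by xgb.save():
attributes(bst)$evaluation_log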
diff --git a/R-package/R/xgboost.R b/R-package/R/xgboost.R
index 170aa5ffd5be..a1d37358162c 100644
--- a/R-package/R/xgboost.R
+++ b/R-package/R/xgboost.R
@@ -18,9 +18,9 @@ xgboost <- function(data = NULL, label = NULL, missing = NA, weight = NULL,
nthread = merged$nthread
)
- watchlist <- list(train = dtrain)
+ evals <- list(train = dtrain)
- bst <- xgb.train(params, dtrain, nrounds, watchlist, verbose = verbose, print_every_n = print_every_n,
+ bst <- xgb.train(params, dtrain, nrounds, evals, verbose = verbose, print_every_n = print_every_n,
early_stopping_rounds = early_stopping_rounds, maximize = maximize,
save_period = save_period, save_name = save_name,
xgb_model = xgb_model, callbacks = callbacks, ...)
@@ -82,12 +82,8 @@ NULL
NULL
# Various imports
-#' @importClassesFrom Matrix dgCMatrix dgeMatrix dgRMatrix
-#' @importFrom Matrix colSums
+#' @importClassesFrom Matrix dgCMatrix dgRMatrix CsparseMatrix
#' @importFrom Matrix sparse.model.matrix
-#' @importFrom Matrix sparseVector
-#' @importFrom Matrix sparseMatrix
-#' @importFrom Matrix t
#' @importFrom data.table data.table
#' @importFrom data.table is.data.table
#' @importFrom data.table as.data.table
@@ -103,6 +99,7 @@ NULL
#' @importFrom stats coef
#' @importFrom stats predict
#' @importFrom stats median
+#' @importFrom stats sd
#' @importFrom stats variable.names
#' @importFrom utils head
#' @importFrom graphics barplot
diff --git a/R-package/demo/basic_walkthrough.R b/R-package/demo/basic_walkthrough.R
index 3dbbe0586f44..9403bac2064c 100644
--- a/R-package/demo/basic_walkthrough.R
+++ b/R-package/demo/basic_walkthrough.R
@@ -74,17 +74,17 @@ print(paste("sum(abs(pred3-pred))=", sum(abs(pred3 - pred))))
# to use advanced features, we need to put data in xgb.DMatrix
dtrain <- xgb.DMatrix(data = train$data, label = train$label)
dtest <- xgb.DMatrix(data = test$data, label = test$label)
-#---------------Using watchlist----------------
-# watchlist is a list of xgb.DMatrix, each of them is tagged with name
-watchlist <- list(train = dtrain, test = dtest)
-# to train with watchlist, use xgb.train, which contains more advanced features
-# watchlist allows us to monitor the evaluation result on all data in the list
-print("Train xgboost using xgb.train with watchlist")
-bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+#---------------Using an evaluation set----------------
+# 'evals' is a list of xgb.DMatrix, each of them is tagged with name
+evals <- list(train = dtrain, test = dtest)
+# to train with an evaluation set, use xgb.train, which contains more advanced features
+# 'evals' argument allows us to monitor the evaluation result on all data in the list
+print("Train xgboost using xgb.train with evaluation data")
+bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
nthread = 2, objective = "binary:logistic")
# we can change evaluation metrics, or use multiple evaluation metrics
-print("train xgboost using xgb.train with watchlist, watch logloss and error")
-bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+print("train xgboost using xgb.train with evaluation data, watch logloss and error")
+bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
eval_metric = "error", eval_metric = "logloss",
nthread = 2, objective = "binary:logistic")
@@ -92,7 +92,7 @@ bst <- xgb.train(data = dtrain, max_depth = 2, eta = 1, nrounds = 2, watchlist =
xgb.DMatrix.save(dtrain, "dtrain.buffer")
# to load it in, simply call xgb.DMatrix
dtrain2 <- xgb.DMatrix("dtrain.buffer")
-bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, watchlist = watchlist,
+bst <- xgb.train(data = dtrain2, max_depth = 2, eta = 1, nrounds = 2, evals = evals,
nthread = 2, objective = "binary:logistic")
# information can be extracted from xgb.DMatrix using getinfo
label <- getinfo(dtest, "label")
diff --git a/R-package/demo/boost_from_prediction.R b/R-package/demo/boost_from_prediction.R
index 1a3d55369d2f..75af70dba0d7 100644
--- a/R-package/demo/boost_from_prediction.R
+++ b/R-package/demo/boost_from_prediction.R
@@ -5,14 +5,14 @@ data(agaricus.test, package = 'xgboost')
dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
###
# advanced: start from a initial base prediction
#
print('start running example to start from a initial prediction')
# train xgboost for 1 round
param <- list(max_depth = 2, eta = 1, nthread = 2, objective = 'binary:logistic')
-bst <- xgb.train(param, dtrain, 1, watchlist)
+bst <- xgb.train(param, dtrain, 1, evals)
# Note: we need the margin value instead of transformed prediction in set_base_margin
# do predict with output_margin=TRUE, will always give you margin values before logistic transformation
ptrain <- predict(bst, dtrain, outputmargin = TRUE)
@@ -23,4 +23,4 @@ setinfo(dtrain, "base_margin", ptrain)
setinfo(dtest, "base_margin", ptest)
print('this is result of boost from initial prediction')
-bst <- xgb.train(params = param, data = dtrain, nrounds = 1, watchlist = watchlist)
+bst <- xgb.train(params = param, data = dtrain, nrounds = 1, evals = evals)
diff --git a/R-package/demo/custom_objective.R b/R-package/demo/custom_objective.R
index 35201332c5f6..03d7b346471b 100644
--- a/R-package/demo/custom_objective.R
+++ b/R-package/demo/custom_objective.R
@@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: for customized objective function, we leave objective as default
# note: what we are getting is margin value in prediction
# you must know what you are doing
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
num_round <- 2
# user define objective function, given prediction, return gradient and second order gradient
@@ -38,7 +38,7 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
print('start training with user customized objective')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)
#
# there can be cases where you want additional information
@@ -62,4 +62,4 @@ param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0,
print('start training with user customized objective, with additional attributes in DMatrix')
# training with customized objective, we can also do step by step training
# simply look at xgboost.py's implementation of train
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)
diff --git a/R-package/demo/early_stopping.R b/R-package/demo/early_stopping.R
index 04da1382f031..057440882567 100644
--- a/R-package/demo/early_stopping.R
+++ b/R-package/demo/early_stopping.R
@@ -8,7 +8,7 @@ dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
# note: what we are getting is margin value in prediction
# you must know what you are doing
param <- list(max_depth = 2, eta = 1, nthread = 2, verbosity = 0)
-watchlist <- list(eval = dtest)
+evals <- list(eval = dtest)
num_round <- 20
# user define objective function, given prediction, return gradient and second order gradient
# this is log likelihood loss
@@ -32,7 +32,7 @@ evalerror <- function(preds, dtrain) {
}
print('start training with early Stopping setting')
-bst <- xgb.train(param, dtrain, num_round, watchlist,
+bst <- xgb.train(param, dtrain, num_round, evals,
objective = logregobj, eval_metric = evalerror, maximize = FALSE,
early_stopping_round = 3)
bst <- xgb.cv(param, dtrain, num_round, nfold = 5,
diff --git a/R-package/demo/generalized_linear_model.R b/R-package/demo/generalized_linear_model.R
index c24fe72cbcad..d29a6dc5be58 100644
--- a/R-package/demo/generalized_linear_model.R
+++ b/R-package/demo/generalized_linear_model.R
@@ -25,9 +25,9 @@ param <- list(objective = "binary:logistic", booster = "gblinear",
##
# the rest of settings are the same
##
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
num_round <- 2
-bst <- xgb.train(param, dtrain, num_round, watchlist)
+bst <- xgb.train(param, dtrain, num_round, evals)
ypred <- predict(bst, dtest)
labels <- getinfo(dtest, 'label')
cat('error of preds=', mean(as.numeric(ypred > 0.5) != labels), '\n')
diff --git a/R-package/demo/gpu_accelerated.R b/R-package/demo/gpu_accelerated.R
index 14ed9392b7d1..617a63e74542 100644
--- a/R-package/demo/gpu_accelerated.R
+++ b/R-package/demo/gpu_accelerated.R
@@ -23,7 +23,7 @@ y <- rbinom(N, 1, plogis(m))
tr <- sample.int(N, N * 0.75)
dtrain <- xgb.DMatrix(X[tr, ], label = y[tr])
dtest <- xgb.DMatrix(X[-tr, ], label = y[-tr])
-wl <- list(train = dtrain, test = dtest)
+evals <- list(train = dtrain, test = dtest)
# An example of running 'gpu_hist' algorithm
# which is
@@ -35,11 +35,11 @@ wl <- list(train = dtrain, test = dtest)
param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4,
max_bin = 64, tree_method = 'gpu_hist')
pt <- proc.time()
-bst_gpu <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
+bst_gpu <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
proc.time() - pt
# Compare to the 'hist' algorithm:
param$tree_method <- 'hist'
pt <- proc.time()
-bst_hist <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
+bst_hist <- xgb.train(param, dtrain, evals = evals, nrounds = 50)
proc.time() - pt
diff --git a/R-package/demo/predict_first_ntree.R b/R-package/demo/predict_first_ntree.R
index 179c18c707f4..ba15ab39a74f 100644
--- a/R-package/demo/predict_first_ntree.R
+++ b/R-package/demo/predict_first_ntree.R
@@ -6,11 +6,11 @@ dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
dtest <- xgb.DMatrix(agaricus.test$data, label = agaricus.test$label)
param <- list(max_depth = 2, eta = 1, objective = 'binary:logistic')
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
nrounds <- 2
# training the model for two rounds
-bst <- xgb.train(param, dtrain, nrounds, nthread = 2, watchlist)
+bst <- xgb.train(param, dtrain, nrounds, nthread = 2, evals = evals)
cat('start testing prediction from first n trees\n')
labels <- getinfo(dtest, 'label')
diff --git a/R-package/demo/predict_leaf_indices.R b/R-package/demo/predict_leaf_indices.R
index 21b6fa71d0b7..a57baf668896 100644
--- a/R-package/demo/predict_leaf_indices.R
+++ b/R-package/demo/predict_leaf_indices.R
@@ -43,7 +43,6 @@ colnames(new.features.test) <- colnames(new.features.train)
# learning with new features
new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label)
new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label)
-watchlist <- list(train = new.dtrain)
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
# Model accuracy with new features
diff --git a/R-package/demo/tweedie_regression.R b/R-package/demo/tweedie_regression.R
index dfaf6a2ae2ce..b07858e761fa 100644
--- a/R-package/demo/tweedie_regression.R
+++ b/R-package/demo/tweedie_regression.R
@@ -39,7 +39,7 @@ bst <- xgb.train(
data = d_train,
params = params,
maximize = FALSE,
- watchlist = list(train = d_train),
+ evals = list(train = d_train),
nrounds = 20)
var_imp <- xgb.importance(attr(x, 'Dimnames')[[2]], model = bst)
diff --git a/R-package/man/callbacks.Rd b/R-package/man/callbacks.Rd
deleted file mode 100644
index 9f6f69015dcb..000000000000
--- a/R-package/man/callbacks.Rd
+++ /dev/null
@@ -1,37 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/callbacks.R
-\name{callbacks}
-\alias{callbacks}
-\title{Callback closures for booster training.}
-\description{
-These are used to perform various service tasks either during boosting iterations or at the end.
-This approach helps to modularize many of such tasks without bloating the main training methods,
-and it offers .
-}
-\details{
-By default, a callback function is run after each boosting iteration.
-An R-attribute \code{is_pre_iteration} could be set for a callback to define a pre-iteration function.
-
-When a callback function has \code{finalize} parameter, its finalizer part will also be run after
-the boosting is completed.
-
-WARNING: side-effects!!! Be aware that these callback functions access and modify things in
-the environment from which they are called from, which is a fairly uncommon thing to do in R.
-
-To write a custom callback closure, make sure you first understand the main concepts about R environments.
-Check either R documentation on \code{\link[base]{environment}} or the
-\href{http://adv-r.had.co.nz/Environments.html}{Environments chapter} from the "Advanced R"
-book by Hadley Wickham. Further, the best option is to read the code of some of the existing callbacks -
-choose ones that do something similar to what you want to achieve. Also, you would need to get familiar
-with the objects available inside of the \code{xgb.train} and \code{xgb.cv} internal environments.
-}
-\seealso{
-\code{\link{cb.print.evaluation}},
-\code{\link{cb.evaluation.log}},
-\code{\link{cb.reset.parameters}},
-\code{\link{cb.early.stop}},
-\code{\link{cb.save.model}},
-\code{\link{cb.cv.predict}},
-\code{\link{xgb.train}},
-\code{\link{xgb.cv}}
-}
diff --git a/R-package/man/cb.early.stop.Rd b/R-package/man/cb.early.stop.Rd
deleted file mode 100644
index 7cd51a3ce563..000000000000
--- a/R-package/man/cb.early.stop.Rd
+++ /dev/null
@@ -1,62 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/callbacks.R
-\name{cb.early.stop}
-\alias{cb.early.stop}
-\title{Callback closure to activate the early stopping.}
-\usage{
-cb.early.stop(
- stopping_rounds,
- maximize = FALSE,
- metric_name = NULL,
- verbose = TRUE
-)
-}
-\arguments{
-\item{stopping_rounds}{The number of rounds with no improvement in
-the evaluation metric in order to stop the training.}
-
-\item{maximize}{whether to maximize the evaluation metric}
-
-\item{metric_name}{the name of an evaluation column to use as a criteria for early
-stopping. If not set, the last column would be used.
-Let's say the test data in \code{watchlist} was labelled as \code{dtest},
-and one wants to use the AUC in test data for early stopping regardless of where
-it is in the \code{watchlist}, then one of the following would need to be set:
-\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
-All dash '-' characters in metric names are considered equivalent to '_'.}
-
-\item{verbose}{whether to print the early stopping information.}
-}
-\description{
-Callback closure to activate the early stopping.
-}
-\details{
-This callback function determines the condition for early stopping
-by setting the \code{stop_condition = TRUE} flag in its calling frame.
-
-The following additional fields are assigned to the model's R object:
-\itemize{
-\item \code{best_score} the evaluation score at the best iteration
-\item \code{best_iteration} at which boosting iteration the best score has occurred (1-based index)
-}
-The Same values are also stored as xgb-attributes:
-\itemize{
-\item \code{best_iteration} is stored as a 0-based iteration index (for interoperability of binary models)
-\item \code{best_msg} message string is also stored.
-}
-
-At least one data element is required in the evaluation watchlist for early stopping to work.
-
-Callback function expects the following values to be set in its calling frame:
-\code{stop_condition},
-\code{bst_evaluation},
-\code{rank},
-\code{bst} (or \code{bst_folds} and \code{basket}),
-\code{iteration},
-\code{begin_iteration},
-\code{end_iteration},
-}
-\seealso{
-\code{\link{callbacks}},
-\code{\link{xgb.attr}}
-}
diff --git a/R-package/man/cb.evaluation.log.Rd b/R-package/man/cb.evaluation.log.Rd
deleted file mode 100644
index 94f8a02e6227..000000000000
--- a/R-package/man/cb.evaluation.log.Rd
+++ /dev/null
@@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/callbacks.R
-\name{cb.evaluation.log}
-\alias{cb.evaluation.log}
-\title{Callback closure for logging the evaluation history}
-\usage{
-cb.evaluation.log()
-}
-\description{
-Callback closure for logging the evaluation history
-}
-\details{
-This callback function appends the current iteration evaluation results \code{bst_evaluation}
-available in the calling parent frame to the \code{evaluation_log} list in a calling frame.
-
-The finalizer callback (called with \code{finalize = TURE} in the end) converts
-the \code{evaluation_log} list into a final data.table.
-
-The iteration evaluation result \code{bst_evaluation} must be a named numeric vector.
-
-Note: in the column names of the final data.table, the dash '-' character is replaced with
-the underscore '_' in order to make the column names more like regular R identifiers.
-
-Callback function expects the following values to be set in its calling frame:
-\code{evaluation_log},
-\code{bst_evaluation},
-\code{iteration}.
-}
-\seealso{
-\code{\link{callbacks}}
-}
diff --git a/R-package/man/cb.print.evaluation.Rd b/R-package/man/cb.print.evaluation.Rd
deleted file mode 100644
index 59b9ba65ea30..000000000000
--- a/R-package/man/cb.print.evaluation.Rd
+++ /dev/null
@@ -1,29 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/callbacks.R
-\name{cb.print.evaluation}
-\alias{cb.print.evaluation}
-\title{Callback closure for printing the result of evaluation}
-\usage{
-cb.print.evaluation(period = 1, showsd = TRUE)
-}
-\arguments{
-\item{period}{results would be printed every number of periods}
-
-\item{showsd}{whether standard deviations should be printed (when available)}
-}
-\description{
-Callback closure for printing the result of evaluation
-}
-\details{
-The callback function prints the result of evaluation at every \code{period} iterations.
-The initial and the last iteration's evaluations are always printed.
-
-Callback function expects the following values to be set in its calling frame:
-\code{bst_evaluation} (also \code{bst_evaluation_err} when available),
-\code{iteration},
-\code{begin_iteration},
-\code{end_iteration}.
-}
-\seealso{
-\code{\link{callbacks}}
-}
diff --git a/R-package/man/cb.save.model.Rd b/R-package/man/cb.save.model.Rd
deleted file mode 100644
index 7701ad9900e5..000000000000
--- a/R-package/man/cb.save.model.Rd
+++ /dev/null
@@ -1,40 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/callbacks.R
-\name{cb.save.model}
-\alias{cb.save.model}
-\title{Callback closure for saving a model file.}
-\usage{
-cb.save.model(save_period = 0, save_name = "xgboost.ubj")
-}
-\arguments{
-\item{save_period}{save the model to disk after every
-\code{save_period} iterations; 0 means save the model at the end.}
-
-\item{save_name}{the name or path for the saved model file.
-
-\if{html}{\out{
}}\preformatted{ Note that the format of the model being saved is determined by the file
- extension specified here (see \link{xgb.save} for details about how it works).
-
- It can contain a \code{\link[base]{sprintf}} formatting specifier
- to include the integer iteration number in the file name.
- E.g., with \code{save_name} = 'xgboost_\%04d.ubj',
- the file saved at iteration 50 would be named "xgboost_0050.ubj".
-}\if{html}{\out{
}}}
-}
-\description{
-Callback closure for saving a model file.
-}
-\details{
-This callback function allows to save an xgb-model file, either periodically after each \code{save_period}'s or at the end.
-
-Callback function expects the following values to be set in its calling frame:
-\code{bst},
-\code{iteration},
-\code{begin_iteration},
-\code{end_iteration}.
-}
-\seealso{
-\link{xgb.save}
-
-\code{\link{callbacks}}
-}
diff --git a/R-package/man/print.xgb.cv.Rd b/R-package/man/print.xgb.cv.Rd
index 05ad61eed8ac..74fc15d01fb9 100644
--- a/R-package/man/print.xgb.cv.Rd
+++ b/R-package/man/print.xgb.cv.Rd
@@ -23,8 +23,8 @@ including the best iteration (when available).
\examples{
data(agaricus.train, package='xgboost')
train <- agaricus.train
-cv <- xgb.cv(data = train$data, label = train$label, nfold = 5, max_depth = 2,
- eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
+cv <- xgb.cv(data = xgb.DMatrix(train$data, label = train$label), nfold = 5, max_depth = 2,
+ eta = 1, nthread = 2, nrounds = 2, objective = "binary:logistic")
print(cv)
print(cv, verbose=TRUE)
diff --git a/R-package/man/xgb.Callback.Rd b/R-package/man/xgb.Callback.Rd
new file mode 100644
index 000000000000..b4edcd97842e
--- /dev/null
+++ b/R-package/man/xgb.Callback.Rd
@@ -0,0 +1,248 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.Callback}
+\alias{xgb.Callback}
+\title{XGBoost Callback Constructor}
+\usage{
+xgb.Callback(
+ cb_name = "custom_callback",
+ env = new.env(),
+ f_before_training = function(env, model, data, evals, begin_iteration, end_iteration)
+ NULL,
+ f_before_iter = function(env, model, data, evals, iteration) NULL,
+ f_after_iter = function(env, model, data, evals, iteration, iter_feval) NULL,
+ f_after_training = function(env, model, data, evals, iteration, final_feval,
+ prev_cb_res) NULL
+)
+}
+\arguments{
+\item{cb_name}{Name for the callback.
+
+If the callback produces some non-NULL result (from executing the function passed under
+\code{f_after_training}), that result will be added as an R attribute to the resulting booster
+(or as a named element in the result of CV), with the attribute name specified here.
+
+Names of callbacks must be unique - i.e. there cannot be two callbacks with the same name.}
+
+\item{env}{An environment object that will be passed to the different functions in the callback.
+Note that this environment will not be shared with other callbacks.}
+
+\item{f_before_training}{A function that will be executed before the training has started.
+
+If passing \code{NULL} for this or for the other function inputs, then no function will be executed.
+
+If passing a function, it will be called with parameters supplied as non-named arguments
+matching the function signatures that are shown in the default value for each function argument.}
+
+\item{f_before_iter}{A function that will be executed before each boosting round.
+
+This function can signal whether the training should be finalized or not, by outputting
+a value that evaluates to \code{TRUE} - i.e. if the output from the function provided here at
+a given round is \code{TRUE}, then training will be stopped before the current iteration happens.
+
+Return values of \code{NULL} will be interpreted as \code{FALSE}.}
+
+\item{f_after_iter}{A function that will be executed after each boosting round.
+
+This function can signal whether the training should be finalized or not, by outputting
+a value that evaluates to \code{TRUE} - i.e. if the output from the function provided here at
+a given round is \code{TRUE}, then training will be stopped at that round.
+
+Return values of \code{NULL} will be interpreted as \code{FALSE}.}
+
+\item{f_after_training}{A function that will be executed after training is finished.
+
+This function can optionally output something non-NULL, which will become part of the R
+attributes of the booster (assuming one passes \code{keep_extra_attributes=TRUE} to \link{xgb.train})
+under the name supplied for parameter \code{cb_name} in the case of \link{xgb.train}; or as part
+of the named elements in the result of \link{xgb.cv}.}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+Constructor for defining the structure of callback functions that can be executed
+at different stages of model training (before / after training, before / after each boosting
+iteration).
+}
+\details{
+Arguments that will be passed to the supplied functions are as follows:\itemize{
+
+\item env The same environment that is passed under argument \code{env}.
+
+It may be modified by the functions in order to e.g. keep track of what happens
+across iterations or similar.
+
+This environment is only used by the functions supplied to the callback, and will
+not be kept after the model fitting function terminates (see parameter \code{f_after_training}).
+
+\item model The booster object when using \link{xgb.train}, or the folds when using
+\link{xgb.cv}.
+
+For \link{xgb.cv}, folds are a list with a structure as follows:\itemize{
+\item \code{dtrain}: The training data for the fold (as an \code{xgb.DMatrix} object).
+\item \code{bst}: The \code{xgb.Booster} object for the fold.
+\item \code{evals}: A list containing two DMatrices, with names \code{train} and \code{test}
+(\code{test} is the held-out data for the fold).
+\item \code{index}: The indices of the hold-out data for that fold (base-1 indexing),
+from which the \code{test} entry in \code{evals} was obtained.
+}
+
+This object should \bold{not} be modified in-place in ways that conflict with the
+training (e.g. resetting the parameters for a training update in a way that resets
+the number of rounds to zero in order to overwrite rounds).
+
+Note that any R attributes that are assigned to the booster during the callback functions,
+will not be kept thereafter as the booster object variable is not re-assigned during
+training. It is however possible to set C-level attributes of the booster through
+\link{xgb.attr} or \link{xgb.attributes}, which should remain available for the rest
+of the iterations and after the training is done.
+
+For keeping variables across iterations, it's recommended to use \code{env} instead.
+\item data The data to which the model is being fit, as an \code{xgb.DMatrix} object.
+
+Note that, for \link{xgb.cv}, this will be the full data, while data for the specific
+folds can be found in the \code{model} object.
+
+\item evals The evaluation data, as passed under argument \code{evals} to
+\link{xgb.train}.
+
+For \link{xgb.cv}, this will always be \code{NULL}.
+
+\item begin_iteration Index of the first boosting iteration that will be executed
+(base-1 indexing).
+
+This will typically be '1', but when using training continuation, depending on the
+parameters for updates, boosting rounds will be continued from where the previous
+model ended, in which case this will be larger than 1.
+
+\item end_iteration Index of the last boosting iteration that will be executed
+(base-1 indexing, inclusive of this end).
+
+It should match with argument \code{nrounds} passed to \link{xgb.train} or \link{xgb.cv}.
+
+Note that boosting might be interrupted before reaching this last iteration, for
+example by using the early stopping callback \link{xgb.cb.early.stop}.
+
+\item iteration Index of the iteration number that is being executed (first iteration
+will be the same as parameter \code{begin_iteration}, then next one will add +1, and so on).
+
+\item iter_feval Evaluation metrics for \code{evals} that were supplied, either
+determined by the objective, or by parameter \code{feval}.
+
+For \link{xgb.train}, this will be a named vector with one entry per element in
+\code{evals}, where the names are determined as 'evals name' + '-' + 'metric name' - for
+example, if \code{evals} contains an entry named "tr" and the metric is "rmse",
+this will be a one-element vector with name "tr-rmse".
+
+For \link{xgb.cv}, this will be a 2d matrix with dimensions \verb{[length(evals), nfolds]},
+where the row names will follow the same naming logic as the one-dimensional vector
+that is passed in \link{xgb.train}.
+
+Note that, internally, the built-in callbacks such as \link{xgb.cb.print.evaluation} summarize
+this table by calculating the row-wise means and standard deviations.
+
+\item final_feval The evaluation results after the last boosting round is executed
+(same format as \code{iter_feval}, and will be the exact same input as passed under
+\code{iter_feval} to the last round that is executed during model fitting).
+
+\item prev_cb_res Result from a previous run of a callback sharing the same name
+(as given by parameter \code{cb_name}) when conducting training continuation, if there
+was any in the booster R attributes.
+
+Sometimes, one might want to append the new results to the previous one, and this will
+be done automatically by the built-in callbacks such as \link{xgb.cb.evaluation.log},
+which will append the new rows to the previous table.
+
+If no such previous callback result is available (as will always be the case when fitting
+a model from scratch instead of updating an existing model), this will be \code{NULL}.
+
+For \link{xgb.cv}, which doesn't support training continuation, this will always be \code{NULL}.
+}
+
+The following names (\code{cb_name} values) are reserved for internal callbacks:\itemize{
+\item print_evaluation
+\item evaluation_log
+\item reset_parameters
+\item early_stop
+\item save_model
+\item cv_predict
+\item gblinear_history
+}
+
+The following names are reserved for other non-callback attributes:\itemize{
+\item names
+\item class
+\item call
+\item params
+\item niter
+\item nfeatures
+\item folds
+}
+
+When using the built-in early stopping callback (\link{xgb.cb.early.stop}), said callback
+will always be executed before the others, as it sets some booster C-level attributes
+that other callbacks might also use. Otherwise, the order of execution will match with
+the order in which the callbacks are passed to the model fitting function.
+}
+\examples{
+# Example constructing a custom callback that calculates
+# squared error on the training data (no separate test set),
+# and outputs the per-iteration results.
+ssq_callback <- xgb.Callback(
+ cb_name = "ssq",
+ f_before_training = function(env, model, data, evals,
+ begin_iteration, end_iteration) {
+ # A vector to keep track of a number at each iteration
+ env$logs <- rep(NA_real_, end_iteration - begin_iteration + 1)
+ },
+ f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
+ # This calculates the sum of squared errors on the training data.
+ # Note that this can be better done by passing an 'evals' entry,
+ # but this demonstrates a way in which callbacks can be structured.
+ pred <- predict(model, data)
+ err <- pred - getinfo(data, "label")
+ sq_err <- sum(err^2)
+ env$logs[iteration] <- sq_err
+ cat(
+ sprintf(
+ "Squared error at iteration \%d: \%.2f\n",
+ iteration, sq_err
+ )
+ )
+
+ # A return value of 'TRUE' here would signal to finalize the training
+ return(FALSE)
+ },
+ f_after_training = function(env, model, data, evals, iteration,
+ final_feval, prev_cb_res) {
+ return(env$logs)
+ }
+)
+
+data(mtcars)
+y <- mtcars$mpg
+x <- as.matrix(mtcars[, -1])
+dm <- xgb.DMatrix(x, label = y, nthread = 1)
+model <- xgb.train(
+ data = dm,
+ params = list(objective = "reg:squarederror", nthread = 1),
+ nrounds = 5,
+ callbacks = list(ssq_callback),
+ keep_extra_attributes = TRUE
+)
+
+# Result from 'f_after_iter' will be available as an attribute
+attributes(model)$ssq
+}
+\seealso{
+Built-in callbacks:\itemize{
+\item \link{xgb.cb.print.evaluation}
+\item \link{xgb.cb.evaluation.log}
+\item \link{xgb.cb.reset.parameters}
+\item \link{xgb.cb.early.stop}
+\item \link{xgb.cb.save.model}
+\item \link{xgb.cb.cv.predict}
+\item \link{xgb.cb.gblinear.history}
+}
+}
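The argument descriptions above also cover training continuation via 'prev_cb_res'. A hypothetical sketch (not part of the patch; the callback name and bookkeeping are made up) of a callback that appends its per-run result to whatever a previous run left behind, following the signatures documented in this file:

iters_callback <- xgb.Callback(
  cb_name = "iters_seen",  # hypothetical, non-reserved name
  f_after_iter = function(env, model, data, evals, iteration, iter_feval) {
    env$iters <- c(env$iters, iteration)
    FALSE  # returning TRUE here would stop the training
  },
  f_after_training = function(env, model, data, evals, iteration,
                              final_feval, prev_cb_res) {
    # On training continuation, append to the previous run's result (if any)
    c(prev_cb_res, env$iters)
  }
)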
diff --git a/R-package/man/cb.cv.predict.Rd b/R-package/man/xgb.cb.cv.predict.Rd
similarity index 53%
rename from R-package/man/cb.cv.predict.Rd
rename to R-package/man/xgb.cb.cv.predict.Rd
index 4cabac1c9569..d2d9a084be13 100644
--- a/R-package/man/cb.cv.predict.Rd
+++ b/R-package/man/xgb.cb.cv.predict.Rd
@@ -1,16 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
-\name{cb.cv.predict}
-\alias{cb.cv.predict}
-\title{Callback closure for returning cross-validation based predictions.}
+\name{xgb.cb.cv.predict}
+\alias{xgb.cb.cv.predict}
+\title{Callback for returning cross-validation based predictions.}
\usage{
-cb.cv.predict(save_models = FALSE)
+xgb.cb.cv.predict(save_models = FALSE, outputmargin = FALSE)
}
\arguments{
-\item{save_models}{a flag for whether to save the folds' models.}
+\item{save_models}{A flag for whether to save the folds' models.}
+
+\item{outputmargin}{Whether to save margin predictions (same effect as passing this
+parameter to \link{predict.xgb.Booster}).}
}
\value{
-Predictions are returned inside of the \code{pred} element, which is either a vector or a matrix,
+An \code{xgb.Callback} object, which can be passed to \link{xgb.cv},
+but \bold{not} to \link{xgb.train}.
+}
+\description{
+This callback function saves predictions for all of the test folds,
+and also allows saving the folds' models.
+}
+\details{
+Predictions are saved inside of the \code{pred} element, which is either a vector or a matrix,
depending on the number of prediction outputs per data row. The order of predictions corresponds
to the order of rows in the original dataset. Note that when a custom \code{folds} list is
provided in \code{xgb.cv}, the predictions would only be returned properly when this list is a
@@ -19,23 +30,3 @@ meaningful when user-provided folds have overlapping indices as in, e.g., random
When some of the indices in the training dataset are not included into user-provided \code{folds},
their prediction value would be \code{NA}.
}
-\description{
-Callback closure for returning cross-validation based predictions.
-}
-\details{
-This callback function saves predictions for all of the test folds,
-and also allows to save the folds' models.
-
-It is a "finalizer" callback and it uses early stopping information whenever it is available,
-thus it must be run after the early stopping callback if the early stopping is used.
-
-Callback function expects the following values to be set in its calling frame:
-\code{bst_folds},
-\code{basket},
-\code{data},
-\code{end_iteration},
-\code{params},
-}
-\seealso{
-\code{\link{callbacks}}
-}
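For reference, a hedged sketch of requesting out-of-fold margin predictions through the renamed callback; it assumes 'dtrain' is an xgb.DMatrix as in the demos above, and passing prediction = TRUE to xgb.cv adds the same callback with default settings:

cv <- xgb.cv(
  params = list(objective = "binary:logistic", nthread = 2),
  data = dtrain,
  nrounds = 5,
  nfold = 5,
  callbacks = list(xgb.cb.cv.predict(save_models = FALSE, outputmargin = TRUE))
)
str(cv$cv_predict$pred)  # out-of-fold predictions, in the row order of 'data'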
diff --git a/R-package/man/xgb.cb.early.stop.Rd b/R-package/man/xgb.cb.early.stop.Rd
new file mode 100644
index 000000000000..2a70f4943d92
--- /dev/null
+++ b/R-package/man/xgb.cb.early.stop.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.early.stop}
+\alias{xgb.cb.early.stop}
+\title{Callback to activate early stopping}
+\usage{
+xgb.cb.early.stop(
+ stopping_rounds,
+ maximize = FALSE,
+ metric_name = NULL,
+ verbose = TRUE,
+ keep_all_iter = TRUE
+)
+}
+\arguments{
+\item{stopping_rounds}{The number of rounds with no improvement in
+the evaluation metric in order to stop the training.}
+
+\item{maximize}{Whether to maximize the evaluation metric.}
+
+\item{metric_name}{The name of an evaluation column to use as a criteria for early
+stopping. If not set, the last column would be used.
+Let's say the test data in \code{evals} was labelled as \code{dtest},
+and one wants to use the AUC in test data for early stopping regardless of where
+it is in the \code{evals}, then one of the following would need to be set:
+\code{metric_name='dtest-auc'} or \code{metric_name='dtest_auc'}.
+All dash '-' characters in metric names are considered equivalent to '_'.}
+
+\item{verbose}{Whether to print the early stopping information.}
+
+\item{keep_all_iter}{Whether to keep all of the boosting rounds that were produced
+in the resulting object. If passing \code{FALSE}, will only keep the boosting rounds
+up to the detected best iteration, discarding the ones that come after.}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+This callback function determines the condition for early stopping.
+
+The following attributes are assigned to the booster's object:
+\itemize{
+\item \code{best_score} the evaluation score at the best iteration
+\item \code{best_iteration} at which boosting iteration the best score has occurred
+(0-based index for interoperability of binary models)
+}
+
+The same values are also stored as R attributes as a result of the callback, plus an additional
+attribute \code{stopped_by_max_rounds} which indicates whether an early stopping by the \code{stopping_rounds}
+condition occurred. Note that the \code{best_iteration} that is stored under R attributes will follow
+base-1 indexing, so it will be larger by '1' than the C-level 'best_iteration' that is accessed
+through \link{xgb.attr} or \link{xgb.attributes}.
+
+At least one dataset is required in \code{evals} for early stopping to work.
+}
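A hedged sketch of engaging early stopping explicitly through the renamed callback, using the 'metric_name' convention documented above; it assumes 'dtrain'/'dtest' are xgb.DMatrix objects as in the earlier sketches, and that the callback's R-level result is attached under its reserved name 'early_stop':

bst <- xgb.train(
  params = list(objective = "binary:logistic", eval_metric = "auc",
                max_depth = 2, eta = 1, nthread = 2),
  data = dtrain,
  nrounds = 50,
  evals = list(dtest = dtest),
  callbacks = list(
    xgb.cb.early.stop(stopping_rounds = 3, maximize = TRUE, metric_name = "dtest-auc")
  )
)
attributes(bst)$early_stop$best_iteration  # base-1 index (R attribute)
xgb.attr(bst, "best_iteration")            # 0-based C-level attribute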
diff --git a/R-package/man/xgb.cb.evaluation.log.Rd b/R-package/man/xgb.cb.evaluation.log.Rd
new file mode 100644
index 000000000000..4cc6ef636c66
--- /dev/null
+++ b/R-package/man/xgb.cb.evaluation.log.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.evaluation.log}
+\alias{xgb.cb.evaluation.log}
+\title{Callback for logging the evaluation history}
+\usage{
+xgb.cb.evaluation.log()
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+Callback for logging the evaluation history
+}
+\details{
+This callback creates a table with per-iteration evaluation metrics (see parameters
+\code{evals} and \code{feval} in \link{xgb.train}).
+
+Note: in the column names of the final data.table, the dash '-' character is replaced with
+the underscore '_' in order to make the column names more like regular R identifiers.
+}
+\seealso{
+\link{xgb.cb.print.evaluation}
+}
diff --git a/R-package/man/cb.gblinear.history.Rd b/R-package/man/xgb.cb.gblinear.history.Rd
similarity index 63%
rename from R-package/man/cb.gblinear.history.Rd
rename to R-package/man/xgb.cb.gblinear.history.Rd
index 2a03c14db2f6..0ebaa4685030 100644
--- a/R-package/man/cb.gblinear.history.Rd
+++ b/R-package/man/xgb.cb.gblinear.history.Rd
@@ -1,37 +1,48 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
-\name{cb.gblinear.history}
-\alias{cb.gblinear.history}
-\title{Callback closure for collecting the model coefficients history of a gblinear booster
-during its training.}
+\name{xgb.cb.gblinear.history}
+\alias{xgb.cb.gblinear.history}
+\title{Callback for collecting coefficients history of a gblinear booster}
\usage{
-cb.gblinear.history(sparse = FALSE)
+xgb.cb.gblinear.history(sparse = FALSE)
}
\arguments{
-\item{sparse}{when set to FALSE/TRUE, a dense/sparse matrix is used to store the result.
+\item{sparse}{when set to \code{FALSE}/\code{TRUE}, a dense/sparse matrix is used to store the result.
Sparse format is useful when one expects only a subset of coefficients to be non-zero,
when using the "thrifty" feature selector with fairly small number of top features
selected per iteration.}
}
\value{
-Results are stored in the \code{coefs} element of the closure.
-The \code{\link{xgb.gblinear.history}} convenience function provides an easy
-way to access it.
-With \code{xgb.train}, it is either a dense of a sparse matrix.
-While with \code{xgb.cv}, it is a list (an element per each fold) of such
-matrices.
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
}
\description{
-Callback closure for collecting the model coefficients history of a gblinear booster
-during its training.
+Callback for collecting coefficients history of a gblinear booster
}
\details{
To keep things fast and simple, gblinear booster does not internally store the history of linear
model coefficients at each boosting iteration. This callback provides a workaround for storing
the coefficients' path, by extracting them after each training iteration.
-Callback function expects the following values to be set in its calling frame:
-\code{bst} (or \code{bst_folds}).
+This callback will construct a matrix where rows are boosting iterations and columns are
+feature coefficients (same order as when calling \link{coef.xgb.Booster}, with the intercept
+corresponding to the first column).
+
+When there is more than one coefficient per feature (e.g. multi-class classification),
+the result will be reshaped into a vector where coefficients are arranged first by features and
+then by class (e.g. first 1 through N coefficients will be for the first class, then
+coefficients N+1 through 2N for the second class, and so on).
+
+If the result has only one coefficient per feature in the data, then the resulting matrix
+will have column names matching the feature names, otherwise (when there's more than
+one coefficient per feature) the names will be composed as 'column name' + ':' + 'class index'
+(so e.g. column 'c1' for class '0' will be named 'c1:0').
+
+With \code{xgb.train}, the output is either a dense or a sparse matrix.
+With \code{xgb.cv}, it is a list (one element per fold) of such
+matrices.
+
+Function \link{xgb.gblinear.history} provides an easy way to retrieve the
+outputs from this callback.
}
\examples{
#### Binary classification:
@@ -52,7 +63,7 @@ param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "a
# rate does not break the convergence, but allows us to illustrate the typical pattern of
# "stochastic explosion" behaviour of this lock-free algorithm at early boosting iterations.
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 1.,
- callbacks = list(cb.gblinear.history()))
+ callbacks = list(xgb.cb.gblinear.history()))
# Extract the coefficients' path and plot them vs boosting iteration number:
coef_path <- xgb.gblinear.history(bst)
matplot(coef_path, type = 'l')
@@ -61,7 +72,7 @@ matplot(coef_path, type = 'l')
# Will try the classical componentwise boosting which selects a single best feature per round:
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 200, eta = 0.8,
updater = 'coord_descent', feature_selector = 'thrifty', top_k = 1,
- callbacks = list(cb.gblinear.history()))
+ callbacks = list(xgb.cb.gblinear.history()))
matplot(xgb.gblinear.history(bst), type = 'l')
# Componentwise boosting is known to have similar effect to Lasso regularization.
# Try experimenting with various values of top_k, eta, nrounds,
@@ -69,7 +80,7 @@ matplot(xgb.gblinear.history(bst), type = 'l')
# For xgb.cv:
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8,
- callbacks = list(cb.gblinear.history()))
+ callbacks = list(xgb.cb.gblinear.history()))
# coefficients in the CV fold #3
matplot(xgb.gblinear.history(bst)[[3]], type = 'l')
@@ -82,7 +93,7 @@ param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3,
# For the default linear updater 'shotgun' it sometimes is helpful
# to use smaller eta to reduce instability
bst <- xgb.train(param, dtrain, list(tr=dtrain), nrounds = 50, eta = 0.5,
- callbacks = list(cb.gblinear.history()))
+ callbacks = list(xgb.cb.gblinear.history()))
# Will plot the coefficient paths separately for each class:
matplot(xgb.gblinear.history(bst, class_index = 0), type = 'l')
matplot(xgb.gblinear.history(bst, class_index = 1), type = 'l')
@@ -90,11 +101,11 @@ matplot(xgb.gblinear.history(bst, class_index = 2), type = 'l')
# CV:
bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 70, eta = 0.5,
- callbacks = list(cb.gblinear.history(FALSE)))
+ callbacks = list(xgb.cb.gblinear.history(FALSE)))
# 1st fold of 1st class
matplot(xgb.gblinear.history(bst, class_index = 0)[[1]], type = 'l')
}
\seealso{
-\code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}.
+\link{xgb.gblinear.history}, \link{coef.xgb.Booster}.
}
diff --git a/R-package/man/xgb.cb.print.evaluation.Rd b/R-package/man/xgb.cb.print.evaluation.Rd
new file mode 100644
index 000000000000..c4f2e6991278
--- /dev/null
+++ b/R-package/man/xgb.cb.print.evaluation.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.print.evaluation}
+\alias{xgb.cb.print.evaluation}
+\title{Callback for printing the result of evaluation}
+\usage{
+xgb.cb.print.evaluation(period = 1, showsd = TRUE)
+}
+\arguments{
+\item{period}{Results will be printed every \code{period} iterations.}
+
+\item{showsd}{whether standard deviations should be printed (when available)}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
+\description{
+The callback function prints the evaluation results every \code{period} iterations.
+The initial and the last iteration's evaluations are always printed.
+
+Does not leave any attribute in the booster (see \link{xgb.cb.evaluation.log} for that).
+}
+\seealso{
+\link{xgb.Callback}
+}
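+% Illustrative usage sketch (hand-written, not roxygen-generated), using the agaricus data
+% bundled with xgboost: evaluation results are printed only every second iteration.
+\examples{
+data(agaricus.train, package = "xgboost")
+dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+bst <- xgb.train(
+  params = list(objective = "binary:logistic", eval_metric = "auc", max_depth = 2),
+  data = dtrain,
+  nrounds = 10,
+  evals = list(train = dtrain),
+  callbacks = list(xgb.cb.print.evaluation(period = 2))
+)
+}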
diff --git a/R-package/man/cb.reset.parameters.Rd b/R-package/man/xgb.cb.reset.parameters.Rd
similarity index 57%
rename from R-package/man/cb.reset.parameters.Rd
rename to R-package/man/xgb.cb.reset.parameters.Rd
index ee0a5d1bde93..c7e8638178ac 100644
--- a/R-package/man/cb.reset.parameters.Rd
+++ b/R-package/man/xgb.cb.reset.parameters.Rd
@@ -1,10 +1,10 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/callbacks.R
-\name{cb.reset.parameters}
-\alias{cb.reset.parameters}
-\title{Callback closure for resetting the booster's parameters at each iteration.}
+\name{xgb.cb.reset.parameters}
+\alias{xgb.cb.reset.parameters}
+\title{Callback for resetting the booster's parameters at each iteration.}
\usage{
-cb.reset.parameters(new_params)
+xgb.cb.reset.parameters(new_params)
}
\arguments{
\item{new_params}{a list where each element corresponds to a parameter that needs to be reset.
@@ -14,23 +14,16 @@ or a function of two parameters \code{learning_rates(iteration, nrounds)}
which returns a new parameter value by using the current iteration number
and the total number of boosting rounds.}
}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train} or \link{xgb.cv}.
+}
\description{
-Callback closure for resetting the booster's parameters at each iteration.
+Callback for resetting the booster's parameters at each iteration.
}
\details{
-This is a "pre-iteration" callback function used to reset booster's parameters
-at the beginning of each iteration.
-
Note that when training is resumed from some previous model, and a function is used to
reset a parameter value, the \code{nrounds} argument in this function would be the
number of boosting rounds in the current training.
-Callback function expects the following values to be set in its calling frame:
-\code{bst} or \code{bst_folds},
-\code{iteration},
-\code{begin_iteration},
-\code{end_iteration}.
-}
-\seealso{
-\code{\link{callbacks}}
+Does not leave any attribute in the booster.
}
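+% Illustrative usage sketch (hand-written, not roxygen-generated), using the agaricus data
+% bundled with xgboost: the learning rate is lowered after the first boosting round.
+\examples{
+data(agaricus.train, package = "xgboost")
+dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+my_etas <- list(eta = c(0.5, 0.1))
+bst <- xgb.train(
+  params = list(objective = "binary:logistic", max_depth = 2),
+  data = dtrain,
+  nrounds = 2,
+  evals = list(train = dtrain),
+  verbose = 0,
+  callbacks = list(xgb.cb.reset.parameters(my_etas))
+)
+}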
diff --git a/R-package/man/xgb.cb.save.model.Rd b/R-package/man/xgb.cb.save.model.Rd
new file mode 100644
index 000000000000..8ddba2f1a587
--- /dev/null
+++ b/R-package/man/xgb.cb.save.model.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/callbacks.R
+\name{xgb.cb.save.model}
+\alias{xgb.cb.save.model}
+\title{Callback for saving a model file.}
+\usage{
+xgb.cb.save.model(save_period = 0, save_name = "xgboost.ubj")
+}
+\arguments{
+\item{save_period}{Save the model to disk after every
+\code{save_period} iterations; 0 means save the model at the end.}
+
+\item{save_name}{The name or path for the saved model file.
+It can contain a \code{\link[base]{sprintf}} formatting specifier
+to include the integer iteration number in the file name.
+E.g., with \code{save_name} = 'xgboost_\%04d.model',
+the file saved at iteration 50 would be named "xgboost_0050.model".}
+}
+\value{
+An \code{xgb.Callback} object, which can be passed to \link{xgb.train},
+but \bold{not} to \link{xgb.cv}.
+}
+\description{
+This callback function makes it possible to save an xgb-model file, either periodically
+(every \code{save_period} iterations) or at the end of training.
+
+Does not leave any attribute in the booster.
+}
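+% Illustrative usage sketch (hand-written, not roxygen-generated), using the agaricus data
+% bundled with xgboost: models are written under 'tempdir()' with the iteration number
+% substituted into the file name via the sprintf specifier.
+\examples{
+data(agaricus.train, package = "xgboost")
+dtrain <- xgb.DMatrix(agaricus.train$data, label = agaricus.train$label)
+bst <- xgb.train(
+  params = list(objective = "binary:logistic", max_depth = 2),
+  data = dtrain,
+  nrounds = 3,
+  verbose = 0,
+  callbacks = list(
+    xgb.cb.save.model(save_period = 1, save_name = file.path(tempdir(), "xgboost_\%02d.json"))
+  )
+)
+list.files(tempdir(), pattern = "^xgboost_")
+}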
diff --git a/R-package/man/xgb.create.features.Rd b/R-package/man/xgb.create.features.Rd
index 68b5619970f9..995c27459a5e 100644
--- a/R-package/man/xgb.create.features.Rd
+++ b/R-package/man/xgb.create.features.Rd
@@ -82,7 +82,6 @@ new.dtrain <- xgb.DMatrix(
new.dtest <- xgb.DMatrix(
data = new.features.test, label = agaricus.test$label, nthread = 2
)
-watchlist <- list(train = new.dtrain)
bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2)
# Model accuracy with new features
diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd
index 9f6103a52762..cede67570683 100644
--- a/R-package/man/xgb.cv.Rd
+++ b/R-package/man/xgb.cv.Rd
@@ -9,14 +9,12 @@ xgb.cv(
data,
nrounds,
nfold,
- label = NULL,
- missing = NA,
prediction = FALSE,
showsd = TRUE,
metrics = list(),
obj = NULL,
feval = NULL,
- stratified = TRUE,
+ stratified = "auto",
folds = NULL,
train_folds = NULL,
verbose = TRUE,
@@ -44,22 +42,25 @@ is a shorter summary:
}
See \code{\link{xgb.train}} for further details.
-See also demo/ for walkthrough example in R.}
+See also demo/ for walkthrough example in R.
-\item{data}{takes an \code{xgb.DMatrix}, \code{matrix}, or \code{dgCMatrix} as the input.}
+Note that, while \code{params} accepts a \code{seed} entry and will use that parameter for model training if
+supplied, this seed is not used for creating the train-test splits, which instead rely on R's own RNG
+system - thus, for reproducible results, one needs to call the \code{set.seed} function beforehand.}
-\item{nrounds}{the max number of iterations}
+\item{data}{An \code{xgb.DMatrix} object, with corresponding fields like \code{label} or bounds as required
+for model training by the objective.
-\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
+\if{html}{\out{}}\preformatted{ Note that only the basic `xgb.DMatrix` class is supported - variants such as `xgb.QuantileDMatrix`
+ or `xgb.ExternalDMatrix` are not supported here.
+}\if{html}{\out{
}}}
-\item{label}{vector of response values. Should be provided only when data is an R-matrix.}
+\item{nrounds}{the max number of iterations}
-\item{missing}{is only used when input is a dense matrix. By default is set to NA, which means
-that NA values should be considered as 'missing' by the algorithm.
-Sometimes, 0 or other extreme value might be used to represent missing values.}
+\item{nfold}{the original dataset is randomly partitioned into \code{nfold} equal size subsamples.}
\item{prediction}{A logical value indicating whether to return the test fold predictions
-from each CV model. This parameter engages the \code{\link{cb.cv.predict}} callback.}
+from each CV model. This parameter engages the \code{\link{xgb.cb.cv.predict}} callback.}
\item{showsd}{\code{boolean}, whether to show standard deviation of cross validation}
@@ -84,34 +85,54 @@ gradient with given prediction and dtrain.}
\code{list(metric='metric-name', value='metric-value')} with given
prediction and dtrain.}
-\item{stratified}{a \code{boolean} indicating whether sampling of folds should be stratified
-by the values of outcome labels.}
+\item{stratified}{A \code{boolean} indicating whether sampling of folds should be stratified
+by the values of outcome labels. For real-valued labels in regression objectives,
+stratification will be done by discretizing the labels into up to 5 buckets beforehand.
+
+\if{html}{\out{}}\preformatted{ If passing "auto", will be set to `TRUE` if the objective in `params` is a classification
+ objective (from XGBoost's built-in objectives, doesn't apply to custom ones), and to
+ `FALSE` otherwise.
+
+ This parameter is ignored when `data` has a `group` field - in such case, the splitting
+ will be based on whole groups (note that this might make the folds have different sizes).
+
+ Value `TRUE` here is \\bold\{not\} supported for custom objectives.
+}\if{html}{\out{
}}}
\item{folds}{\code{list} provides a possibility to use a list of pre-defined CV folds
(each element must be a vector of test fold's indices). When folds are supplied,
-the \code{nfold} and \code{stratified} parameters are ignored.}
+the \code{nfold} and \code{stratified} parameters are ignored.
+
+\if{html}{\out{}}\preformatted{ If `data` has a `group` field and the objective requires this field, each fold (list element)
+ must additionally have two attributes (retrievable through \link{attributes}) named `group_test`
+ and `group_train`, which should hold the `group` to assign through \link{setinfo.xgb.DMatrix} to
+ the resulting DMatrices.
+}\if{html}{\out{
}}}
\item{train_folds}{\code{list} list specifying which indices to use for training. If \code{NULL}
-(the default) all indices not specified in \code{folds} will be used for training.}
+(the default) all indices not specified in \code{folds} will be used for training.
+
+\if{html}{\out{}}\preformatted{ This is not supported when `data` has `group` field.
+}\if{html}{\out{
}}}
\item{verbose}{\code{boolean}, print the statistics during the process}
\item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}.
Default is 1 which means all messages are printed. This parameter is passed to the
-\code{\link{cb.print.evaluation}} callback.}
+\code{\link{xgb.cb.print.evaluation}} callback.}
\item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered.
If set to an integer \code{k}, training with a validation set will stop if the performance
doesn't improve for \code{k} rounds.
-Setting this parameter engages the \code{\link{cb.early.stop}} callback.}
+Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.}
\item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set,
then this parameter must be set as well.
When it is \code{TRUE}, it means the larger the evaluation score the better.
-This parameter is passed to the \code{\link{cb.early.stop}} callback.}
+This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.}
\item{callbacks}{a list of callback functions to perform various task during boosting.
-See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
parameters' values. User can provide either existing or their own callback methods in order
to customize the training process.}
@@ -122,27 +143,27 @@ An object of class \code{xgb.cv.synchronous} with the following elements:
\itemize{
\item \code{call} a function call.
\item \code{params} parameters that were passed to the xgboost library. Note that it does not
-capture parameters changed by the \code{\link{cb.reset.parameters}} callback.
-\item \code{callbacks} callback functions that were either automatically assigned or
-explicitly passed.
+capture parameters changed by the \code{\link{xgb.cb.reset.parameters}} callback.
\item \code{evaluation_log} evaluation history stored as a \code{data.table} with the
first column corresponding to iteration number and the rest corresponding to the
CV-based evaluation means and standard deviations for the training and test CV-sets.
-It is created by the \code{\link{cb.evaluation.log}} callback.
+It is created by the \code{\link{xgb.cb.evaluation.log}} callback.
\item \code{niter} number of boosting iterations.
\item \code{nfeatures} number of features in training data.
\item \code{folds} the list of CV folds' indices - either those passed through the \code{folds}
parameter or randomly generated.
\item \code{best_iteration} iteration number with the best evaluation metric value
(only available with early stopping).
-\item \code{pred} CV prediction values available when \code{prediction} is set.
-It is either vector or matrix (see \code{\link{cb.cv.predict}}).
-\item \code{models} a list of the CV folds' models. It is only available with the explicit
-setting of the \code{cb.cv.predict(save_models = TRUE)} callback.
}
+
+Plus other potential elements that are the result of callbacks, such as a list \code{cv_predict} with
+a sub-element \code{pred} when passing \code{prediction = TRUE}, which is added by the \link{xgb.cb.cv.predict}
+callback (note that one can also pass it manually under \code{callbacks} with different settings,
+such as also saving the models created during cross validation); or a list \code{early_stop} which
+will contain elements such as \code{best_iteration} when using the early stopping callback (\link{xgb.cb.early.stop}).
}
\description{
-The cross validation function of xgboost
+The cross validation function of xgboost.
}
\details{
The original sample is randomly partitioned into \code{nfold} equal size subsamples.
diff --git a/R-package/man/xgb.gblinear.history.Rd b/R-package/man/xgb.gblinear.history.Rd
index 103be16f11a9..25aef7163e40 100644
--- a/R-package/man/xgb.gblinear.history.Rd
+++ b/R-package/man/xgb.gblinear.history.Rd
@@ -8,7 +8,7 @@ xgb.gblinear.history(model, class_index = NULL)
}
\arguments{
\item{model}{either an \code{xgb.Booster} or a result of \code{xgb.cv()}, trained
-using the \code{cb.gblinear.history()} callback, but \bold{not} a booster
+using the \link{xgb.cb.gblinear.history} callback, but \bold{not} a booster
loaded from \link{xgb.load} or \link{xgb.load.raw}.}
\item{class_index}{zero-based class index to extract the coefficients for only that
@@ -16,23 +16,31 @@ specific class in a multinomial multiclass model. When it is NULL, all the
coefficients are returned. Has no effect in non-multiclass models.}
}
\value{
-For an \code{xgb.train} result, a matrix (either dense or sparse) with the columns
-corresponding to iteration's coefficients (in the order as \code{xgb.dump()} would
-return) and the rows corresponding to boosting iterations.
+For an \link{xgb.train} result, a matrix (either dense or sparse) with the columns
+corresponding to the model coefficients and the rows corresponding to boosting iterations.
-For an \code{xgb.cv} result, a list of such matrices is returned with the elements
+For an \link{xgb.cv} result, a list of such matrices is returned with the elements
corresponding to CV folds.
+
+When there is more than one coefficient per feature (e.g. multi-class classification)
+and \code{class_index} is not provided,
+the result will be reshaped into a vector where coefficients are arranged first by features and
+then by class (e.g. first 1 through N coefficients will be for the first class, then
+coefficients N+1 through 2N for the second class, and so on).
}
\description{
A helper function to extract the matrix of linear coefficients' history
-from a gblinear model created while using the \code{cb.gblinear.history()}
-callback.
+from a gblinear model created while using the \link{xgb.cb.gblinear.history}
+callback (which must be added manually as by default it's not used).
}
\details{
Note that this is an R-specific function that relies on R attributes that
are not saved when using xgboost's own serialization functions like \link{xgb.load}
or \link{xgb.load.raw}.
-In order for a serialized model to be accepted by tgis function, one must use R
+In order for a serialized model to be accepted by this function, one must use R
serializers such as \link{saveRDS}.
}
+\seealso{
+\link{xgb.cb.gblinear.history}, \link{coef.xgb.Booster}.
+}
diff --git a/R-package/man/xgb.load.Rd b/R-package/man/xgb.load.Rd
index 1fbe0055ed9d..e18a900e3f13 100644
--- a/R-package/man/xgb.load.Rd
+++ b/R-package/man/xgb.load.Rd
@@ -17,7 +17,7 @@ Load xgboost model from the binary model file.
}
\details{
The input file is expected to contain a model saved in an xgboost model format
-using either \code{\link{xgb.save}} or \code{\link{cb.save.model}} in R, or using some
+using either \code{\link{xgb.save}} or \code{\link{xgb.cb.save.model}} in R, or using some
appropriate methods from other xgboost interfaces. E.g., a model trained in Python and
saved from there in xgboost format, could be loaded from R.
diff --git a/R-package/man/xgb.slice.DMatrix.Rd b/R-package/man/xgb.slice.DMatrix.Rd
index c9695996b66f..c4f7765943bb 100644
--- a/R-package/man/xgb.slice.DMatrix.Rd
+++ b/R-package/man/xgb.slice.DMatrix.Rd
@@ -6,14 +6,18 @@
\title{Get a new DMatrix containing the specified rows of
original xgb.DMatrix object}
\usage{
-xgb.slice.DMatrix(object, idxset)
+xgb.slice.DMatrix(object, idxset, allow_groups = FALSE)
\method{[}{xgb.DMatrix}(object, idxset, colset = NULL)
}
\arguments{
-\item{object}{Object of class "xgb.DMatrix"}
+\item{object}{Object of class "xgb.DMatrix".}
-\item{idxset}{a integer vector of indices of rows needed}
+\item{idxset}{An integer vector of indices of rows needed (base-1 indexing).}
+
+\item{allow_groups}{Whether to allow slicing an \code{xgb.DMatrix} with a \code{group} (or
+equivalently \code{qid}) field. Note that in that case, the result will not have
+the groups anymore - they need to be set manually through \code{setinfo}.}
\item{colset}{currently not used (columns subsetting is not available)}
}
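+% Illustrative sketch (hand-written, not roxygen-generated) of slicing a DMatrix that carries
+% a 'group' field (e.g. for ranking); the group sizes used below are for illustration only.
+\examples{
+data(iris)
+x <- as.matrix(iris[, -5])
+y <- as.integer(iris$Petal.Width)
+dm <- xgb.DMatrix(x, label = y, group = rep(50, 3))
+# Slicing drops the groups, so 'allow_groups' must be set and the field re-assigned:
+dsub <- xgb.slice.DMatrix(dm, 1:100, allow_groups = TRUE)
+setinfo(dsub, "group", rep(50, 2))
+}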
diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd
index 21c5fe7eebe4..21c8dbe16413 100644
--- a/R-package/man/xgb.train.Rd
+++ b/R-package/man/xgb.train.Rd
@@ -9,7 +9,7 @@ xgb.train(
params = list(),
data,
nrounds,
- watchlist = list(),
+ evals = list(),
obj = NULL,
feval = NULL,
verbose = 1,
@@ -158,13 +158,13 @@ List is provided in detail section.}
\item{nrounds}{max number of boosting iterations.}
-\item{watchlist}{named list of xgb.DMatrix datasets to use for evaluating model performance.
+\item{evals}{Named list of \code{xgb.DMatrix} datasets to use for evaluating model performance.
Metrics specified in either \code{eval_metric} or \code{feval} will be computed for each
of these datasets during each boosting iteration, and stored in the end as a field named
\code{evaluation_log} in the resulting object. When either \code{verbose>=1} or
-\code{\link{cb.print.evaluation}} callback is engaged, the performance results are continuously
+\code{\link{xgb.cb.print.evaluation}} callback is engaged, the performance results are continuously
printed out during the training.
-E.g., specifying \code{watchlist=list(validation1=mat1, validation2=mat2)} allows to track
+E.g., specifying \code{evals=list(validation1=mat1, validation2=mat2)} allows to track
the performance of each round's model on mat1 and mat2.}
\item{obj}{customized objective function. Returns gradient and second order
@@ -177,24 +177,24 @@ prediction and dtrain.}
\item{verbose}{If 0, xgboost will stay silent. If 1, it will print information about performance.
If 2, some additional information will be printed out.
Note that setting \code{verbose > 0} automatically engages the
-\code{cb.print.evaluation(period=1)} callback function.}
+\code{xgb.cb.print.evaluation(period=1)} callback function.}
\item{print_every_n}{Print each n-th iteration evaluation messages when \code{verbose>0}.
Default is 1 which means all messages are printed. This parameter is passed to the
-\code{\link{cb.print.evaluation}} callback.}
+\code{\link{xgb.cb.print.evaluation}} callback.}
\item{early_stopping_rounds}{If \code{NULL}, the early stopping function is not triggered.
If set to an integer \code{k}, training with a validation set will stop if the performance
doesn't improve for \code{k} rounds.
-Setting this parameter engages the \code{\link{cb.early.stop}} callback.}
+Setting this parameter engages the \code{\link{xgb.cb.early.stop}} callback.}
\item{maximize}{If \code{feval} and \code{early_stopping_rounds} are set,
then this parameter must be set as well.
When it is \code{TRUE}, it means the larger the evaluation score the better.
-This parameter is passed to the \code{\link{cb.early.stop}} callback.}
+This parameter is passed to the \code{\link{xgb.cb.early.stop}} callback.}
\item{save_period}{when it is non-NULL, model is saved to disk after every \code{save_period} rounds,
-0 means save at the end. The saving is handled by the \code{\link{cb.save.model}} callback.}
+0 means save at the end. The saving is handled by the \code{\link{xgb.cb.save.model}} callback.}
\item{save_name}{the name or path for periodically saved model file.}
@@ -203,12 +203,13 @@ Could be either an object of class \code{xgb.Booster}, or its raw data, or the n
file with a previously saved model.}
\item{callbacks}{a list of callback functions to perform various task during boosting.
-See \code{\link{callbacks}}. Some of the callbacks are automatically created depending on the
+See \code{\link{xgb.Callback}}. Some of the callbacks are automatically created depending on the
parameters' values. User can provide either existing or their own callback methods in order
to customize the training process.
-\if{html}{\out{}}\preformatted{ Note that some callbacks might try to set an evaluation log - be aware that these evaluation logs
- are kept as R attributes, and thus do not get saved when using non-R serializaters like
+\if{html}{\out{
}}\preformatted{ Note that some callbacks might try to leave attributes in the resulting model object,
+ such as an evaluation log (a `data.table` object) - be aware that these objects are kept
+ as R attributes, and thus do not get saved when using XGBoost's own serializers like
\link{xgb.save} (but are kept when using R serializers like \link{saveRDS}).
}\if{html}{\out{
}}}
@@ -233,7 +234,7 @@ The \code{xgboost} function is a simpler wrapper for \code{xgb.train}.
\details{
These are the training functions for \code{xgboost}.
-The \code{xgb.train} interface supports advanced features such as \code{watchlist},
+The \code{xgb.train} interface supports advanced features such as \code{evals},
customized objective and evaluation metric functions, therefore it is more flexible
than the \code{xgboost} interface.
@@ -269,18 +270,19 @@ Different threshold (e.g., 0.) could be specified as "error@0."
The following callbacks are automatically created when certain parameters are set:
\itemize{
-\item \code{cb.print.evaluation} is turned on when \code{verbose > 0};
+\item \code{xgb.cb.print.evaluation} is turned on when \code{verbose > 0};
and the \code{print_every_n} parameter is passed to it.
-\item \code{cb.evaluation.log} is on when \code{watchlist} is present.
-\item \code{cb.early.stop}: when \code{early_stopping_rounds} is set.
-\item \code{cb.save.model}: when \code{save_period > 0} is set.
+\item \code{xgb.cb.evaluation.log} is on when \code{evals} is present.
+\item \code{xgb.cb.early.stop}: when \code{early_stopping_rounds} is set.
+\item \code{xgb.cb.save.model}: when \code{save_period > 0} is set.
}
Note that objects of type \code{xgb.Booster} as returned by this function behave a bit differently
from typical R objects (it's an 'altrep' list class), and it makes a separation between
internal booster attributes (restricted to jsonifyable data), accessed through \link{xgb.attr}
and shared between interfaces through serialization functions like \link{xgb.save}; and
-R-specific attributes, accessed through \link{attributes} and \link{attr}, which are otherwise
+R-specific attributes (typically the result from a callback), accessed through \link{attributes}
+and \link{attr}, which are otherwise
only used in the R interface, only kept when using R's serializers like \link{saveRDS}, and
not anyhow used by functions like \link{predict.xgb.Booster}.
@@ -305,12 +307,12 @@ dtrain <- with(
dtest <- with(
agaricus.test, xgb.DMatrix(data, label = label, nthread = nthread)
)
-watchlist <- list(train = dtrain, eval = dtest)
+evals <- list(train = dtrain, eval = dtest)
## A simple xgb.train example:
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
## An xgb.train example where custom objective and evaluation metric are
## used:
@@ -331,15 +333,15 @@ evalerror <- function(preds, dtrain) {
# as 'objective' and 'eval_metric' parameters in the params list:
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = logregobj, eval_metric = evalerror)
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
# or through the ... arguments:
param <- list(max_depth = 2, eta = 1, nthread = nthread)
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
objective = logregobj, eval_metric = evalerror)
# or as dedicated 'obj' and 'feval' parameters of xgb.train:
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals,
obj = logregobj, feval = evalerror)
@@ -347,11 +349,11 @@ bst <- xgb.train(param, dtrain, nrounds = 2, watchlist,
param <- list(max_depth = 2, eta = 1, nthread = nthread,
objective = "binary:logistic", eval_metric = "auc")
my_etas <- list(eta = c(0.5, 0.1))
-bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
- callbacks = list(cb.reset.parameters(my_etas)))
+bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+ callbacks = list(xgb.cb.reset.parameters(my_etas)))
## Early stopping:
-bst <- xgb.train(param, dtrain, nrounds = 25, watchlist,
+bst <- xgb.train(param, dtrain, nrounds = 25, evals = evals,
early_stopping_rounds = 3)
## An 'xgboost' interface example:
@@ -366,7 +368,7 @@ Tianqi Chen and Carlos Guestrin, "XGBoost: A Scalable Tree Boosting System",
22nd SIGKDD Conference on Knowledge Discovery and Data Mining, 2016, \url{https://arxiv.org/abs/1603.02754}
}
\seealso{
-\code{\link{callbacks}},
+\code{\link{xgb.Callback}},
\code{\link{predict.xgb.Booster}},
\code{\link{xgb.cv}}
}
diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in
index 0f4b3ac6f6a7..93cfb8e5b4c1 100644
--- a/R-package/src/Makevars.in
+++ b/R-package/src/Makevars.in
@@ -99,15 +99,14 @@ OBJECTS= \
$(PKGROOT)/src/context.o \
$(PKGROOT)/src/logging.o \
$(PKGROOT)/src/global_config.o \
+ $(PKGROOT)/src/collective/result.o \
$(PKGROOT)/src/collective/allgather.o \
$(PKGROOT)/src/collective/allreduce.o \
$(PKGROOT)/src/collective/broadcast.o \
$(PKGROOT)/src/collective/comm.o \
+ $(PKGROOT)/src/collective/comm_group.o \
$(PKGROOT)/src/collective/coll.o \
- $(PKGROOT)/src/collective/communicator-inl.o \
$(PKGROOT)/src/collective/tracker.o \
- $(PKGROOT)/src/collective/communicator.o \
- $(PKGROOT)/src/collective/in_memory_communicator.o \
$(PKGROOT)/src/collective/in_memory_handler.o \
$(PKGROOT)/src/collective/loop.o \
$(PKGROOT)/src/collective/socket.o \
@@ -132,7 +131,4 @@ OBJECTS= \
$(PKGROOT)/src/common/version.o \
$(PKGROOT)/src/c_api/c_api.o \
$(PKGROOT)/src/c_api/c_api_error.o \
- $(PKGROOT)/amalgamation/dmlc-minimum0.o \
- $(PKGROOT)/rabit/src/engine.o \
- $(PKGROOT)/rabit/src/rabit_c_api.o \
- $(PKGROOT)/rabit/src/allreduce_base.o
+ $(PKGROOT)/amalgamation/dmlc-minimum0.o
diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win
index 0c2084de940c..f160930e8a4a 100644
--- a/R-package/src/Makevars.win
+++ b/R-package/src/Makevars.win
@@ -99,15 +99,14 @@ OBJECTS= \
$(PKGROOT)/src/context.o \
$(PKGROOT)/src/logging.o \
$(PKGROOT)/src/global_config.o \
+ $(PKGROOT)/src/collective/result.o \
$(PKGROOT)/src/collective/allgather.o \
$(PKGROOT)/src/collective/allreduce.o \
$(PKGROOT)/src/collective/broadcast.o \
$(PKGROOT)/src/collective/comm.o \
+ $(PKGROOT)/src/collective/comm_group.o \
$(PKGROOT)/src/collective/coll.o \
- $(PKGROOT)/src/collective/communicator-inl.o \
$(PKGROOT)/src/collective/tracker.o \
- $(PKGROOT)/src/collective/communicator.o \
- $(PKGROOT)/src/collective/in_memory_communicator.o \
$(PKGROOT)/src/collective/in_memory_handler.o \
$(PKGROOT)/src/collective/loop.o \
$(PKGROOT)/src/collective/socket.o \
@@ -132,7 +131,4 @@ OBJECTS= \
$(PKGROOT)/src/common/version.o \
$(PKGROOT)/src/c_api/c_api.o \
$(PKGROOT)/src/c_api/c_api_error.o \
- $(PKGROOT)/amalgamation/dmlc-minimum0.o \
- $(PKGROOT)/rabit/src/engine.o \
- $(PKGROOT)/rabit/src/rabit_c_api.o \
- $(PKGROOT)/rabit/src/allreduce_base.o
+ $(PKGROOT)/amalgamation/dmlc-minimum0.o
diff --git a/R-package/src/init.c b/R-package/src/init.c
index f2635742ebd7..5db3218b4e1b 100644
--- a/R-package/src/init.c
+++ b/R-package/src/init.c
@@ -71,11 +71,12 @@ extern SEXP XGDMatrixGetDataAsCSR_R(SEXP);
extern SEXP XGDMatrixSaveBinary_R(SEXP, SEXP, SEXP);
extern SEXP XGDMatrixSetInfo_R(SEXP, SEXP, SEXP);
extern SEXP XGDMatrixSetStrFeatureInfo_R(SEXP, SEXP, SEXP);
-extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP);
+extern SEXP XGDMatrixSliceDMatrix_R(SEXP, SEXP, SEXP);
extern SEXP XGBSetGlobalConfig_R(SEXP);
extern SEXP XGBGetGlobalConfig_R(void);
extern SEXP XGBoosterFeatureScore_R(SEXP, SEXP);
extern SEXP XGBoosterSlice_R(SEXP, SEXP, SEXP, SEXP);
+extern SEXP XGBoosterSliceAndReplace_R(SEXP, SEXP, SEXP, SEXP);
static const R_CallMethodDef CallEntries[] = {
{"XGDuplicate_R", (DL_FUNC) &XGDuplicate_R, 1},
@@ -133,11 +134,12 @@ static const R_CallMethodDef CallEntries[] = {
{"XGDMatrixSaveBinary_R", (DL_FUNC) &XGDMatrixSaveBinary_R, 3},
{"XGDMatrixSetInfo_R", (DL_FUNC) &XGDMatrixSetInfo_R, 3},
{"XGDMatrixSetStrFeatureInfo_R", (DL_FUNC) &XGDMatrixSetStrFeatureInfo_R, 3},
- {"XGDMatrixSliceDMatrix_R", (DL_FUNC) &XGDMatrixSliceDMatrix_R, 2},
+ {"XGDMatrixSliceDMatrix_R", (DL_FUNC) &XGDMatrixSliceDMatrix_R, 3},
{"XGBSetGlobalConfig_R", (DL_FUNC) &XGBSetGlobalConfig_R, 1},
{"XGBGetGlobalConfig_R", (DL_FUNC) &XGBGetGlobalConfig_R, 0},
{"XGBoosterFeatureScore_R", (DL_FUNC) &XGBoosterFeatureScore_R, 2},
{"XGBoosterSlice_R", (DL_FUNC) &XGBoosterSlice_R, 4},
+ {"XGBoosterSliceAndReplace_R", (DL_FUNC) &XGBoosterSliceAndReplace_R, 4},
{NULL, NULL, 0}
};
diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc
index 5baf8d41282e..cdb9ba65c3ef 100644
--- a/R-package/src/xgboost_R.cc
+++ b/R-package/src/xgboost_R.cc
@@ -512,7 +512,7 @@ XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP
return ret;
}
-XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
+XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset, SEXP allow_groups) {
SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
R_API_BEGIN();
R_xlen_t len = Rf_xlength(idxset);
@@ -531,7 +531,7 @@ XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset) {
res_code = XGDMatrixSliceDMatrixEx(R_ExternalPtrAddr(handle),
BeginPtr(idxvec), len,
&res,
- 0);
+ Rf_asLogical(allow_groups));
}
CHECK_CALL(res_code);
R_SetExternalPtrAddr(ret, res);
@@ -1674,3 +1674,18 @@ XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEX
Rf_unprotect(1);
return out;
}
+
+XGB_DLL SEXP XGBoosterSliceAndReplace_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step) {
+ R_API_BEGIN();
+ BoosterHandle old_handle = R_ExternalPtrAddr(handle);
+ BoosterHandle new_handle = nullptr;
+ CHECK_CALL(XGBoosterSlice(old_handle,
+ Rf_asInteger(begin_layer),
+ Rf_asInteger(end_layer),
+ Rf_asInteger(step),
+ &new_handle));
+ R_SetExternalPtrAddr(handle, new_handle);
+ CHECK_CALL(XGBoosterFree(old_handle));
+ R_API_END();
+ return R_NilValue;
+}
diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h
index 70fd885e7f12..62be5022a3d2 100644
--- a/R-package/src/xgboost_R.h
+++ b/R-package/src/xgboost_R.h
@@ -112,9 +112,10 @@ XGB_DLL SEXP XGDMatrixCreateFromCSR_R(SEXP indptr, SEXP indices, SEXP data, SEXP
* \brief create a new dmatrix from sliced content of existing matrix
* \param handle instance of data matrix to be sliced
* \param idxset index set
+ * \param allow_groups Whether to allow slicing the DMatrix if it has a 'group' field
* \return a sliced new matrix
*/
-XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset);
+XGB_DLL SEXP XGDMatrixSliceDMatrix_R(SEXP handle, SEXP idxset, SEXP allow_groups);
/*!
* \brief load a data matrix into binary file
@@ -535,4 +536,14 @@ XGB_DLL SEXP XGBoosterFeatureScore_R(SEXP handle, SEXP json_config);
*/
XGB_DLL SEXP XGBoosterSlice_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
+/*!
+ * \brief Slice a fitted booster model (by rounds), and replace its handle with the result
+ * \param handle handle to the fitted booster
+ * \param begin_layer start of the slice
+ * \param end_layer end of the slice; end_layer=0 is equivalent to end_layer=num_boost_round
+ * \param step step size of the slice
+ * \return NULL
+ */
+XGB_DLL SEXP XGBoosterSliceAndReplace_R(SEXP handle, SEXP begin_layer, SEXP end_layer, SEXP step);
+
#endif // XGBOOST_WRAPPER_R_H_ // NOLINT(*)
diff --git a/R-package/tests/testthat.R b/R-package/tests/testthat.R
index 7cf711292c48..bad6c1df3915 100644
--- a/R-package/tests/testthat.R
+++ b/R-package/tests/testthat.R
@@ -1,5 +1,6 @@
library(testthat)
library(xgboost)
+library(Matrix)
test_check("xgboost", reporter = ProgressReporter)
RhpcBLASctl::omp_set_num_threads(1)
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
index 5438c8bb2235..bbb8fb323478 100644
--- a/R-package/tests/testthat/test_basic.R
+++ b/R-package/tests/testthat/test_basic.R
@@ -20,7 +20,7 @@ test_that("train and predict binary classification", {
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = nrounds,
objective = "binary:logistic", eval_metric = "error",
- watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
+ evals = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error"
)
@@ -152,7 +152,7 @@ test_that("train and predict softprob", {
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softprob", num_class = 3, eval_metric = "merror",
- watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
+ evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
),
"train-merror"
)
@@ -203,7 +203,7 @@ test_that("train and predict softmax", {
data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb),
max_depth = 3, eta = 0.5, nthread = n_threads, nrounds = 5,
objective = "multi:softmax", num_class = 3, eval_metric = "merror",
- watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
+ evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
),
"train-merror"
)
@@ -226,7 +226,7 @@ test_that("train and predict RF", {
nthread = n_threads,
nrounds = 1, objective = "binary:logistic", eval_metric = "error",
num_parallel_tree = 20, subsample = 0.6, colsample_bytree = 0.1,
- watchlist = list(train = xgb.DMatrix(train$data, label = lb))
+ evals = list(train = xgb.DMatrix(train$data, label = lb))
)
expect_equal(xgb.get.num.boosted.rounds(bst), 1)
@@ -250,7 +250,7 @@ test_that("train and predict RF with softprob", {
objective = "multi:softprob", eval_metric = "merror",
num_class = 3, verbose = 0,
num_parallel_tree = 4, subsample = 0.5, colsample_bytree = 0.5,
- watchlist = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
+ evals = list(train = xgb.DMatrix(as.matrix(iris[, -5]), label = lb))
)
expect_equal(xgb.get.num.boosted.rounds(bst), 15)
# predict for all iterations:
@@ -271,7 +271,7 @@ test_that("use of multiple eval metrics works", {
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", eval_metric = "auc", eval_metric = "logloss",
- watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
+ evals = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error.*train-auc.*train-logloss"
)
@@ -283,7 +283,7 @@ test_that("use of multiple eval metrics works", {
data = xgb.DMatrix(train$data, label = train$label), max_depth = 2,
eta = 1, nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = list("error", "auc", "logloss"),
- watchlist = list(train = xgb.DMatrix(train$data, label = train$label))
+ evals = list(train = xgb.DMatrix(train$data, label = train$label))
),
"train-error.*train-auc.*train-logloss"
)
@@ -295,19 +295,19 @@ test_that("use of multiple eval metrics works", {
test_that("training continuation works", {
dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads)
- watchlist <- list(train = dtrain)
+ evals <- list(train = dtrain)
param <- list(
objective = "binary:logistic", max_depth = 2, eta = 1, nthread = n_threads
)
# for the reference, use 4 iterations at once:
set.seed(11)
- bst <- xgb.train(param, dtrain, nrounds = 4, watchlist, verbose = 0)
+ bst <- xgb.train(param, dtrain, nrounds = 4, evals = evals, verbose = 0)
# first two iterations:
set.seed(11)
- bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0)
+ bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0)
# continue for two more:
- bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = bst1)
+ bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = bst1)
if (!windows_flag && !solaris_flag) {
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
}
@@ -315,7 +315,7 @@ test_that("training continuation works", {
expect_equal(dim(attributes(bst2)$evaluation_log), c(4, 2))
expect_equal(attributes(bst2)$evaluation_log, attributes(bst)$evaluation_log)
# test continuing from raw model data
- bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
+ bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
if (!windows_flag && !solaris_flag) {
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
}
@@ -323,7 +323,7 @@ test_that("training continuation works", {
# test continuing from a model in file
fname <- file.path(tempdir(), "xgboost.json")
xgb.save(bst1, fname)
- bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, xgb_model = fname)
+ bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0, xgb_model = fname)
if (!windows_flag && !solaris_flag) {
expect_equal(xgb.save.raw(bst), xgb.save.raw(bst2))
}
@@ -334,7 +334,7 @@ test_that("xgb.cv works", {
set.seed(11)
expect_output(
cv <- xgb.cv(
- data = train$data, label = train$label, max_depth = 2, nfold = 5,
+ data = xgb.DMatrix(train$data, label = train$label), max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
eval_metric = "error", verbose = TRUE
),
@@ -348,7 +348,6 @@ test_that("xgb.cv works", {
expect_false(is.null(cv$folds) && is.list(cv$folds))
expect_length(cv$folds, 5)
expect_false(is.null(cv$params) && is.list(cv$params))
- expect_false(is.null(cv$callbacks))
expect_false(is.null(cv$call))
})
@@ -358,13 +357,13 @@ test_that("xgb.cv works with stratified folds", {
cv <- xgb.cv(
data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
- verbose = TRUE, stratified = FALSE
+ verbose = FALSE, stratified = FALSE
)
set.seed(314159)
cv2 <- xgb.cv(
data = dtrain, max_depth = 2, nfold = 5,
eta = 1., nthread = n_threads, nrounds = 2, objective = "binary:logistic",
- verbose = TRUE, stratified = TRUE
+ verbose = FALSE, stratified = TRUE
)
# Stratified folds should result in a different evaluation logs
expect_true(all(cv$evaluation_log[, test_logloss_mean] != cv2$evaluation_log[, test_logloss_mean]))
@@ -418,7 +417,7 @@ test_that("max_delta_step works", {
dtrain <- xgb.DMatrix(
agaricus.train$data, label = agaricus.train$label, nthread = n_threads
)
- watchlist <- list(train = dtrain)
+ evals <- list(train = dtrain)
param <- list(
objective = "binary:logistic", eval_metric = "logloss", max_depth = 2,
nthread = n_threads,
@@ -426,9 +425,9 @@ test_that("max_delta_step works", {
)
nrounds <- 5
# model with no restriction on max_delta_step
- bst1 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1)
+ bst1 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1)
# model with restricted max_delta_step
- bst2 <- xgb.train(param, dtrain, nrounds, watchlist, verbose = 1, max_delta_step = 1)
+ bst2 <- xgb.train(param, dtrain, nrounds, evals = evals, verbose = 1, max_delta_step = 1)
# the no-restriction model is expected to have consistently lower loss during the initial iterations
expect_true(all(attributes(bst1)$evaluation_log$train_logloss < attributes(bst2)$evaluation_log$train_logloss))
expect_lt(mean(attributes(bst1)$evaluation_log$train_logloss) / mean(attributes(bst2)$evaluation_log$train_logloss), 0.8)
@@ -445,7 +444,7 @@ test_that("colsample_bytree works", {
colnames(test_x) <- paste0("Feature_", sprintf("%03d", 1:100))
dtrain <- xgb.DMatrix(train_x, label = train_y, nthread = n_threads)
dtest <- xgb.DMatrix(test_x, label = test_y, nthread = n_threads)
- watchlist <- list(train = dtrain, eval = dtest)
+ evals <- list(train = dtrain, eval = dtest)
## Use colsample_bytree = 0.01, so that roughly one out of 100 features is chosen for
## each tree
param <- list(
@@ -454,7 +453,7 @@ test_that("colsample_bytree works", {
eval_metric = "auc"
)
set.seed(2)
- bst <- xgb.train(param, dtrain, nrounds = 100, watchlist, verbose = 0)
+ bst <- xgb.train(param, dtrain, nrounds = 100, evals = evals, verbose = 0)
xgb.importance(model = bst)
# If colsample_bytree works properly, a variety of features should be used
# in the 100 trees
@@ -886,3 +885,57 @@ test_that("Seed in params override PRNG from R", {
)
)
})
+
+test_that("xgb.cv works for AFT", {
+ X <- matrix(c(1, -1, -1, 1, 0, 1, 1, 0), nrow = 4, byrow = TRUE) # 4x2 matrix
+ dtrain <- xgb.DMatrix(X, nthread = n_threads)
+
+ params <- list(objective = 'survival:aft', learning_rate = 0.2, max_depth = 2L)
+
+ # data must have bounds
+ expect_error(
+ xgb.cv(
+ params = params,
+ data = dtrain,
+ nround = 5L,
+ nfold = 4L,
+ nthread = n_threads
+ )
+ )
+
+ setinfo(dtrain, 'label_lower_bound', c(2, 3, 0, 4))
+ setinfo(dtrain, 'label_upper_bound', c(2, Inf, 4, 5))
+
+ # automatic stratified splitting is turned off
+ expect_warning(
+ xgb.cv(
+ params = params, data = dtrain, nround = 5L, nfold = 4L,
+ nthread = n_threads, stratified = TRUE, verbose = FALSE
+ )
+ )
+
+ # this works without any issue
+ expect_no_warning(
+ xgb.cv(params = params, data = dtrain, nround = 5L, nfold = 4L, verbose = FALSE)
+ )
+})
+
+test_that("xgb.cv works for ranking", {
+ data(iris)
+ x <- iris[, -(4:5)]
+ y <- as.integer(iris$Petal.Width)
+ group <- rep(50, 3)
+ dm <- xgb.DMatrix(x, label = y, group = group)
+ res <- xgb.cv(
+ data = dm,
+ params = list(
+ objective = "rank:pairwise",
+ max_depth = 3
+ ),
+ nrounds = 3,
+ nfold = 2,
+ verbose = FALSE,
+ stratified = FALSE
+ )
+ expect_equal(length(res$folds), 2L)
+})
diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R
index c60d0c246f81..bf95a170dcfc 100644
--- a/R-package/tests/testthat/test_callbacks.R
+++ b/R-package/tests/testthat/test_callbacks.R
@@ -19,7 +19,7 @@ ltrain <- add.noise(train$label, 0.2)
ltest <- add.noise(test$label, 0.2)
dtrain <- xgb.DMatrix(train$data, label = ltrain, nthread = n_threads)
dtest <- xgb.DMatrix(test$data, label = ltest, nthread = n_threads)
-watchlist <- list(train = dtrain, test = dtest)
+evals <- list(train = dtrain, test = dtest)
err <- function(label, pr) sum((pr > 0.5) != label) / length(label)
@@ -28,79 +28,125 @@ param <- list(objective = "binary:logistic", eval_metric = "error",
max_depth = 2, nthread = n_threads)
-test_that("cb.print.evaluation works as expected", {
-
- bst_evaluation <- c('train-auc' = 0.9, 'test-auc' = 0.8)
- bst_evaluation_err <- NULL
- begin_iteration <- 1
- end_iteration <- 7
-
- f0 <- cb.print.evaluation(period = 0)
- f1 <- cb.print.evaluation(period = 1)
- f5 <- cb.print.evaluation(period = 5)
-
- expect_false(is.null(attr(f1, 'call')))
- expect_equal(attr(f1, 'name'), 'cb.print.evaluation')
-
- iteration <- 1
- expect_silent(f0())
- expect_output(f1(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
- expect_output(f5(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
- expect_null(f1())
+test_that("xgb.cb.print.evaluation works as expected for xgb.train", {
+ logs1 <- capture.output({
+ model <- xgb.train(
+ data = dtrain,
+ params = list(
+ objective = "binary:logistic",
+ eval_metric = "auc",
+ max_depth = 2,
+ nthread = n_threads
+ ),
+ nrounds = 10,
+ evals = list(train = dtrain, test = dtest),
+ callbacks = list(xgb.cb.print.evaluation(period = 1))
+ )
+ })
+ expect_equal(length(logs1), 10)
+ expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+\ttest-auc:0\\.\\d+\\s*$", logs1)))
+ lapply(seq(1, 10), function(x) expect_true(grepl(paste0("^\\[", x), logs1[x])))
+
+ logs2 <- capture.output({
+ model <- xgb.train(
+ data = dtrain,
+ params = list(
+ objective = "binary:logistic",
+ eval_metric = "auc",
+ max_depth = 2,
+ nthread = n_threads
+ ),
+ nrounds = 10,
+ evals = list(train = dtrain, test = dtest),
+ callbacks = list(xgb.cb.print.evaluation(period = 2))
+ )
+ })
+ expect_equal(length(logs2), 6)
+ expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+\ttest-auc:0\\.\\d+\\s*$", logs2)))
+ seq_matches <- c(seq(1, 10, 2), 10)
+ lapply(seq_along(seq_matches), function(x) expect_true(grepl(paste0("^\\[", seq_matches[x]), logs2[x])))
+})
- iteration <- 2
- expect_output(f1(), "\\[2\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
- expect_silent(f5())
+test_that("xgb.cb.print.evaluation works as expected for xgb.cv", {
+ logs1 <- capture.output({
+ model <- xgb.cv(
+ data = dtrain,
+ params = list(
+ objective = "binary:logistic",
+ eval_metric = "auc",
+ max_depth = 2,
+ nthread = n_threads
+ ),
+ nrounds = 10,
+ nfold = 3,
+ callbacks = list(xgb.cb.print.evaluation(period = 1, showsd = TRUE))
+ )
+ })
+ expect_equal(length(logs1), 10)
+ expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+±0\\.\\d+\ttest-auc:0\\.\\d+±0\\.\\d+\\s*$", logs1)))
+ lapply(seq(1, 10), function(x) expect_true(grepl(paste0("^\\[", x), logs1[x])))
+
+ logs2 <- capture.output({
+ model <- xgb.cv(
+ data = dtrain,
+ params = list(
+ objective = "binary:logistic",
+ eval_metric = "auc",
+ max_depth = 2,
+ nthread = n_threads
+ ),
+ nrounds = 10,
+ nfold = 3,
+ callbacks = list(xgb.cb.print.evaluation(period = 2, showsd = TRUE))
+ )
+ })
+ expect_equal(length(logs2), 6)
+ expect_true(all(grepl("^\\[\\d{1,2}\\]\ttrain-auc:0\\.\\d+±0\\.\\d+\ttest-auc:0\\.\\d+±0\\.\\d+\\s*$", logs2)))
+ seq_matches <- c(seq(1, 10, 2), 10)
+ lapply(seq_along(seq_matches), function(x) expect_true(grepl(paste0("^\\[", seq_matches[x]), logs2[x])))
+})
- iteration <- 7
- expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
- expect_output(f5(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000")
+test_that("xgb.cb.evaluation.log works as expected for xgb.train", {
+ model <- xgb.train(
+ data = dtrain,
+ params = list(
+ objective = "binary:logistic",
+ eval_metric = "auc",
+ max_depth = 2,
+ nthread = n_threads
+ ),
+ nrounds = 10,
+ verbose = FALSE,
+ evals = list(train = dtrain, test = dtest),
+ callbacks = list(xgb.cb.evaluation.log())
+ )
+ logs <- attributes(model)$evaluation_log
- bst_evaluation_err <- c('train-auc' = 0.1, 'test-auc' = 0.2)
- expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000±0.100000\ttest-auc:0.800000±0.200000")
+ expect_equal(nrow(logs), 10)
+ expect_equal(colnames(logs), c("iter", "train_auc", "test_auc"))
})
-test_that("cb.evaluation.log works as expected", {
-
- bst_evaluation <- c('train-auc' = 0.9, 'test-auc' = 0.8)
- bst_evaluation_err <- NULL
-
- evaluation_log <- list()
- f <- cb.evaluation.log()
-
- expect_false(is.null(attr(f, 'call')))
- expect_equal(attr(f, 'name'), 'cb.evaluation.log')
-
- iteration <- 1
- expect_silent(f())
- expect_equal(evaluation_log,
- list(c(iter = 1, bst_evaluation)))
- iteration <- 2
- expect_silent(f())
- expect_equal(evaluation_log,
- list(c(iter = 1, bst_evaluation), c(iter = 2, bst_evaluation)))
- expect_silent(f(finalize = TRUE))
- expect_equal(evaluation_log,
- data.table::data.table(iter = 1:2, train_auc = c(0.9, 0.9), test_auc = c(0.8, 0.8)))
-
- bst_evaluation_err <- c('train-auc' = 0.1, 'test-auc' = 0.2)
- evaluation_log <- list()
- f <- cb.evaluation.log()
-
- iteration <- 1
- expect_silent(f())
- expect_equal(evaluation_log,
- list(c(iter = 1, c(bst_evaluation, bst_evaluation_err))))
- iteration <- 2
- expect_silent(f())
- expect_equal(evaluation_log,
- list(c(iter = 1, c(bst_evaluation, bst_evaluation_err)),
- c(iter = 2, c(bst_evaluation, bst_evaluation_err))))
- expect_silent(f(finalize = TRUE))
- expect_equal(evaluation_log,
- data.table::data.table(iter = 1:2,
- train_auc_mean = c(0.9, 0.9), train_auc_std = c(0.1, 0.1),
- test_auc_mean = c(0.8, 0.8), test_auc_std = c(0.2, 0.2)))
+test_that("xgb.cb.evaluation.log works as expected for xgb.cv", {
+ model <- xgb.cv(
+ data = dtrain,
+ params = list(
+ objective = "binary:logistic",
+ eval_metric = "auc",
+ max_depth = 2,
+ nthread = n_threads
+ ),
+ nrounds = 10,
+ verbose = FALSE,
+ nfold = 3,
+ callbacks = list(xgb.cb.evaluation.log())
+ )
+ logs <- model$evaluation_log
+
+ expect_equal(nrow(logs), 10)
+ expect_equal(
+ colnames(logs),
+ c("iter", "train_auc_mean", "train_auc_std", "test_auc_mean", "test_auc_std")
+ )
})
@@ -109,26 +155,26 @@ param <- list(objective = "binary:logistic", eval_metric = "error",
test_that("can store evaluation_log without printing", {
expect_silent(
- bst <- xgb.train(param, dtrain, nrounds = 10, watchlist, eta = 1, verbose = 0)
+ bst <- xgb.train(param, dtrain, nrounds = 10, evals = evals, eta = 1, verbose = 0)
)
expect_false(is.null(attributes(bst)$evaluation_log))
expect_false(is.null(attributes(bst)$evaluation_log$train_error))
expect_lt(attributes(bst)$evaluation_log[, min(train_error)], 0.2)
})
-test_that("cb.reset.parameters works as expected", {
+test_that("xgb.cb.reset.parameters works as expected", {
# fixed eta
set.seed(111)
- bst0 <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 0.9, verbose = 0)
+ bst0 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 0.9, verbose = 0)
expect_false(is.null(attributes(bst0)$evaluation_log))
expect_false(is.null(attributes(bst0)$evaluation_log$train_error))
# same eta but re-set as a vector parameter in the callback
set.seed(111)
my_par <- list(eta = c(0.9, 0.9))
- bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
- callbacks = list(cb.reset.parameters(my_par)))
+ bst1 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+ callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bst1)$evaluation_log$train_error))
expect_equal(attributes(bst0)$evaluation_log$train_error,
attributes(bst1)$evaluation_log$train_error)
@@ -136,8 +182,8 @@ test_that("cb.reset.parameters works as expected", {
# same eta but re-set via a function in the callback
set.seed(111)
my_par <- list(eta = function(itr, itr_end) 0.9)
- bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
- callbacks = list(cb.reset.parameters(my_par)))
+ bst2 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+ callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bst2)$evaluation_log$train_error))
expect_equal(attributes(bst0)$evaluation_log$train_error,
attributes(bst2)$evaluation_log$train_error)
@@ -145,39 +191,39 @@ test_that("cb.reset.parameters works as expected", {
# different eta re-set as a vector parameter in the callback
set.seed(111)
my_par <- list(eta = c(0.6, 0.5))
- bst3 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
- callbacks = list(cb.reset.parameters(my_par)))
+ bst3 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+ callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bst3)$evaluation_log$train_error))
expect_false(all(attributes(bst0)$evaluation_log$train_error == attributes(bst3)$evaluation_log$train_error))
# resetting multiple parameters at the same time runs with no error
my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8))
expect_error(
- bst4 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
- callbacks = list(cb.reset.parameters(my_par)))
+ bst4 <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+ callbacks = list(xgb.cb.reset.parameters(my_par)))
, NA) # NA = no error
# CV works as well
expect_error(
bst4 <- xgb.cv(param, dtrain, nfold = 2, nrounds = 2, verbose = 0,
- callbacks = list(cb.reset.parameters(my_par)))
+ callbacks = list(xgb.cb.reset.parameters(my_par)))
, NA) # NA = no error
# expect no learning with 0 learning rate
my_par <- list(eta = c(0., 0.))
- bstX <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0,
- callbacks = list(cb.reset.parameters(my_par)))
+ bstX <- xgb.train(param, dtrain, nrounds = 2, evals = evals, verbose = 0,
+ callbacks = list(xgb.cb.reset.parameters(my_par)))
expect_false(is.null(attributes(bstX)$evaluation_log$train_error))
er <- unique(attributes(bstX)$evaluation_log$train_error)
expect_length(er, 1)
expect_gt(er, 0.4)
})
-test_that("cb.save.model works as expected", {
+test_that("xgb.cb.save.model works as expected", {
files <- c('xgboost_01.json', 'xgboost_02.json', 'xgboost.json')
files <- unname(sapply(files, function(f) file.path(tempdir(), f)))
for (f in files) if (file.exists(f)) file.remove(f)
- bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
+ bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
save_period = 1, save_name = file.path(tempdir(), "xgboost_%02d.json"))
expect_true(file.exists(files[1]))
expect_true(file.exists(files[2]))
@@ -193,7 +239,7 @@ test_that("cb.save.model works as expected", {
expect_equal(xgb.save.raw(bst), xgb.save.raw(b2))
# save_period = 0 saves the last iteration's model
- bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0,
+ bst <- xgb.train(param, dtrain, nrounds = 2, evals = evals, eta = 1, verbose = 0,
save_period = 0, save_name = file.path(tempdir(), 'xgboost.json'))
expect_true(file.exists(files[3]))
b2 <- xgb.load(files[3])
@@ -206,7 +252,7 @@ test_that("cb.save.model works as expected", {
test_that("early stopping xgb.train works", {
set.seed(11)
expect_output(
- bst <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
+ bst <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
early_stopping_rounds = 3, maximize = FALSE)
, "Stopping. Best iteration")
expect_false(is.null(xgb.attr(bst, "best_iteration")))
@@ -220,7 +266,7 @@ test_that("early stopping xgb.train works", {
set.seed(11)
expect_silent(
- bst0 <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3,
+ bst0 <- xgb.train(param, dtrain, nrounds = 20, evals = evals, eta = 0.3,
early_stopping_rounds = 3, maximize = FALSE, verbose = 0)
)
expect_equal(attributes(bst)$evaluation_log, attributes(bst0)$evaluation_log)
@@ -236,10 +282,10 @@ test_that("early stopping xgb.train works", {
test_that("early stopping using a specific metric works", {
set.seed(11)
expect_output(
- bst <- xgb.train(param[-2], dtrain, nrounds = 20, watchlist, eta = 0.6,
+ bst <- xgb.train(param[-2], dtrain, nrounds = 20, evals = evals, eta = 0.6,
eval_metric = "logloss", eval_metric = "auc",
- callbacks = list(cb.early.stop(stopping_rounds = 3, maximize = FALSE,
- metric_name = 'test_logloss')))
+ callbacks = list(xgb.cb.early.stop(stopping_rounds = 3, maximize = FALSE,
+ metric_name = 'test_logloss')))
, "Stopping. Best iteration")
expect_false(is.null(xgb.attr(bst, "best_iteration")))
expect_lt(xgb.attr(bst, "best_iteration"), 19)
@@ -269,7 +315,7 @@ test_that("early stopping works with titanic", {
nrounds = 100,
early_stopping_rounds = 3,
nthread = n_threads,
- watchlist = list(train = xgb.DMatrix(dtx, label = dty))
+ evals = list(train = xgb.DMatrix(dtx, label = dty))
)
expect_true(TRUE) # should not crash
@@ -281,10 +327,10 @@ test_that("early stopping xgb.cv works", {
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.3, nrounds = 20,
early_stopping_rounds = 3, maximize = FALSE)
, "Stopping. Best iteration")
- expect_false(is.null(cv$best_iteration))
- expect_lt(cv$best_iteration, 19)
+ expect_false(is.null(cv$early_stop$best_iteration))
+ expect_lt(cv$early_stop$best_iteration, 19)
# the best error is min error:
- expect_true(cv$evaluation_log[, test_error_mean[cv$best_iteration] == min(test_error_mean)])
+ expect_true(cv$evaluation_log[, test_error_mean[cv$early_stop$best_iteration] == min(test_error_mean)])
})
test_that("prediction in xgb.cv works", {
@@ -292,19 +338,19 @@ test_that("prediction in xgb.cv works", {
nrounds <- 4
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0)
expect_false(is.null(cv$evaluation_log))
- expect_false(is.null(cv$pred))
- expect_length(cv$pred, nrow(train$data))
- err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))))
+ expect_false(is.null(cv$cv_predict$pred))
+ expect_length(cv$cv_predict$pred, nrow(train$data))
+ err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$cv_predict$pred[f]))))
err_log <- cv$evaluation_log[nrounds, test_error_mean]
expect_equal(err_pred, err_log, tolerance = 1e-6)
# save CV models
set.seed(11)
cvx <- xgb.cv(param, dtrain, nfold = 5, eta = 0.5, nrounds = nrounds, prediction = TRUE, verbose = 0,
- callbacks = list(cb.cv.predict(save_models = TRUE)))
+ callbacks = list(xgb.cb.cv.predict(save_models = TRUE)))
expect_equal(cv$evaluation_log, cvx$evaluation_log)
- expect_length(cvx$models, 5)
- expect_true(all(sapply(cvx$models, class) == 'xgb.Booster'))
+ expect_length(cvx$cv_predict$models, 5)
+ expect_true(all(sapply(cvx$cv_predict$models, class) == 'xgb.Booster'))
})
test_that("prediction in xgb.cv works for gblinear too", {
@@ -312,8 +358,8 @@ test_that("prediction in xgb.cv works for gblinear too", {
p <- list(booster = 'gblinear', objective = "reg:logistic", nthread = n_threads)
cv <- xgb.cv(p, dtrain, nfold = 5, eta = 0.5, nrounds = 2, prediction = TRUE, verbose = 0)
expect_false(is.null(cv$evaluation_log))
- expect_false(is.null(cv$pred))
- expect_length(cv$pred, nrow(train$data))
+ expect_false(is.null(cv$cv_predict$pred))
+ expect_length(cv$cv_predict$pred, nrow(train$data))
})
test_that("prediction in early-stopping xgb.cv works", {
@@ -321,17 +367,17 @@ test_that("prediction in early-stopping xgb.cv works", {
expect_output(
cv <- xgb.cv(param, dtrain, nfold = 5, eta = 0.1, nrounds = 20,
early_stopping_rounds = 5, maximize = FALSE, stratified = FALSE,
- prediction = TRUE, base_score = 0.5)
+ prediction = TRUE, base_score = 0.5, verbose = TRUE)
, "Stopping. Best iteration")
- expect_false(is.null(cv$best_iteration))
- expect_lt(cv$best_iteration, 19)
+ expect_false(is.null(cv$early_stop$best_iteration))
+ expect_lt(cv$early_stop$best_iteration, 19)
expect_false(is.null(cv$evaluation_log))
- expect_false(is.null(cv$pred))
- expect_length(cv$pred, nrow(train$data))
+ expect_false(is.null(cv$cv_predict$pred))
+ expect_length(cv$cv_predict$pred, nrow(train$data))
- err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))))
- err_log <- cv$evaluation_log[cv$best_iteration, test_error_mean]
+ err_pred <- mean(sapply(cv$folds, function(f) mean(err(ltrain[f], cv$cv_predict$pred[f]))))
+ err_log <- cv$evaluation_log[cv$early_stop$best_iteration, test_error_mean]
expect_equal(err_pred, err_log, tolerance = 1e-6)
err_log_last <- cv$evaluation_log[cv$niter, test_error_mean]
expect_gt(abs(err_pred - err_log_last), 1e-4)
@@ -341,14 +387,55 @@ test_that("prediction in xgb.cv for softprob works", {
lb <- as.numeric(iris$Species) - 1
set.seed(11)
expect_warning(
- cv <- xgb.cv(data = as.matrix(iris[, -5]), label = lb, nfold = 4,
+ cv <- xgb.cv(data = xgb.DMatrix(as.matrix(iris[, -5]), label = lb), nfold = 4,
eta = 0.5, nrounds = 5, max_depth = 3, nthread = n_threads,
subsample = 0.8, gamma = 2, verbose = 0,
prediction = TRUE, objective = "multi:softprob", num_class = 3)
, NA)
- expect_false(is.null(cv$pred))
- expect_equal(dim(cv$pred), c(nrow(iris), 3))
- expect_lt(diff(range(rowSums(cv$pred))), 1e-6)
+ expect_false(is.null(cv$cv_predict$pred))
+ expect_equal(dim(cv$cv_predict$pred), c(nrow(iris), 3))
+ expect_lt(diff(range(rowSums(cv$cv_predict$pred))), 1e-6)
+})
+
+test_that("prediction in xgb.cv works for multi-quantile", {
+ data(mtcars)
+ y <- mtcars$mpg
+ x <- as.matrix(mtcars[, -1])
+ dm <- xgb.DMatrix(x, label = y, nthread = 1)
+ cv <- xgb.cv(
+ data = dm,
+ params = list(
+ objective = "reg:quantileerror",
+ quantile_alpha = c(0.1, 0.2, 0.5, 0.8, 0.9),
+ nthread = 1
+ ),
+ nrounds = 5,
+ nfold = 3,
+ prediction = TRUE,
+ verbose = 0
+ )
+ expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 5))
+})
+
+test_that("prediction in xgb.cv works for multi-output", {
+ data(mtcars)
+ y <- mtcars$mpg
+ x <- as.matrix(mtcars[, -1])
+ dm <- xgb.DMatrix(x, label = cbind(y, -y), nthread = 1)
+ cv <- xgb.cv(
+ data = dm,
+ params = list(
+ tree_method = "hist",
+ multi_strategy = "multi_output_tree",
+ objective = "reg:squarederror",
+ nthread = n_threads
+ ),
+ nrounds = 5,
+ nfold = 3,
+ prediction = TRUE,
+ verbose = 0
+ )
+ expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 2))
})
test_that("prediction in xgb.cv works for multi-quantile", {
@@ -368,7 +455,7 @@ test_that("prediction in xgb.cv works for multi-quantile", {
prediction = TRUE,
verbose = 0
)
- expect_equal(dim(cv$pred), c(nrow(x), 5))
+ expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 5))
})
test_that("prediction in xgb.cv works for multi-output", {
@@ -389,5 +476,5 @@ test_that("prediction in xgb.cv works for multi-output", {
prediction = TRUE,
verbose = 0
)
- expect_equal(dim(cv$pred), c(nrow(x), 2))
+ expect_equal(dim(cv$cv_predict$pred), c(nrow(x), 2))
})
diff --git a/R-package/tests/testthat/test_custom_objective.R b/R-package/tests/testthat/test_custom_objective.R
index c6503124682d..d3050b152aa0 100644
--- a/R-package/tests/testthat/test_custom_objective.R
+++ b/R-package/tests/testthat/test_custom_objective.R
@@ -12,7 +12,7 @@ dtrain <- xgb.DMatrix(
dtest <- xgb.DMatrix(
agaricus.test$data, label = agaricus.test$label, nthread = n_threads
)
-watchlist <- list(eval = dtest, train = dtrain)
+evals <- list(eval = dtest, train = dtrain)
logregobj <- function(preds, dtrain) {
labels <- getinfo(dtrain, "label")
@@ -33,7 +33,7 @@ param <- list(max_depth = 2, eta = 1, nthread = n_threads,
num_round <- 2
test_that("custom objective works", {
- bst <- xgb.train(param, dtrain, num_round, watchlist)
+ bst <- xgb.train(param, dtrain, num_round, evals)
expect_equal(class(bst), "xgb.Booster")
expect_false(is.null(attributes(bst)$evaluation_log))
expect_false(is.null(attributes(bst)$evaluation_log$eval_error))
@@ -48,7 +48,7 @@ test_that("custom objective in CV works", {
})
test_that("custom objective with early stop works", {
- bst <- xgb.train(param, dtrain, 10, watchlist)
+ bst <- xgb.train(param, dtrain, 10, evals)
expect_equal(class(bst), "xgb.Booster")
train_log <- attributes(bst)$evaluation_log$train_error
expect_true(all(diff(train_log) <= 0))
@@ -66,7 +66,7 @@ test_that("custom objective using DMatrix attr works", {
return(list(grad = grad, hess = hess))
}
param$objective <- logregobjattr
- bst <- xgb.train(param, dtrain, num_round, watchlist)
+ bst <- xgb.train(param, dtrain, num_round, evals)
expect_equal(class(bst), "xgb.Booster")
})
diff --git a/R-package/tests/testthat/test_dmatrix.R b/R-package/tests/testthat/test_dmatrix.R
index 0612406444ae..548afece378c 100644
--- a/R-package/tests/testthat/test_dmatrix.R
+++ b/R-package/tests/testthat/test_dmatrix.R
@@ -41,13 +41,13 @@ test_that("xgb.DMatrix: basic construction", {
params <- list(tree_method = "hist", nthread = n_threads)
bst_fd <- xgb.train(
- params, nrounds = 8, fd, watchlist = list(train = fd)
+ params, nrounds = 8, fd, evals = list(train = fd)
)
bst_dgr <- xgb.train(
- params, nrounds = 8, fdgr, watchlist = list(train = fdgr)
+ params, nrounds = 8, fdgr, evals = list(train = fdgr)
)
bst_dgc <- xgb.train(
- params, nrounds = 8, fdgc, watchlist = list(train = fdgc)
+ params, nrounds = 8, fdgc, evals = list(train = fdgc)
)
raw_fd <- xgb.save.raw(bst_fd, raw_format = "ubj")
@@ -243,7 +243,7 @@ test_that("xgb.DMatrix: print", {
txt <- capture.output({
print(dtrain)
})
- expect_equal(txt, "xgb.DMatrix dim: 6513 x 126 info: label weight base_margin colnames: yes")
+ expect_equal(txt, "xgb.DMatrix dim: 6513 x 126 info: base_margin, label, weight colnames: yes")
# DMatrix with just features
dtrain <- xgb.DMatrix(
@@ -724,6 +724,44 @@ test_that("xgb.DMatrix: quantile cuts look correct", {
)
})
+test_that("xgb.DMatrix: slicing keeps field indicators", {
+ data(mtcars)
+ x <- as.matrix(mtcars[, -1])
+ y <- mtcars[, 1]
+ dm <- xgb.DMatrix(
+ data = x,
+ label_lower_bound = -y,
+ label_upper_bound = y,
+ nthread = 1
+ )
+ idx_take <- seq(1, 5)
+ dm_slice <- xgb.slice.DMatrix(dm, idx_take)
+
+ expect_true(xgb.DMatrix.hasinfo(dm_slice, "label_lower_bound"))
+ expect_true(xgb.DMatrix.hasinfo(dm_slice, "label_upper_bound"))
+ expect_false(xgb.DMatrix.hasinfo(dm_slice, "label"))
+
+ expect_equal(getinfo(dm_slice, "label_lower_bound"), -y[idx_take], tolerance = 1e-6)
+ expect_equal(getinfo(dm_slice, "label_upper_bound"), y[idx_take], tolerance = 1e-6)
+})
+
+test_that("xgb.DMatrix: can slice with groups", {
+ data(iris)
+ x <- as.matrix(iris[, -5])
+ set.seed(123)
+ y <- sample(3, size = nrow(x), replace = TRUE)
+ group <- c(50, 50, 50)
+ dm <- xgb.DMatrix(x, label = y, group = group, nthread = 1)
+ idx_take <- seq(1, 50)
+ dm_slice <- xgb.slice.DMatrix(dm, idx_take, allow_groups = TRUE)
+
+ expect_true(xgb.DMatrix.hasinfo(dm_slice, "label"))
+ expect_false(xgb.DMatrix.hasinfo(dm_slice, "group"))
+ expect_false(xgb.DMatrix.hasinfo(dm_slice, "qid"))
+ expect_null(getinfo(dm_slice, "group"))
+ expect_equal(getinfo(dm_slice, "label"), y[idx_take], tolerance = 1e-6)
+})
+
test_that("xgb.DMatrix: can read CSV", {
txt <- paste(
"1,2,3",
diff --git a/R-package/tests/testthat/test_feature_weights.R b/R-package/tests/testthat/test_feature_weights.R
index 4ed78c9b6cfe..54fec67cfcf5 100644
--- a/R-package/tests/testthat/test_feature_weights.R
+++ b/R-package/tests/testthat/test_feature_weights.R
@@ -25,7 +25,7 @@ test_that("training with feature weights works", {
expect_lt(importance[1, Frequency], importance[9, Frequency])
}
- for (tm in c("hist", "approx", "exact")) {
+ for (tm in c("hist", "approx")) {
test(tm)
}
})
diff --git a/R-package/tests/testthat/test_glm.R b/R-package/tests/testthat/test_glm.R
index 349bcce8d1f5..b59de8b62f15 100644
--- a/R-package/tests/testthat/test_glm.R
+++ b/R-package/tests/testthat/test_glm.R
@@ -14,37 +14,37 @@ test_that("gblinear works", {
param <- list(objective = "binary:logistic", eval_metric = "error", booster = "gblinear",
nthread = n_threads, eta = 0.8, alpha = 0.0001, lambda = 0.0001)
- watchlist <- list(eval = dtest, train = dtrain)
+ evals <- list(eval = dtest, train = dtrain)
n <- 5 # iterations
ERR_UL <- 0.005 # upper limit for the test set error
VERB <- 0 # chatterbox switch
param$updater <- 'shotgun'
- bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
+ bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
ypred <- predict(bst, dtest)
expect_equal(length(getinfo(dtest, 'label')), 1611)
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
- bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic',
- callbacks = list(cb.gblinear.history()))
+ bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic',
+ callbacks = list(xgb.cb.gblinear.history()))
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
h <- xgb.gblinear.history(bst)
expect_equal(dim(h), c(n, ncol(dtrain) + 1))
expect_is(h, "matrix")
param$updater <- 'coord_descent'
- bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'cyclic')
+ bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'cyclic')
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
- bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'shuffle')
+ bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'shuffle')
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
- bst <- xgb.train(param, dtrain, 2, watchlist, verbose = VERB, feature_selector = 'greedy')
+ bst <- xgb.train(param, dtrain, 2, evals, verbose = VERB, feature_selector = 'greedy')
expect_lt(attributes(bst)$evaluation_log$eval_error[2], ERR_UL)
- bst <- xgb.train(param, dtrain, n, watchlist, verbose = VERB, feature_selector = 'thrifty',
- top_k = 50, callbacks = list(cb.gblinear.history(sparse = TRUE)))
+ bst <- xgb.train(param, dtrain, n, evals, verbose = VERB, feature_selector = 'thrifty',
+ top_k = 50, callbacks = list(xgb.cb.gblinear.history(sparse = TRUE)))
expect_lt(attributes(bst)$evaluation_log$eval_error[n], ERR_UL)
h <- xgb.gblinear.history(bst)
expect_equal(dim(h), c(n, ncol(dtrain) + 1))
diff --git a/R-package/tests/testthat/test_ranking.R b/R-package/tests/testthat/test_ranking.R
index e49a32025e0f..0e7db42da0b2 100644
--- a/R-package/tests/testthat/test_ranking.R
+++ b/R-package/tests/testthat/test_ranking.R
@@ -15,7 +15,7 @@ test_that('Test ranking with unweighted data', {
params <- list(eta = 1, tree_method = 'exact', objective = 'rank:pairwise', max_depth = 1,
eval_metric = 'auc', eval_metric = 'aucpr', nthread = n_threads)
- bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
+ bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
# Check if the metric is monotone increasing
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
@@ -39,7 +39,7 @@ test_that('Test ranking with weighted data', {
eta = 1, tree_method = "exact", objective = "rank:pairwise", max_depth = 1,
eval_metric = "auc", eval_metric = "aucpr", nthread = n_threads
)
- bst <- xgb.train(params, dtrain, nrounds = 10, watchlist = list(train = dtrain))
+ bst <- xgb.train(params, dtrain, nrounds = 10, evals = list(train = dtrain))
# Check if the metric is monotone increasing
expect_true(all(diff(attributes(bst)$evaluation_log$train_auc) >= 0))
expect_true(all(diff(attributes(bst)$evaluation_log$train_aucpr) >= 0))
diff --git a/R-package/tests/testthat/test_update.R b/R-package/tests/testthat/test_update.R
index 3c88178e08d3..7fdc6eb84bb3 100644
--- a/R-package/tests/testthat/test_update.R
+++ b/R-package/tests/testthat/test_update.R
@@ -17,7 +17,7 @@ dtest <- xgb.DMatrix(
win32_flag <- .Platform$OS.type == "windows" && .Machine$sizeof.pointer != 8
test_that("updating the model works", {
- watchlist <- list(train = dtrain, test = dtest)
+ evals <- list(train = dtrain, test = dtest)
# no-subsampling
p1 <- list(
@@ -25,19 +25,19 @@ test_that("updating the model works", {
updater = "grow_colmaker,prune"
)
set.seed(11)
- bst1 <- xgb.train(p1, dtrain, nrounds = 10, watchlist, verbose = 0)
+ bst1 <- xgb.train(p1, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr1 <- xgb.model.dt.tree(model = bst1)
# with subsampling
p2 <- modifyList(p1, list(subsample = 0.1))
set.seed(11)
- bst2 <- xgb.train(p2, dtrain, nrounds = 10, watchlist, verbose = 0)
+ bst2 <- xgb.train(p2, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr2 <- xgb.model.dt.tree(model = bst2)
# the same no-subsampling boosting with an extra 'refresh' updater:
p1r <- modifyList(p1, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
set.seed(11)
- bst1r <- xgb.train(p1r, dtrain, nrounds = 10, watchlist, verbose = 0)
+ bst1r <- xgb.train(p1r, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr1r <- xgb.model.dt.tree(model = bst1r)
# all should be the same when no subsampling
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1r)$evaluation_log)
@@ -53,7 +53,7 @@ test_that("updating the model works", {
# the same boosting with subsampling with an extra 'refresh' updater:
p2r <- modifyList(p2, list(updater = 'grow_colmaker,prune,refresh', refresh_leaf = FALSE))
set.seed(11)
- bst2r <- xgb.train(p2r, dtrain, nrounds = 10, watchlist, verbose = 0)
+ bst2r <- xgb.train(p2r, dtrain, nrounds = 10, evals = evals, verbose = 0)
tr2r <- xgb.model.dt.tree(model = bst2r)
# should be the same evaluation but different gains and larger cover
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2r)$evaluation_log)
@@ -66,7 +66,7 @@ test_that("updating the model works", {
# process type 'update' for no-subsampling model, refreshing the tree stats AND leaves from training data:
set.seed(123)
p1u <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = TRUE))
- bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
+ bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
tr1u <- xgb.model.dt.tree(model = bst1u)
# all should be the same when no subsampling
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
@@ -79,7 +79,7 @@ test_that("updating the model works", {
# same thing but with a serialized model
set.seed(123)
- bst1u <- xgb.train(p1u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = xgb.save.raw(bst1))
+ bst1u <- xgb.train(p1u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = xgb.save.raw(bst1))
tr1u <- xgb.model.dt.tree(model = bst1u)
# all should be the same when no subsampling
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1u)$evaluation_log)
@@ -87,7 +87,7 @@ test_that("updating the model works", {
# process type 'update' for model with subsampling, refreshing only the tree stats from training data:
p2u <- modifyList(p2, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
- bst2u <- xgb.train(p2u, dtrain, nrounds = 10, watchlist, verbose = 0, xgb_model = bst2)
+ bst2u <- xgb.train(p2u, dtrain, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst2)
tr2u <- xgb.model.dt.tree(model = bst2u)
# should be the same evaluation but different gains and larger cover
expect_equal(attributes(bst2)$evaluation_log, attributes(bst2u)$evaluation_log)
@@ -102,7 +102,7 @@ test_that("updating the model works", {
# process type 'update' for no-subsampling model, refreshing only the tree stats from TEST data:
p1ut <- modifyList(p1, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
- bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, watchlist, verbose = 0, xgb_model = bst1)
+ bst1ut <- xgb.train(p1ut, dtest, nrounds = 10, evals = evals, verbose = 0, xgb_model = bst1)
tr1ut <- xgb.model.dt.tree(model = bst1ut)
# should be the same evaluations but different gains and smaller cover (test data is smaller)
expect_equal(attributes(bst1)$evaluation_log, attributes(bst1ut)$evaluation_log)
@@ -115,18 +115,18 @@ test_that("updating works for multiclass & multitree", {
dtr <- xgb.DMatrix(
as.matrix(iris[, -5]), label = as.numeric(iris$Species) - 1, nthread = n_threads
)
- watchlist <- list(train = dtr)
+ evals <- list(train = dtr)
p0 <- list(max_depth = 2, eta = 0.5, nthread = n_threads, subsample = 0.6,
objective = "multi:softprob", num_class = 3, num_parallel_tree = 2,
base_score = 0)
set.seed(121)
- bst0 <- xgb.train(p0, dtr, 5, watchlist, verbose = 0)
+ bst0 <- xgb.train(p0, dtr, 5, evals = evals, verbose = 0)
tr0 <- xgb.model.dt.tree(model = bst0)
# run update process for an original model with subsampling
p0u <- modifyList(p0, list(process_type = 'update', updater = 'refresh', refresh_leaf = FALSE))
bst0u <- xgb.train(p0u, dtr, nrounds = xgb.get.num.boosted.rounds(bst0),
- watchlist, xgb_model = bst0, verbose = 0)
+ evals = evals, xgb_model = bst0, verbose = 0)
tr0u <- xgb.model.dt.tree(model = bst0u)
# should be the same evaluation but different gains and larger cover
diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd
index 0a6432d5f9cf..fc49adc0fcee 100644
--- a/R-package/vignettes/xgboostPresentation.Rmd
+++ b/R-package/vignettes/xgboostPresentation.Rmd
@@ -341,10 +341,10 @@ One way to measure progress in learning of a model is to provide to **XGBoost**
> in some way it is similar to what we have done above with the average error. The main difference is that below it was after building the model, and now it is during the construction that we measure errors.
-For the purpose of this example, we use `watchlist` parameter. It is a list of `xgb.DMatrix`, each of them tagged with a name.
+For the purpose of this example, we use the `evals` parameter. It is a list of `xgb.DMatrix` objects, each of them tagged with a name.
-```{r watchlist, message=F, warning=F}
-watchlist <- list(train = dtrain, test = dtest)
+```{r evals, message=F, warning=F}
+evals <- list(train = dtrain, test = dtest)
bst <- xgb.train(
data = dtrain
@@ -355,7 +355,7 @@ bst <- xgb.train(
, objective = "binary:logistic"
)
, nrounds = 2
- , watchlist = watchlist
+ , evals = evals
)
```
@@ -367,7 +367,7 @@ If with your own dataset you have not such results, you should think about how y
For a better understanding of the learning progression, you may want to have some specific metric or even use multiple evaluation metrics.
-```{r watchlist2, message=F, warning=F}
+```{r evals2, message=F, warning=F}
bst <- xgb.train(
data = dtrain
, max_depth = 2
@@ -379,7 +379,7 @@ bst <- xgb.train(
, eval_metric = "logloss"
)
, nrounds = 2
- , watchlist = watchlist
+ , evals = evals
)
```
@@ -401,7 +401,7 @@ bst <- xgb.train(
, eval_metric = "logloss"
)
, nrounds = 2
- , watchlist = watchlist
+ , evals = evals
)
```
@@ -430,7 +430,7 @@ bst <- xgb.train(
, objective = "binary:logistic"
)
, nrounds = 2
- , watchlist = watchlist
+ , evals = evals
)
```
diff --git a/README.md b/README.md
index 063b291259d8..234bd7dba76e 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,6 @@
-<img src="https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/logo-m/xgboost.png"/> eXtreme Gradient Boosting
+<img src="https://xgboost.ai/images/logo/xgboost-logo-ng.png"/> eXtreme Gradient Boosting
===========
+
[![Build Status](https://badge.buildkite.com/aca47f40a32735c00a8550540c5eeff6a4c1d246a580cae9b0.svg?branch=master)](https://buildkite.com/xgboost/xgboost-ci)
[![XGBoost-CI](https://github.com/dmlc/xgboost/workflows/XGBoost-CI/badge.svg?branch=master)](https://github.com/dmlc/xgboost/actions)
[![Documentation Status](https://readthedocs.org/projects/xgboost/badge/?version=latest)](https://xgboost.readthedocs.org)
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index 9c373bb019ec..317a71c00d22 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -151,6 +151,7 @@ function(xgboost_set_cuda_flags target)
target_include_directories(
${target} PRIVATE
${xgboost_SOURCE_DIR}/gputreeshap
+ ${xgboost_SOURCE_DIR}/rabit/include
${CUDAToolkit_INCLUDE_DIRS})
if(MSVC)
@@ -289,7 +290,7 @@ macro(xgboost_target_link_libraries target)
endif()
if(USE_NVTX)
- target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
+ target_link_libraries(${target} PRIVATE CUDA::nvtx3)
endif()
if(MINGW)
diff --git a/demo/dask/cpu_survival.py b/demo/dask/cpu_survival.py
index 8bf464ce21d3..44032bab207f 100644
--- a/demo/dask/cpu_survival.py
+++ b/demo/dask/cpu_survival.py
@@ -6,6 +6,7 @@
import os
+import dask.array as da
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster
@@ -13,7 +14,7 @@
from xgboost.dask import DaskDMatrix
-def main(client):
+def main(client: Client) -> da.Array:
# Load an example survival data from CSV into a Dask data frame.
# The Veterans' Administration Lung Cancer Trial
# The Statistical Analysis of Failure Time Data by Kalbfleisch J. and Prentice R (1980)
diff --git a/demo/dask/cpu_training.py b/demo/dask/cpu_training.py
index 2bee444f7a89..b3a389458987 100644
--- a/demo/dask/cpu_training.py
+++ b/demo/dask/cpu_training.py
@@ -11,12 +11,12 @@
from xgboost.dask import DaskDMatrix
-def main(client):
+def main(client: Client) -> None:
# generate some random data for demonstration
m = 100000
n = 100
rng = da.random.default_rng(1)
- X = rng.normal(size=(m, n))
+ X = rng.normal(size=(m, n), chunks=(10000, -1))
y = X.sum(axis=1)
# DaskDMatrix acts like normal DMatrix, works as a proxy for local
@@ -40,7 +40,7 @@ def main(client):
# you can pass output directly into `predict` too.
prediction = dxgb.predict(client, bst, dtrain)
print("Evaluation history:", history)
- return prediction
+ print("Error:", da.sqrt((prediction - y) ** 2).mean().compute())
if __name__ == "__main__":
diff --git a/demo/dask/dask_callbacks.py b/demo/dask/dask_callbacks.py
index 4a7ec0f191cb..1a15b918a534 100644
--- a/demo/dask/dask_callbacks.py
+++ b/demo/dask/dask_callbacks.py
@@ -3,6 +3,8 @@
====================================
"""
+from typing import Any
+
import numpy as np
from dask.distributed import Client, LocalCluster
from dask_ml.datasets import make_regression
@@ -13,7 +15,7 @@
from xgboost.dask import DaskDMatrix
-def probability_for_going_backward(epoch):
+def probability_for_going_backward(epoch: int) -> float:
return 0.999 / (1.0 + 0.05 * np.log(1.0 + epoch))
@@ -23,7 +25,9 @@ class CustomEarlyStopping(xgb.callback.TrainingCallback):
In the beginning, allow the metric to become worse with a probability of 0.999.
As boosting progresses, the probability should be adjusted downward"""
- def __init__(self, *, validation_set, target_metric, maximize, seed):
+ def __init__(
+ self, *, validation_set: str, target_metric: str, maximize: bool, seed: int
+ ) -> None:
self.validation_set = validation_set
self.target_metric = target_metric
self.maximize = maximize
@@ -34,7 +38,9 @@ def __init__(self, *, validation_set, target_metric, maximize, seed):
else:
self.better = lambda x, y: x < y
- def after_iteration(self, model, epoch, evals_log):
+ def after_iteration(
+ self, model: Any, epoch: int, evals_log: xgb.callback.TrainingCallback.EvalsLog
+ ) -> bool:
metric_history = evals_log[self.validation_set][self.target_metric]
if len(metric_history) < 2 or self.better(
metric_history[-1], metric_history[-2]
@@ -42,7 +48,7 @@ def after_iteration(self, model, epoch, evals_log):
return False # continue training
p = probability_for_going_backward(epoch)
go_backward = self.rng.choice(2, size=(1,), replace=True, p=[1 - p, p]).astype(
- np.bool
+ np.bool_
)[0]
print(
"The validation metric went into the wrong direction. "
@@ -54,7 +60,7 @@ def after_iteration(self, model, epoch, evals_log):
return True # stop training
-def main(client):
+def main(client: Client) -> None:
m = 100000
n = 100
X, y = make_regression(n_samples=m, n_features=n, chunks=200, random_state=0)
diff --git a/demo/dask/sklearn_cpu_training.py b/demo/dask/sklearn_cpu_training.py
index e91babb8407b..38a53c6ca71c 100644
--- a/demo/dask/sklearn_cpu_training.py
+++ b/demo/dask/sklearn_cpu_training.py
@@ -9,7 +9,7 @@
from xgboost import dask as dxgb
-def main(client):
+def main(client: Client) -> dxgb.Booster:
# generate some random data for demonstration
n = 100
m = 10000
diff --git a/demo/dask/sklearn_gpu_training.py b/demo/dask/sklearn_gpu_training.py
index 7686909951e6..6161bf9a3402 100644
--- a/demo/dask/sklearn_gpu_training.py
+++ b/demo/dask/sklearn_gpu_training.py
@@ -12,7 +12,7 @@
from xgboost import dask as dxgb
-def main(client):
+def main(client: Client) -> dxgb.Booster:
# generate some random data for demonstration
n = 100
m = 1000000
diff --git a/demo/guide-python/external_memory.py b/demo/guide-python/external_memory.py
index e4d1895d1a1a..b19f550c9149 100644
--- a/demo/guide-python/external_memory.py
+++ b/demo/guide-python/external_memory.py
@@ -84,7 +84,7 @@ def main(tmpdir: str) -> xgboost.Booster:
it = Iterator(files)
# For non-data arguments, specify it here once instead of passing them by the `next`
# method.
- missing = np.NaN
+ missing = np.nan
Xy = xgboost.DMatrix(it, missing=missing, enable_categorical=False)
# ``approx`` is also supported, but less efficient due to sketching. GPU behaves
diff --git a/doc/conf.py b/doc/conf.py
index 68ec39181ba0..ec58c5a5d456 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -250,7 +250,7 @@ def is_readthedocs_build():
html_theme_options = {"logo_only": True}
-html_logo = "https://raw.githubusercontent.com/dmlc/dmlc.github.io/master/img/logo-m/xgboost.png"
+html_logo = "https://xgboost.ai/images/logo/xgboost-logo-ng.png"
html_css_files = ["css/custom.css"]
diff --git a/doc/contrib/unit_tests.rst b/doc/contrib/unit_tests.rst
index 662a632e27db..908e5ed99fa9 100644
--- a/doc/contrib/unit_tests.rst
+++ b/doc/contrib/unit_tests.rst
@@ -144,6 +144,14 @@ which provides higher flexibility. For example:
ctest --verbose
+If you need to debug errors on Windows with the Visual Studio debugger, you can set the GTest flags in `test_main.cc`:
+
+.. code-block::
+
+ ::testing::GTEST_FLAG(filter) = "Suite.Test";
+ ::testing::GTEST_FLAG(repeat) = 10;
+
+
***********************************************
Sanitizers: Detect memory errors and data races
***********************************************
diff --git a/doc/index.rst b/doc/index.rst
index a2ae9bbd39da..7b241c0a17d2 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -28,7 +28,7 @@ Contents
Python Package
R Package
JVM Package
- Ruby Package
+ Ruby Package
Swift Package
Julia Package
C Package
diff --git a/doc/parameter.rst b/doc/parameter.rst
index 7898bb363549..00f0eaea6193 100644
--- a/doc/parameter.rst
+++ b/doc/parameter.rst
@@ -118,7 +118,7 @@ Parameters for Tree Booster
- All ``colsample_by*`` parameters have a range of (0, 1], the default value of 1, and specify the fraction of columns to be subsampled.
- ``colsample_bytree`` is the subsample ratio of columns when constructing each tree. Subsampling occurs once for every tree constructed.
- ``colsample_bylevel`` is the subsample ratio of columns for each level. Subsampling occurs once for every new depth level reached in a tree. Columns are subsampled from the set of columns chosen for the current tree.
- - ``colsample_bynode`` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level.
+ - ``colsample_bynode`` is the subsample ratio of columns for each node (split). Subsampling occurs once every time a new split is evaluated. Columns are subsampled from the set of columns chosen for the current level. This is not supported by the exact tree method.
- ``colsample_by*`` parameters work cumulatively. For instance,
the combination ``{'colsample_bytree':0.5, 'colsample_bylevel':0.5,
'colsample_bynode':0.5}`` with 64 features will leave 8 features to choose from at
@@ -489,7 +489,7 @@ Parameters for learning to rank (``rank:ndcg``, ``rank:map``, ``rank:pairwise``)
These are parameters specific to learning to rank task. See :doc:`Learning to Rank ` for an in-depth explanation.
-* ``lambdarank_pair_method`` [default = ``mean``]
+* ``lambdarank_pair_method`` [default = ``topk``]
How to construct pairs for pair-wise learning.
@@ -500,7 +500,13 @@ These are parameters specific to learning to rank task. See :doc:`Learning to Ra
It specifies the number of pairs sampled for each document when pair method is ``mean``, or the truncation level for queries when the pair method is ``topk``. For example, to train with ``ndcg@6``, set ``lambdarank_num_pair_per_sample`` to :math:`6` and ``lambdarank_pair_method`` to ``topk``.
-* ``lambdarank_unbiased`` [default = ``false``]
+* ``lambdarank_normalization`` [default = ``true``]
+
+ .. versionadded:: 2.1.0
+
+ Whether to normalize the leaf value by the lambda gradient. This normalization can sometimes stall the training progress, in which case it can be disabled.
+
+* ``lambdarank_unbiased`` [default = ``false``]
Specify whether do we need to debias input click data.
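As an illustrative aside on the ranking parameters documented above (not part of the patch), the following minimal sketch shows how they might be combined through the native interface. The synthetic data and the chosen values are placeholders, and ``lambdarank_normalization`` assumes a build that includes this change (2.1.0 or later).
.. code-block:: python

    import numpy as np
    import xgboost as xgb

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 10))
    y = rng.integers(0, 5, size=100)               # graded relevance labels
    qid = np.sort(rng.integers(0, 10, size=100))   # query ids, sorted as required

    Xy = xgb.DMatrix(X, label=y, qid=qid)
    params = {
        "objective": "rank:ndcg",
        "lambdarank_pair_method": "topk",        # default pair construction method
        "lambdarank_num_pair_per_sample": 8,     # truncation level for "topk"
        "lambdarank_normalization": True,        # new in 2.1.0
        "lambdarank_unbiased": False,
    }
    booster = xgb.train(params, Xy, num_boost_round=10)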
diff --git a/doc/tutorials/dask.rst b/doc/tutorials/dask.rst
index 4b145f9a95b2..3544f88b5731 100644
--- a/doc/tutorials/dask.rst
+++ b/doc/tutorials/dask.rst
@@ -237,41 +237,44 @@ For most of the use cases with GPUs, the `Dask-CUDA `_, for example, for GPUs and you can use Dask Cloud Provider to `deploy Dask clusters in the cloud `_. See the `Dask documentation for a more comprehensive list `_.
+Using Dask's ``LocalCluster`` is convenient for getting started quickly on a local machine. Once you're ready to scale your work, though, there are a number of ways to deploy Dask on a distributed cluster. You can use `Dask-CUDA `_, for example, for GPUs and you can use Dask Cloud Provider to `deploy Dask clusters in the cloud `_. See the `Dask documentation for a more comprehensive list `_.
In the example below, a ``KubeCluster`` is used for `deploying Dask on Kubernetes `_:
.. code-block:: python
- from dask_kubernetes import KubeCluster # Need to install the ``dask-kubernetes`` package
+ from dask_kubernetes.operator import KubeCluster # Need to install the ``dask-kubernetes`` package
+ from dask_kubernetes.operator.kubecluster.kubecluster import CreateMode
+
from dask.distributed import Client
from xgboost import dask as dxgb
- import dask
import dask.array as da
- dask.config.set({"kubernetes.scheduler-service-type": "LoadBalancer",
- "kubernetes.scheduler-service-wait-timeout": 360,
- "distributed.comm.timeouts.connect": 360})
-
def main():
- '''Connect to a remote kube cluster with GPU nodes and run training on it.'''
+ '''Connect to a remote kube cluster with GPU nodes and run training on it.'''
m = 1000
n = 10
kWorkers = 2 # assuming you have 2 GPU nodes on that cluster.
# You need to work out the worker-spec yourself. See document in dask_kubernetes for
# its usage. Here we just want to show that XGBoost works on various clusters.
- cluster = KubeCluster.from_yaml('worker-spec.yaml', deploy_mode='remote')
- cluster.scale(kWorkers) # scale to use all GPUs
- with Client(cluster) as client:
- X = da.random.random(size=(m, n), chunks=100)
- y = da.random.random(size=(m, ), chunks=100)
+ # See notes below for why we use a pre-allocated cluster.
+ with KubeCluster(
+ name="xgboost-test",
+ image="my-image-name:latest",
+ n_workers=kWorkers,
+ create_mode=CreateMode.CONNECT_ONLY,
+ shutdown_on_close=False,
+ ) as cluster:
+ with Client(cluster) as client:
+ X = da.random.random(size=(m, n), chunks=100)
+ y = X.sum(axis=1)
- regressor = dxgb.DaskXGBRegressor(n_estimators=10, missing=0.0)
- regressor.client = client
- regressor.set_params(tree_method='hist', device="cuda")
- regressor.fit(X, y, eval_set=[(X, y)])
+ regressor = dxgb.DaskXGBRegressor(n_estimators=10, missing=0.0)
+ regressor.client = client
+ regressor.set_params(tree_method='hist', device="cuda")
+ regressor.fit(X, y, eval_set=[(X, y)])
if __name__ == '__main__':
@@ -279,11 +282,46 @@ In the example below, a ``KubeCluster`` is used for `deploying Dask on Kubernete
# main function will connect to that cluster and start training xgboost model.
main()
+
Different cluster classes might have subtle differences like network configuration, or a
specific cluster implementation might contain bugs that we are not aware of. Open an
issue if such a case is found and there's no documentation on how to resolve it in that
cluster implementation.
+An interesting aspect of Kubernetes clusters is that the pods may only become available
+after the Dask workflow has begun, which can cause issues with distributed XGBoost since
+XGBoost expects the workers holding the input data to remain unchanged during training. To
+use Kubernetes clusters, it is necessary to wait for all the pods to be online before
+submitting XGBoost tasks. One can either wait for the workers from Python (for instance
+with ``distributed.Client.wait_for_workers``) or simply pre-allocate a cluster with k8s
+tools (like ``kubectl``) before running Dask workflows. To pre-allocate a cluster, we can
+first generate the cluster spec using ``dask-kubernetes``:
+
+.. code-block:: python
+
+ import json
+
+ from dask_kubernetes.operator import make_cluster_spec
+
+ spec = make_cluster_spec(name="xgboost-test", image="my-image-name:latest", n_workers=16)
+ with open("cluster-spec.json", "w") as fd:
+ json.dump(spec, fd, indent=2)
+
+.. code-block:: sh
+
+ kubectl apply -f ./cluster-spec.json
+
+
+Check whether the pods are available:
+
+.. code-block:: sh
+
+ kubectl get pods
+
+Once all pods have been initialized, the Dask XGBoost workflow can be run, as in the
+previous example. It is important to create the ``KubeCluster`` object with
+``create_mode=CreateMode.CONNECT_ONLY`` and, optionally, ``shutdown_on_close=False`` if you
+do not want to shut down the cluster after a single job.
+
*******
Threads
*******
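As an aside to the documentation change above (not part of the patch), the "wait from Python" option can be sketched with the standard ``distributed.Client.wait_for_workers`` call. The worker count, data, and hyperparameters below are placeholders for illustration only.
.. code-block:: python

    import dask.array as da
    from dask.distributed import Client
    from xgboost import dask as dxgb


    def wait_and_train(cluster, n_workers: int) -> dxgb.DaskXGBRegressor:
        with Client(cluster) as client:
            # Block until every expected pod has joined the scheduler, so the workers
            # holding the input data do not change once training starts.
            client.wait_for_workers(n_workers=n_workers)
            X = da.random.random(size=(1000, 10), chunks=100)
            y = X.sum(axis=1)
            regressor = dxgb.DaskXGBRegressor(n_estimators=10, tree_method="hist")
            regressor.client = client
            regressor.fit(X, y, eval_set=[(X, y)])
            return regressor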
diff --git a/doc/tutorials/learning_to_rank.rst b/doc/tutorials/learning_to_rank.rst
index 015f736e08eb..15a611bd0c32 100644
--- a/doc/tutorials/learning_to_rank.rst
+++ b/doc/tutorials/learning_to_rank.rst
@@ -48,11 +48,11 @@ Notice that the samples are sorted based on their query index in a non-decreasin
import xgboost as xgb
# Make a synthetic ranking dataset for demonstration
- seed = 1994
+ seed = 1994
X, y = make_classification(random_state=seed)
rng = np.random.default_rng(seed)
n_query_groups = 3
- qid = rng.integers(0, 3, size=X.shape[0])
+ qid = rng.integers(0, n_query_groups, size=X.shape[0])
# Sort the inputs based on query index
sorted_idx = np.argsort(qid)
@@ -65,14 +65,14 @@ The simplest way to train a ranking model is by using the scikit-learn estimator
.. code-block:: python
ranker = xgb.XGBRanker(tree_method="hist", lambdarank_num_pair_per_sample=8, objective="rank:ndcg", lambdarank_pair_method="topk")
- ranker.fit(X, y, qid=qid)
+ ranker.fit(X, y, qid=qid[sorted_idx])
Please note that, as of writing, there's no learning-to-rank interface in scikit-learn. As a result, the :py:class:`xgboost.XGBRanker` class does not fully conform the scikit-learn estimator guideline and can not be directly used with some of its utility functions. For instances, the ``auc_score`` and ``ndcg_score`` in scikit-learn don't consider query group information nor the pairwise loss. Most of the metrics are implemented as part of XGBoost, but to use scikit-learn utilities like :py:func:`sklearn.model_selection.cross_validation`, we need to make some adjustments in order to pass the ``qid`` as an additional parameter for :py:meth:`xgboost.XGBRanker.score`. Given a data frame ``X`` (either pandas or cuDF), add the column ``qid`` as follows:
.. code-block:: python
df = pd.DataFrame(X, columns=[str(i) for i in range(X.shape[1])])
- df["qid"] = qid
+ df["qid"] = qid[sorted_idx]
ranker.fit(df, y) # No need to pass qid as a separate argument
from sklearn.model_selection import StratifiedGroupKFold, cross_val_score
@@ -146,7 +146,8 @@ The consideration of effective pairs also applies to the choice of pair method (
When using the mean strategy for generating pairs, where the target metric (like ``NDCG``) is computed over the whole query list, users can specify how many pairs should be generated per each document, by setting the ``lambdarank_num_pair_per_sample``. XGBoost will randomly sample ``lambdarank_num_pair_per_sample`` pairs for each element in the query group (:math:`|pairs| = |query| \times num\_pairsample`). Often, setting it to 1 can produce reasonable results. In cases where performance is inadequate due to insufficient number of effective pairs being generated, set ``lambdarank_num_pair_per_sample`` to a higher value. As more document pairs are generated, more effective pairs will be generated as well.
-On the other hand, if you are prioritizing the top :math:`k` documents, the ``lambdarank_num_pair_per_sample`` should be set slightly higher than :math:`k` (with a few more documents) to obtain a good training result.
+On the other hand, if you are prioritizing the top :math:`k` documents, ``lambdarank_num_pair_per_sample`` should be set slightly higher than :math:`k` (with a few more documents) to obtain a good training result. Lastly, XGBoost employs additional regularization for learning-to-rank objectives, which can be disabled by setting ``lambdarank_normalization`` to ``False``.
+
**Summary** If you have large amount of training data:
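As an illustrative aside (not part of the patch), the regularization switch mentioned above can be passed through the scikit-learn interface like any other booster parameter; the remaining hyperparameters simply mirror the snippet earlier in this tutorial, and the parameter requires a build that includes this change.
.. code-block:: python

    import xgboost as xgb

    ranker = xgb.XGBRanker(
        tree_method="hist",
        objective="rank:ndcg",
        lambdarank_pair_method="topk",
        lambdarank_num_pair_per_sample=8,
        lambdarank_normalization=False,  # disable the additional regularization
    )
    # Fit exactly as in the earlier snippet, with inputs sorted by query id:
    # ranker.fit(X, y, qid=qid[sorted_idx])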
diff --git a/include/xgboost/base.h b/include/xgboost/base.h
index 1f94c9b2fd1d..9abe72b87859 100644
--- a/include/xgboost/base.h
+++ b/include/xgboost/base.h
@@ -1,20 +1,18 @@
/**
- * Copyright 2015-2023 by XGBoost Contributors
+ * Copyright 2015-2024, XGBoost Contributors
* \file base.h
* \brief Defines configuration macros and basic types for xgboost.
*/
#ifndef XGBOOST_BASE_H_
#define XGBOOST_BASE_H_
-#include
-#include
+#include <dmlc/omp.h>  // for omp_uint, omp_ulong
-#include
-#include
-#include
-#include
-#include
-#include
+#include <cstdint>  // for int32_t, uint64_t, int16_t
+#include <ostream>  // for ostream
+#include <string>   // for string
+#include <utility>  // for pair
+#include <vector>   // for vector
/*!
* \brief string flag for R library, to leave hooks when needed.
@@ -86,34 +84,31 @@
#endif // !defined(XGBOOST_MM_PREFETCH_PRESENT) && !defined()
-/*! \brief namespace of xgboost*/
namespace xgboost {
-
/*! \brief unsigned integer type used for feature index. */
-using bst_uint = uint32_t; // NOLINT
+using bst_uint = std::uint32_t; // NOLINT
/*! \brief unsigned long integers */
-using bst_ulong = uint64_t; // NOLINT
+using bst_ulong = std::uint64_t; // NOLINT
/*! \brief float type, used for storing statistics */
using bst_float = float; // NOLINT
/*! \brief Categorical value type. */
-using bst_cat_t = int32_t; // NOLINT
+using bst_cat_t = std::int32_t; // NOLINT
/*! \brief Type for data column (feature) index. */
-using bst_feature_t = uint32_t; // NOLINT
-/*! \brief Type for histogram bin index. */
-using bst_bin_t = int32_t; // NOLINT
-/*! \brief Type for data row index.
- *
- * Be careful `std::size_t' is implementation-defined. Meaning that the binary
- * representation of DMatrix might not be portable across platform. Booster model should
- * be portable as parameters are floating points.
+using bst_feature_t = std::uint32_t; // NOLINT
+/**
+ * @brief Type for histogram bin index. We sometimes use -1 to indicate invalid bin.
*/
-using bst_row_t = std::size_t; // NOLINT
+using bst_bin_t = std::int32_t; // NOLINT
+/**
+ * @brief Type for data row index (sample).
+ */
+using bst_idx_t = std::uint64_t; // NOLINT
/*! \brief Type for tree node index. */
using bst_node_t = std::int32_t; // NOLINT
/*! \brief Type for ranking group index. */
using bst_group_t = std::uint32_t; // NOLINT
/**
- * \brief Type for indexing into output targets.
+ * @brief Type for indexing into output targets.
*/
using bst_target_t = std::uint32_t; // NOLINT
/**
@@ -306,8 +301,7 @@ class GradientPairInt64 {
XGBOOST_DEVICE bool operator==(const GradientPairInt64 &rhs) const {
return grad_ == rhs.grad_ && hess_ == rhs.hess_;
}
- friend std::ostream &operator<<(std::ostream &os,
- const GradientPairInt64 &g) {
+ friend std::ostream &operator<<(std::ostream &os, const GradientPairInt64 &g) {
os << g.GetQuantisedGrad() << "/" << g.GetQuantisedHess();
return os;
}
@@ -323,7 +317,7 @@ using omp_ulong = dmlc::omp_ulong; // NOLINT
/*! \brief define unsigned int for openmp loop */
using bst_omp_uint = dmlc::omp_uint; // NOLINT
/*! \brief Type used for representing version number in binary form.*/
-using XGBoostVersionT = int32_t;
+using XGBoostVersionT = std::int32_t;
} // namespace xgboost
#endif // XGBOOST_BASE_H_
diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h
index 795c78946118..4b60fe01a546 100644
--- a/include/xgboost/c_api.h
+++ b/include/xgboost/c_api.h
@@ -1,5 +1,5 @@
/**
- * Copyright 2015~2023 by XGBoost Contributors
+ * Copyright 2015-2024, XGBoost Contributors
* \file c_api.h
* \author Tianqi Chen
* \brief C API of XGBoost, used for interfacing to other languages.
@@ -639,21 +639,14 @@ XGB_DLL int XGDMatrixSetInfoFromInterface(DMatrixHandle handle,
* \param len length of array
* \return 0 when success, -1 when failure happens
*/
-XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle,
- const char *field,
- const float *array,
+XGB_DLL int XGDMatrixSetFloatInfo(DMatrixHandle handle, const char *field, const float *array,
bst_ulong len);
-/*!
- * \brief set uint32 vector to a content in info
- * \param handle a instance of data matrix
- * \param field field name
- * \param array pointer to unsigned int vector
- * \param len length of array
- * \return 0 when success, -1 when failure happens
+/**
+ * @deprecated since 2.1.0
+ *
+ * Use @ref XGDMatrixSetInfoFromInterface instead.
*/
-XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle,
- const char *field,
- const unsigned *array,
+XGB_DLL int XGDMatrixSetUIntInfo(DMatrixHandle handle, const char *field, const unsigned *array,
bst_ulong len);
/*!
@@ -725,42 +718,13 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field,
bst_ulong *size,
const char ***out_features);
-/*!
- * \brief Set meta info from dense matrix. Valid field names are:
- *
- * - label
- * - weight
- * - base_margin
- * - group
- * - label_lower_bound
- * - label_upper_bound
- * - feature_weights
+/**
+ * @deprecated since 2.1.0
*
- * \param handle An instance of data matrix
- * \param field Field name
- * \param data Pointer to consecutive memory storing data.
- * \param size Size of the data, this is relative to size of type. (Meaning NOT number
- * of bytes.)
- * \param type Indicator of data type. This is defined in xgboost::DataType enum class.
- * - float = 1
- * - double = 2
- * - uint32_t = 3
- * - uint64_t = 4
- * \return 0 when success, -1 when failure happens
+ * Use @ref XGDMatrixSetInfoFromInterface instead.
*/
-XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field,
- void const *data, bst_ulong size, int type);
-
-/*!
- * \brief (deprecated) Use XGDMatrixSetUIntInfo instead. Set group of the training matrix
- * \param handle a instance of data matrix
- * \param group pointer to group size
- * \param len length of array
- * \return 0 when success, -1 when failure happens
- */
-XGB_DLL int XGDMatrixSetGroup(DMatrixHandle handle,
- const unsigned *group,
- bst_ulong len);
+XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data,
+ bst_ulong size, int type);
/*!
* \brief get float info vector from matrix.
@@ -1153,8 +1117,8 @@ XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *values,
*
* @return 0 when success, -1 when failure happens
*/
-XGB_DLL int XGBoosterPredictFromColumnar(BoosterHandle handle, char const *array_interface,
- char const *c_json_config, DMatrixHandle m,
+XGB_DLL int XGBoosterPredictFromColumnar(BoosterHandle handle, char const *values,
+ char const *config, DMatrixHandle m,
bst_ulong const **out_shape, bst_ulong *out_dim,
const float **out_result);
@@ -1550,16 +1514,37 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *config,
*
* @brief Experimental support for exposing internal communicator in XGBoost.
*
+ * @note This is still under development.
+ *
+ * The collective communicator in XGBoost evolved from the `rabit` project of dmlc but has
+ * changed significantly since its adoption. It consists of a tracker and a set of
+ * workers. The tracker is responsible for bootstrapping the communication group and
+ * handling centralized tasks like logging. The workers are actual communicators
+ * performing collective tasks like allreduce.
+ *
+ * To use the collective implementation, one needs to first create a tracker with
+ * corresponding parameters, then get the arguments for workers using
+ * XGTrackerWorkerArgs(). The obtained arguments can then be passed to the
+ * XGCommunicatorInit() function. Call to XGCommunicatorInit() must be accompanied with a
+ * XGCommunicatorFinalize() call for cleanups. Please note that the communicator uses
+ * `std::thread` in C++, which has undefined behavior in a C++ destructor due to the
+ * runtime shutdown sequence. It's preferable to call XGCommunicatorFinalize() before the
+ * runtime is shutting down. This requirement is similar to a Python thread or socket,
+ * which should not be relied upon in a `__del__` function.
+ *
+ * Since it's used as part of XGBoost, errors will be returned when an XGBoost function
+ * is called; for instance, training a booster might return a connection error.
+ *
* @{
*/
/**
- * @brief Handle to tracker.
+ * @brief Handle to the tracker.
*
* There are currently two types of tracker in XGBoost, first one is `rabit`, while the
- * other one is `federated`.
+ * other one is `federated`. `rabit` is used for normal collective communication, while
+ * `federated` is used for federated learning.
*
- * This is still under development.
*/
typedef void *TrackerHandle; /* NOLINT */
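The tracker workflow described in the comment block above (create, run, hand the arguments to the workers, wait, free) can be sketched from Python via ``ctypes``, using only the signatures declared in this header. This is an illustrative aside, not part of the patch; the shared-library name and the JSON parameters are assumptions.
.. code-block:: python

    import ctypes
    import json

    lib = ctypes.CDLL("libxgboost.so")  # assumed library name/location

    handle = ctypes.c_void_p()          # TrackerHandle
    config = json.dumps({"dmlc_communicator": "rabit", "n_workers": 2}).encode()
    assert lib.XGTrackerCreate(config, ctypes.byref(handle)) == 0

    # Start the tracker in the background, then fetch the arguments that each
    # worker passes to XGCommunicatorInit().
    assert lib.XGTrackerRun(handle, b"{}") == 0
    args = ctypes.c_char_p()
    assert lib.XGTrackerWorkerArgs(handle, ctypes.byref(args)) == 0
    worker_args = json.loads(args.value)

    # ... launch workers; each calls XGCommunicatorInit(worker_args) and, when
    # finished, XGCommunicatorFinalize() ...

    assert lib.XGTrackerWaitFor(handle, b"{}") == 0
    assert lib.XGTrackerFree(handle) == 0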
@@ -1568,17 +1553,23 @@ typedef void *TrackerHandle; /* NOLINT */
*
* @param config JSON encoded parameters.
*
- * - dmlc_communicator: String, the type of tracker to create. Available options are `rabit`
- * and `federated`.
+ * - dmlc_communicator: String, the type of tracker to create. Available options are
+ * `rabit` and `federated`. See @ref TrackerHandle for more info.
* - n_workers: Integer, the number of workers.
* - port: (Optional) Integer, the port this tracker should listen to.
- * - timeout: (Optional) Integer, timeout in seconds for various networking operations.
+ * - timeout: (Optional) Integer, timeout in seconds for various networking
+ *   operations. Default is 300 seconds.
*
* Some configurations are `rabit` specific:
+ *
* - host: (Optional) String, used by the `rabit` tracker to specify the address of the host.
+ * This can be useful when the communicator cannot reliably obtain the host address.
+ * - sortby: (Optional) Integer.
+ * + 0: Sort workers by their host name.
+ * + 1: Sort workers by task IDs.
*
* Some `federated` specific configurations:
- * - federated_secure: Boolean, whether this is a secure server.
+ * - federated_secure: Boolean, whether this is a secure server. False for testing.
* - server_key_path: Path to the server key. Used only if this is a secure server.
* - server_cert_path: Path to the server certificate. Used only if this is a secure server.
* - client_cert_path: Path to the client certificate. Used only if this is a secure server.
@@ -1591,7 +1582,7 @@ XGB_DLL int XGTrackerCreate(char const *config, TrackerHandle *handle);
/**
* @brief Get the arguments needed for running workers. This should be called after
- * XGTrackerRun() and XGTrackerWait()
+ * XGTrackerRun().
*
* @param handle The handle to the tracker.
* @param args The arguments returned as a JSON document.
@@ -1601,16 +1592,19 @@ XGB_DLL int XGTrackerCreate(char const *config, TrackerHandle *handle);
XGB_DLL int XGTrackerWorkerArgs(TrackerHandle handle, char const **args);
/**
- * @brief Run the tracker.
+ * @brief Start the tracker. The tracker runs in the background and this function returns
+ * once the tracker is started.
*
* @param handle The handle to the tracker.
+ * @param config Unused at the moment, preserved for the future.
*
* @return 0 for success, -1 for failure.
*/
-XGB_DLL int XGTrackerRun(TrackerHandle handle);
+XGB_DLL int XGTrackerRun(TrackerHandle handle, char const *config);
/**
- * @brief Wait for the tracker to finish, should be called after XGTrackerRun().
+ * @brief Wait for the tracker to finish; should be called after XGTrackerRun(). This
+ * function will block until the tracker task is finished or the timeout is reached.
*
* @param handle The handle to the tracker.
* @param config JSON encoded configuration. No argument is required yet, preserved for
@@ -1618,11 +1612,12 @@ XGB_DLL int XGTrackerRun(TrackerHandle handle);
*
* @return 0 for success, -1 for failure.
*/
-XGB_DLL int XGTrackerWait(TrackerHandle handle, char const *config);
+XGB_DLL int XGTrackerWaitFor(TrackerHandle handle, char const *config);
/**
- * @brief Free a tracker instance. XGTrackerWait() is called internally. If the tracker
- * cannot close properly, manual interruption is required.
+ * @brief Free a tracker instance. This should be called after XGTrackerWaitFor(). If the
+ * tracker has not been properly waited for, this function will shut down all connections
+ * with the tracker, potentially leading to undefined behavior.
*
* @param handle The handle to the tracker.
*
@@ -1630,129 +1625,128 @@ XGB_DLL int XGTrackerWait(TrackerHandle handle, char const *config);
*/
XGB_DLL int XGTrackerFree(TrackerHandle handle);
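
The tracker functions above form a small lifecycle: create, run, fetch worker arguments, wait, free. Below is a minimal sketch of that flow based only on the declarations in this patch; the JSON values are placeholders, and error reporting via XGBGetLastError() follows the usual C API convention.

    #include <xgboost/c_api.h>

    #include <cstdio>
    #include <cstdlib>

    // Abort with the last recorded C API error message.
    static void CheckRc(int rc) {
      if (rc != 0) {
        std::fprintf(stderr, "%s\n", XGBGetLastError());
        std::exit(EXIT_FAILURE);
      }
    }

    int main() {
      TrackerHandle tracker{nullptr};
      // A `rabit` tracker for two workers; port and timeout are optional.
      CheckRc(XGTrackerCreate(R"({"dmlc_communicator": "rabit", "n_workers": 2})", &tracker));
      CheckRc(XGTrackerRun(tracker, "{}"));          // returns once the tracker has started
      char const *args{nullptr};
      CheckRc(XGTrackerWorkerArgs(tracker, &args));  // JSON arguments to pass to each worker
      std::printf("worker args: %s\n", args);
      // ... launch the workers with `args`, then wait for them to finish ...
      CheckRc(XGTrackerWaitFor(tracker, "{}"));
      CheckRc(XGTrackerFree(tracker));
      return 0;
    }
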
-/*!
- * \brief Initialize the collective communicator.
+/**
+ * @brief Initialize the collective communicator.
*
* Currently the communicator API is experimental, function signatures may change in the future
* without notice.
*
- * Call this once before using anything.
- *
- * The additional configuration is not required. Usually the communicator will detect settings
- * from environment variables.
+ * Call this once in the worker process before using anything. Please make sure
+ * XGCommunicatorFinalize() is called after use. The initialized communicator is a global
+ * thread-local variable.
*
- * \param config JSON encoded configuration. Accepted JSON keys are:
- * - xgboost_communicator: The type of the communicator. Can be set as an environment variable.
+ * @param config JSON encoded configuration. Accepted JSON keys are:
+ * - dmlc_communicator: The type of the communicator; this should match the tracker type.
* * rabit: Use Rabit. This is the default if the type is unspecified.
* * federated: Use the gRPC interface for Federated Learning.
- * Only applicable to the Rabit communicator (these are case-sensitive):
- * - rabit_tracker_uri: Hostname of the tracker.
- * - rabit_tracker_port: Port number of the tracker.
- * - rabit_task_id: ID of the current task, can be used to obtain deterministic rank assignment.
- * - rabit_world_size: Total number of workers.
- * - rabit_timeout: Enable timeout.
- * - rabit_timeout_sec: Timeout in seconds.
- * Only applicable to the Rabit communicator (these are case-sensitive, and can be set as
- * environment variables):
- * - DMLC_TRACKER_URI: Hostname of the tracker.
- * - DMLC_TRACKER_PORT: Port number of the tracker.
- * - DMLC_TASK_ID: ID of the current task, can be used to obtain deterministic rank assignment.
- * - DMLC_WORKER_CONNECT_RETRY: Number of retries to connect to the tracker.
- * - dmlc_nccl_path: The path to NCCL shared object. Only used if XGBoost is compiled with
- * `USE_DLOPEN_NCCL`.
- * Only applicable to the Federated communicator (use upper case for environment variables, use
+ *
+ * Only applicable to the `rabit` communicator:
+ * - dmlc_tracker_uri: Hostname or IP address of the tracker.
+ * - dmlc_tracker_port: Port number of the tracker.
+ * - dmlc_task_id: ID of the current task, can be used to obtain deterministic rank assignment.
+ * - dmlc_retry: The number of retries for connection failure.
+ * - dmlc_timeout: Timeout in seconds.
+ * - dmlc_nccl_path: Path to the nccl shared library `libnccl.so`.
+ *
+ * Only applicable to the `federated` communicator (use upper case for environment variables, use
* lower case for runtime configuration):
* - federated_server_address: Address of the federated server.
* - federated_world_size: Number of federated workers.
* - federated_rank: Rank of the current worker.
- * - federated_server_cert: Server certificate file path. Only needed for the SSL mode.
- * - federated_client_key: Client key file path. Only needed for the SSL mode.
- * - federated_client_cert: Client certificate file path. Only needed for the SSL mode.
- * \return 0 for success, -1 for failure.
+ * - federated_server_cert_path: Server certificate file path. Only needed for the SSL mode.
+ * - federated_client_key_path: Client key file path. Only needed for the SSL mode.
+ * - federated_client_cert_path: Client certificate file path. Only needed for the SSL mode.
+ *
+ * @return 0 for success, -1 for failure.
*/
XGB_DLL int XGCommunicatorInit(char const* config);
-/*!
- * \brief Finalize the collective communicator.
+/**
+ * @brief Finalize the collective communicator.
*
- * Call this function after you finished all jobs.
+ * Call this function after you have finished all jobs.
*
- * \return 0 for success, -1 for failure.
+ * @return 0 for success, -1 for failure.
*/
XGB_DLL int XGCommunicatorFinalize(void);
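
As a companion to the tracker sketch above, a worker process initializes the communicator with the matching `rabit` configuration, does its work, and finalizes. A hedged sketch; the tracker address, port, and task id below are placeholders that would normally come from the tracker's worker arguments.

    #include <xgboost/c_api.h>

    #include <cstdio>

    int main() {
      // In practice these values come from XGTrackerWorkerArgs() on the tracker side.
      char const *config = R"({
        "dmlc_communicator": "rabit",
        "dmlc_tracker_uri": "127.0.0.1",
        "dmlc_tracker_port": 9091,
        "dmlc_task_id": "0"
      })";
      if (XGCommunicatorInit(config) != 0) {
        std::fprintf(stderr, "%s\n", XGBGetLastError());
        return -1;
      }
      // The rank and world-size getters are declared further down in this header.
      std::printf("rank %d of %d\n", XGCommunicatorGetRank(), XGCommunicatorGetWorldSize());
      return XGCommunicatorFinalize();
    }
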
-/*!
- * \brief Get rank of current process.
+/**
+ * @brief Get rank of the current process.
*
- * \return Rank of the worker.
+ * @return Rank of the worker.
*/
XGB_DLL int XGCommunicatorGetRank(void);
-/*!
- * \brief Get total number of processes.
+/**
+ * @brief Get the total number of processes.
*
- * \return Total world size.
+ * @return Total world size.
*/
XGB_DLL int XGCommunicatorGetWorldSize(void);
-/*!
- * \brief Get if the communicator is distributed.
+/**
+ * @brief Get if the communicator is distributed.
*
- * \return True if the communicator is distributed.
+ * @return True if the communicator is distributed.
*/
XGB_DLL int XGCommunicatorIsDistributed(void);
-/*!
- * \brief Print the message to the communicator.
+/**
+ * @brief Print the message to the tracker.
*
- * This function can be used to communicate the information of the progress to the user who monitors
- * the communicator.
+ * This function can be used to report progress information to the user who monitors the
+ * tracker.
*
- * \param message The message to be printed.
- * \return 0 for success, -1 for failure.
+ * @param message The message to be printed.
+ * @return 0 for success, -1 for failure.
*/
XGB_DLL int XGCommunicatorPrint(char const *message);
-/*!
- * \brief Get the name of the processor.
+/**
+ * @brief Get the name of the processor.
*
- * \param name_str Pointer to received returned processor name.
- * \return 0 for success, -1 for failure.
+ * @param name_str Pointer to receive the returned processor name.
+ * @return 0 for success, -1 for failure.
*/
XGB_DLL int XGCommunicatorGetProcessorName(const char** name_str);
-/*!
- * \brief Broadcast a memory region to all others from root. This function is NOT thread-safe.
+/**
+ * @brief Broadcast a memory region to all others from root. This function is NOT
+ * thread-safe.
*
* Example:
- * \code
+ * @code
* int a = 1;
* Broadcast(&a, sizeof(a), root);
- * \endcode
+ * @endcode
*
- * \param send_receive_buffer Pointer to the send or receive buffer.
- * \param size Size of the data.
- * \param root The process rank to broadcast from.
- * \return 0 for success, -1 for failure.
+ * @param send_receive_buffer Pointer to the send or receive buffer.
+ * @param size Size of the data in bytes.
+ * @param root The process rank to broadcast from.
+ * @return 0 for success, -1 for failure.
*/
XGB_DLL int XGCommunicatorBroadcast(void *send_receive_buffer, size_t size, int root);
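
The example in the comment uses the internal Broadcast() helper; through the exported C symbol the same call looks like the sketch below, with rank 0 chosen as the root purely for illustration.

    #include <xgboost/c_api.h>

    // Broadcast a single integer from rank 0 to every other worker.
    int BroadcastSeed(int *seed) {
      return XGCommunicatorBroadcast(seed, sizeof(*seed), /*root=*/0);
    }
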
-/*!
- * \brief Perform in-place allreduce. This function is NOT thread-safe.
+/**
+ * @brief Perform in-place allreduce. This function is NOT thread-safe.
*
* Example Usage: the following code gives sum of the result
- * \code
- * vector data(10);
+ * @code
+ * enum class Op {
+ * kMax = 0, kMin = 1, kSum = 2, kBitwiseAND = 3, kBitwiseOR = 4, kBitwiseXOR = 5
+ * };
+ * std::vector<int> data(10);
* ...
- * Allreduce(&data[0], data.size(), DataType:kInt32, Op::kSum);
+ * Allreduce(data.data(), data.size(), DataType::kInt32, Op::kSum);
* ...
- * \endcode
+ * @endcode
- * \param send_receive_buffer Buffer for both sending and receiving data.
- * \param count Number of elements to be reduced.
- * \param data_type Enumeration of data type, see xgboost::collective::DataType in communicator.h.
- * \param op Enumeration of operation type, see xgboost::collective::Operation in communicator.h.
- * \return 0 for success, -1 for failure.
+ * @param send_receive_buffer Buffer for both sending and receiving data.
+ * @param count Number of elements to be reduced.
+ * @param data_type Enumeration of data type, see xgboost::collective::DataType in communicator.h.
+ * @param op Enumeration of operation type, see xgboost::collective::Operation in communicator.h.
+ *
+ * @return 0 for success, -1 for failure.
*/
XGB_DLL int XGCommunicatorAllreduce(void *send_receive_buffer, size_t count, int data_type, int op);
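
Similarly, through the exported symbol an in-place sum looks like the sketch below. The numeric codes are assumptions taken from the enumerations quoted in this diff (the Op values in the comment above and the DataType ordering used by the Java binding later in this patch); verify them against xgboost::collective in your build before relying on them.

    #include <xgboost/c_api.h>

    #include <vector>

    int AllreduceSum(std::vector<float> *values) {
      int const kFloat32 = 1;  // assumed to match xgboost::collective::DataType
      int const kSum = 2;      // assumed to match the Op enumeration shown above
      return XGCommunicatorAllreduce(values->data(), values->size(), kFloat32, kSum);
    }
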
diff --git a/include/xgboost/collective/result.h b/include/xgboost/collective/result.h
index 919d3a902298..c126366a07a0 100644
--- a/include/xgboost/collective/result.h
+++ b/include/xgboost/collective/result.h
@@ -3,13 +3,11 @@
*/
#pragma once
-#include
-
-#include // for unique_ptr
-#include // for stringstream
-#include // for stack
-#include // for string
-#include // for move
+#include <cstdint>       // for int32_t
+#include <memory>        // for unique_ptr
+#include <string>        // for string
+#include <system_error>  // for error_code
+#include <utility>       // for move
namespace xgboost::collective {
namespace detail {
@@ -48,48 +46,18 @@ struct ResultImpl {
return cur_eq;
}
- [[nodiscard]] std::string Report() {
- std::stringstream ss;
- ss << "\n- " << this->message;
- if (this->errc != std::error_code{}) {
- ss << " system error:" << this->errc.message();
- }
+ [[nodiscard]] std::string Report() const;
+ [[nodiscard]] std::error_code Code() const;
- auto ptr = prev.get();
- while (ptr) {
- ss << "\n- ";
- ss << ptr->message;
+ void Concat(std::unique_ptr<ResultImpl> rhs);
+};
- if (ptr->errc != std::error_code{}) {
- ss << " " << ptr->errc.message();
- }
- ptr = ptr->prev.get();
- }
+#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__MINGW32__)
+#define __builtin_FILE() nullptr
+#define __builtin_LINE() (-1)
+#endif
- return ss.str();
- }
- [[nodiscard]] auto Code() const {
- // Find the root error.
- std::stack stack;
- auto ptr = this;
- while (ptr) {
- stack.push(ptr);
- if (ptr->prev) {
- ptr = ptr->prev.get();
- } else {
- break;
- }
- }
- while (!stack.empty()) {
- auto frame = stack.top();
- stack.pop();
- if (frame->errc != std::error_code{}) {
- return frame->errc;
- }
- }
- return std::error_code{};
- }
-};
+std::string MakeMsg(std::string&& msg, char const* file, std::int32_t line);
} // namespace detail
/**
@@ -131,8 +99,21 @@ struct Result {
}
return *impl_ == *that.impl_;
}
+
+ friend Result operator+(Result&& lhs, Result&& rhs);
};
+[[nodiscard]] inline Result operator+(Result&& lhs, Result&& rhs) {
+ if (lhs.OK()) {
+ return std::forward<Result>(rhs);
+ }
+ if (rhs.OK()) {
+ return std::forward<Result>(lhs);
+ }
+ lhs.impl_->Concat(std::move(rhs.impl_));
+ return std::forward<Result>(lhs);
+}
+
/**
* @brief Return success.
*/
@@ -140,38 +121,43 @@ struct Result {
/**
* @brief Return failure.
*/
-[[nodiscard]] inline auto Fail(std::string msg) { return Result{std::move(msg)}; }
+[[nodiscard]] inline auto Fail(std::string msg, char const* file = __builtin_FILE(),
+ std::int32_t line = __builtin_LINE()) {
+ return Result{detail::MakeMsg(std::move(msg), file, line)};
+}
/**
* @brief Return failure with `errno`.
*/
-[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc) {
- return Result{std::move(msg), std::move(errc)};
+[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc,
+ char const* file = __builtin_FILE(),
+ std::int32_t line = __builtin_LINE()) {
+ return Result{detail::MakeMsg(std::move(msg), file, line), std::move(errc)};
}
/**
* @brief Return failure with a previous error.
*/
-[[nodiscard]] inline auto Fail(std::string msg, Result&& prev) {
- return Result{std::move(msg), std::forward(prev)};
+[[nodiscard]] inline auto Fail(std::string msg, Result&& prev, char const* file = __builtin_FILE(),
+ std::int32_t line = __builtin_LINE()) {
+ return Result{detail::MakeMsg(std::move(msg), file, line), std::forward<Result>(prev)};
}
/**
* @brief Return failure with a previous error and a new `errno`.
*/
-[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc, Result&& prev) {
- return Result{std::move(msg), std::move(errc), std::forward(prev)};
+[[nodiscard]] inline auto Fail(std::string msg, std::error_code errc, Result&& prev,
+ char const* file = __builtin_FILE(),
+ std::int32_t line = __builtin_LINE()) {
+ return Result{detail::MakeMsg(std::move(msg), file, line), std::move(errc),
+                std::forward<Result>(prev)};
}
// We don't have monad, a simple helper would do.
 template <typename Fn>
-[[nodiscard]] Result operator<<(Result&& r, Fn&& fn) {
+[[nodiscard]] std::enable_if_t, Result> operator<<(Result&& r, Fn&& fn) {
if (!r.OK()) {
return std::forward(r);
}
return fn();
}
-inline void SafeColl(Result const& rc) {
- if (!rc.OK()) {
- LOG(FATAL) << rc.Report();
- }
-}
+void SafeColl(Result const& rc);
} // namespace xgboost::collective
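
With the refactor above, Fail() now records the call site through the __builtin_FILE()/__builtin_LINE() default arguments, operator<< chains fallible steps, and SafeColl() surfaces the accumulated report. A small sketch of how these compose, using hypothetical OpenConfig/ParseConfig steps:

    #include <xgboost/collective/result.h>

    namespace collective = xgboost::collective;

    collective::Result OpenConfig() { return collective::Success(); }
    collective::Result ParseConfig() {
      // File and line are captured implicitly through the default arguments.
      return collective::Fail("Invalid configuration.");
    }

    void Run() {
      auto rc = OpenConfig() << [] { return ParseConfig(); }
                             << [] { return collective::Success(); };  // skipped after a failure
      collective::SafeColl(rc);  // raises a fatal error with the full report on failure
    }
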
diff --git a/include/xgboost/collective/socket.h b/include/xgboost/collective/socket.h
index 84453411046e..c5dd977f6255 100644
--- a/include/xgboost/collective/socket.h
+++ b/include/xgboost/collective/socket.h
@@ -1,5 +1,5 @@
/**
- * Copyright (c) 2022-2023, XGBoost Contributors
+ * Copyright (c) 2022-2024, XGBoost Contributors
*/
#pragma once
@@ -12,11 +12,14 @@
#include // std::size_t
#include // std::int32_t, std::uint16_t
#include // memset
-#include // std::numeric_limits
#include // std::string
#include // std::error_code, std::system_category
#include // std::swap
+#if defined(__linux__)
+#include <sys/ioctl.h>  // for TIOCOUTQ, FIONREAD
+#endif // defined(__linux__)
+
#if !defined(xgboost_IS_MINGW)
#if defined(__MINGW32__)
@@ -125,6 +128,21 @@ inline std::int32_t CloseSocket(SocketT fd) {
#endif
}
+inline std::int32_t ShutdownSocket(SocketT fd) {
+#if defined(_WIN32)
+ auto rc = shutdown(fd, SD_BOTH);
+ if (rc != 0 && LastError() == WSANOTINITIALISED) {
+ return 0;
+ }
+#else
+ auto rc = shutdown(fd, SHUT_RDWR);
+ if (rc != 0 && LastError() == ENOTCONN) {
+ return 0;
+ }
+#endif
+ return rc;
+}
+
inline bool ErrorWouldBlock(std::int32_t errsv) noexcept(true) {
#ifdef _WIN32
return errsv == WSAEWOULDBLOCK;
@@ -305,7 +323,8 @@ class TCPSocket {
std::int32_t domain;
socklen_t len = sizeof(domain);
xgboost_CHECK_SYS_CALL(
- getsockopt(handle_, SOL_SOCKET, SO_DOMAIN, reinterpret_cast(&domain), &len), 0);
+ getsockopt(this->Handle(), SOL_SOCKET, SO_DOMAIN, reinterpret_cast<char *>(&domain), &len),
+ 0);
return ret_iafamily(domain);
#else
struct sockaddr sa;
@@ -412,6 +431,35 @@ class TCPSocket {
return Success();
}
+ [[nodiscard]] Result SendBufSize(std::int32_t *n_bytes) {
+ socklen_t optlen;
+ auto rc = getsockopt(this->Handle(), SOL_SOCKET, SO_SNDBUF, reinterpret_cast<char *>(n_bytes),
+ &optlen);
+ if (rc != 0 || optlen != sizeof(std::int32_t)) {
+ return system::FailWithCode("getsockopt");
+ }
+ return Success();
+ }
+ [[nodiscard]] Result RecvBufSize(std::int32_t *n_bytes) {
+ socklen_t optlen;
+ auto rc = getsockopt(this->Handle(), SOL_SOCKET, SO_RCVBUF, reinterpret_cast<char *>(n_bytes),
+ &optlen);
+ if (rc != 0 || optlen != sizeof(std::int32_t)) {
+ return system::FailWithCode("getsockopt");
+ }
+ return Success();
+ }
+#if defined(__linux__)
+ [[nodiscard]] Result PendingSendSize(std::int32_t *n_bytes) const {
+ return ioctl(this->Handle(), TIOCOUTQ, n_bytes) == 0 ? Success()
+ : system::FailWithCode("ioctl");
+ }
+ [[nodiscard]] Result PendingRecvSize(std::int32_t *n_bytes) const {
+ return ioctl(this->Handle(), FIONREAD, n_bytes) == 0 ? Success()
+ : system::FailWithCode("ioctl");
+ }
+#endif // defined(__linux__)
+
[[nodiscard]] Result SetKeepAlive() {
std::int32_t keepalive = 1;
auto rc = setsockopt(handle_, SOL_SOCKET, SO_KEEPALIVE, reinterpret_cast(&keepalive),
@@ -422,10 +470,9 @@ class TCPSocket {
return Success();
}
- [[nodiscard]] Result SetNoDelay() {
- std::int32_t tcp_no_delay = 1;
- auto rc = setsockopt(handle_, IPPROTO_TCP, TCP_NODELAY, reinterpret_cast(&tcp_no_delay),
- sizeof(tcp_no_delay));
+ [[nodiscard]] Result SetNoDelay(std::int32_t no_delay = 1) {
+ auto rc = setsockopt(handle_, IPPROTO_TCP, TCP_NODELAY, reinterpret_cast(&no_delay),
+ sizeof(no_delay));
if (rc != 0) {
return system::FailWithCode("Failed to set TCP no delay.");
}
@@ -436,41 +483,62 @@ class TCPSocket {
* \brief Accept new connection, returns a new TCP socket for the new connection.
*/
TCPSocket Accept() {
- HandleT newfd = accept(Handle(), nullptr, nullptr);
+ SockAddress addr;
+ TCPSocket newsock;
+ auto rc = this->Accept(&newsock, &addr);
+ SafeColl(rc);
+ return newsock;
+ }
+
+ [[nodiscard]] Result Accept(TCPSocket *out, SockAddress *addr) {
#if defined(_WIN32)
auto interrupt = WSAEINTR;
#else
auto interrupt = EINTR;
#endif
- if (newfd == InvalidSocket() && system::LastError() != interrupt) {
- system::ThrowAtError("accept");
+ if (this->Domain() == SockDomain::kV4) {
+ struct sockaddr_in caddr;
+ socklen_t caddr_len = sizeof(caddr);
+ HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);
+ if (newfd == InvalidSocket() && system::LastError() != interrupt) {
+ return system::FailWithCode("Failed to accept.");
+ }
+ *addr = SockAddress{SockAddrV4{caddr}};
+ *out = TCPSocket{newfd};
+ } else {
+ struct sockaddr_in6 caddr;
+ socklen_t caddr_len = sizeof(caddr);
+ HandleT newfd = accept(Handle(), reinterpret_cast<sockaddr *>(&caddr), &caddr_len);
+ if (newfd == InvalidSocket() && system::LastError() != interrupt) {
+ return system::FailWithCode("Failed to accept.");
+ }
+ *addr = SockAddress{SockAddrV6{caddr}};
+ *out = TCPSocket{newfd};
}
- TCPSocket newsock{newfd};
- return newsock;
- }
-
- [[nodiscard]] Result Accept(TCPSocket *out, SockAddrV4 *addr) {
- struct sockaddr_in caddr;
- socklen_t caddr_len = sizeof(caddr);
- HandleT newfd = accept(Handle(), reinterpret_cast(&caddr), &caddr_len);
- if (newfd == InvalidSocket()) {
- return system::FailWithCode("Failed to accept.");
+ // On MacOS, the accepted socket is automatically made non-blocking if the parent socket
+ // is non-blocking. We make sure all sockets are blocking by default.
+ //
+ // On Windows, a closed socket is returned during shutdown. We guard against it when
+ // setting non-blocking.
+ if (!out->IsClosed()) {
+ return out->NonBlocking(false);
}
- *addr = SockAddrV4{caddr};
- *out = TCPSocket{newfd};
return Success();
}
~TCPSocket() {
if (!IsClosed()) {
- Close();
+ auto rc = this->Close();
+ if (!rc.OK()) {
+ LOG(WARNING) << rc.Report();
+ }
}
}
TCPSocket(TCPSocket const &that) = delete;
TCPSocket(TCPSocket &&that) noexcept(true) { std::swap(this->handle_, that.handle_); }
TCPSocket &operator=(TCPSocket const &that) = delete;
- TCPSocket &operator=(TCPSocket &&that) {
+ TCPSocket &operator=(TCPSocket &&that) noexcept(true) {
std::swap(this->handle_, that.handle_);
return *this;
}
@@ -479,36 +547,49 @@ class TCPSocket {
*/
[[nodiscard]] HandleT const &Handle() const { return handle_; }
/**
- * \brief Listen to incoming requests. Should be called after bind.
+ * @brief Listen to incoming requests. Should be called after bind.
*/
- void Listen(std::int32_t backlog = 16) { xgboost_CHECK_SYS_CALL(listen(handle_, backlog), 0); }
+ [[nodiscard]] Result Listen(std::int32_t backlog = 16) {
+ if (listen(handle_, backlog) != 0) {
+ return system::FailWithCode("Failed to listen.");
+ }
+ return Success();
+ }
/**
- * \brief Bind socket to INADDR_ANY, return the port selected by the OS.
+ * @brief Bind socket to INADDR_ANY, return the port selected by the OS.
*/
- [[nodiscard]] in_port_t BindHost() {
+ [[nodiscard]] Result BindHost(std::int32_t* p_out) {
+ // Use int32 instead of in_port_t for consistency. We take the port as a parameter from
+ // users of other languages, where the port is usually stored and passed around as an int.
if (Domain() == SockDomain::kV6) {
auto addr = SockAddrV6::InaddrAny();
auto handle = reinterpret_cast(&addr.Handle());
- xgboost_CHECK_SYS_CALL(
- bind(handle_, handle, sizeof(std::remove_reference_t)), 0);
+ if (bind(handle_, handle, sizeof(std::remove_reference_t)) != 0) {
+ return system::FailWithCode("bind failed.");
+ }
sockaddr_in6 res_addr;
socklen_t addrlen = sizeof(res_addr);
- xgboost_CHECK_SYS_CALL(
- getsockname(handle_, reinterpret_cast(&res_addr), &addrlen), 0);
- return ntohs(res_addr.sin6_port);
+ if (getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen) != 0) {
+ return system::FailWithCode("getsockname failed.");
+ }
+ *p_out = ntohs(res_addr.sin6_port);
} else {
auto addr = SockAddrV4::InaddrAny();
auto handle = reinterpret_cast(&addr.Handle());
- xgboost_CHECK_SYS_CALL(
- bind(handle_, handle, sizeof(std::remove_reference_t)), 0);
+ if (bind(handle_, handle, sizeof(std::remove_reference_t)) != 0) {
+ return system::FailWithCode("bind failed.");
+ }
sockaddr_in res_addr;
socklen_t addrlen = sizeof(res_addr);
- xgboost_CHECK_SYS_CALL(
- getsockname(handle_, reinterpret_cast(&res_addr), &addrlen), 0);
- return ntohs(res_addr.sin_port);
+ if (getsockname(handle_, reinterpret_cast<sockaddr *>(&res_addr), &addrlen) != 0) {
+ return system::FailWithCode("getsockname failed.");
+ }
+ *p_out = ntohs(res_addr.sin_port);
}
+
+ return Success();
}
[[nodiscard]] auto Port() const {
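
A server-side sketch of the Result-based socket API after this change. It assumes the TCPSocket::Create(SockDomain) factory referenced near the end of this header; everything else follows the signatures shown above, chained with the operator<< helper from result.h.

    #include <xgboost/collective/socket.h>

    #include <cstdint>
    #include <iostream>

    namespace collective = xgboost::collective;

    collective::Result RunServer() {
      auto listener = collective::TCPSocket::Create(collective::SockDomain::kV4);
      std::int32_t port{0};
      collective::TCPSocket conn;
      collective::SockAddress addr;
      auto rc = listener.BindHost(&port)                           // port chosen by the OS
                << [&] { return listener.Listen(); }               // default backlog is 16
                << [&] { return listener.Accept(&conn, &addr); };  // accepted socket is blocking
      if (rc.OK()) {
        std::cout << "accepted a connection on port " << port << std::endl;
      }
      return rc;
    }
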
@@ -554,45 +635,47 @@ class TCPSocket {
}
/**
- * \brief Send data, without error then all data should be sent.
+ * @brief Send data. If there is no error, all data should have been sent.
*/
- [[nodiscard]] auto SendAll(void const *buf, std::size_t len) {
+ [[nodiscard]] Result SendAll(void const *buf, std::size_t len, std::size_t *n_sent) {
char const *_buf = reinterpret_cast(buf);
- std::size_t ndone = 0;
+ std::size_t &ndone = *n_sent;
+ ndone = 0;
while (ndone < len) {
ssize_t ret = send(handle_, _buf, len - ndone, 0);
if (ret == -1) {
if (system::LastErrorWouldBlock()) {
- return ndone;
+ return Success();
}
- system::ThrowAtError("send");
+ return system::FailWithCode("send");
}
_buf += ret;
ndone += ret;
}
- return ndone;
+ return Success();
}
/**
- * \brief Receive data, without error then all data should be received.
+ * @brief Receive data. If there is no error, all data should have been received.
*/
- [[nodiscard]] auto RecvAll(void *buf, std::size_t len) {
+ [[nodiscard]] Result RecvAll(void *buf, std::size_t len, std::size_t *n_recv) {
char *_buf = reinterpret_cast(buf);
- std::size_t ndone = 0;
+ std::size_t &ndone = *n_recv;
+ ndone = 0;
while (ndone < len) {
ssize_t ret = recv(handle_, _buf, len - ndone, MSG_WAITALL);
if (ret == -1) {
if (system::LastErrorWouldBlock()) {
- return ndone;
+ return Success();
}
- system::ThrowAtError("recv");
+ return system::FailWithCode("recv");
}
if (ret == 0) {
- return ndone;
+ return Success();
}
_buf += ret;
ndone += ret;
}
- return ndone;
+ return Success();
}
/**
* \brief Send data using the socket
@@ -621,26 +704,49 @@ class TCPSocket {
*/
std::size_t Send(StringView str);
/**
- * \brief Receive string, format is matched with the Python socket wrapper in RABIT.
+ * @brief Receive string, format is matched with the Python socket wrapper in RABIT.
*/
- std::size_t Recv(std::string *p_str);
+ [[nodiscard]] Result Recv(std::string *p_str);
/**
- * \brief Close the socket, called automatically in destructor if the socket is not closed.
+ * @brief Close the socket, called automatically in destructor if the socket is not closed.
*/
- void Close() {
+ [[nodiscard]] Result Close() {
if (InvalidSocket() != handle_) {
-#if defined(_WIN32)
auto rc = system::CloseSocket(handle_);
+#if defined(_WIN32)
// it's possible that we close TCP sockets after finalizing WSA due to detached thread.
if (rc != 0 && system::LastError() != WSANOTINITIALISED) {
- system::ThrowAtError("close", rc);
+ return system::FailWithCode("Failed to close the socket.");
}
#else
- xgboost_CHECK_SYS_CALL(system::CloseSocket(handle_), 0);
+ if (rc != 0) {
+ return system::FailWithCode("Failed to close the socket.");
+ }
#endif
handle_ = InvalidSocket();
}
+ return Success();
}
+ /**
+ * @brief Call shutdown on the socket.
+ */
+ [[nodiscard]] Result Shutdown() {
+ if (this->IsClosed()) {
+ return Success();
+ }
+ auto rc = system::ShutdownSocket(this->Handle());
+#if defined(_WIN32)
+ // Windows cannot shutdown a socket if it's not connected.
+ if (rc == -1 && system::LastError() == WSAENOTCONN) {
+ return Success();
+ }
+#endif
+ if (rc != 0) {
+ return system::FailWithCode("Failed to shutdown socket.");
+ }
+ return Success();
+ }
+
/**
* \brief Create a TCP socket on specified domain.
*/
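
On the client side, SendAll()/RecvAll() now report the number of bytes transferred through an out-parameter and return a Result, and Shutdown()/Close() are Result-based as well. A hedged sketch, assuming `conn` is an already-connected socket (connection setup is unchanged by this diff):

    #include <xgboost/collective/socket.h>

    #include <cstddef>

    namespace collective = xgboost::collective;

    collective::Result Echo(collective::TCPSocket *conn) {
      char const msg[] = "hello";
      std::size_t n_sent{0};
      auto send_rc = conn->SendAll(msg, sizeof(msg), &n_sent);
      if (!send_rc.OK()) {
        return send_rc;
      }
      if (n_sent != sizeof(msg)) {
        // A partial transfer is possible on a non-blocking socket.
        return collective::Fail("Failed to send the full message.");
      }
      char buf[sizeof(msg)];
      std::size_t n_recv{0};
      auto recv_rc = conn->RecvAll(buf, sizeof(buf), &n_recv);
      if (!recv_rc.OK()) {
        return recv_rc;
      }
      // Shut down first so the peer observes EOF, then release the descriptor.
      return conn->Shutdown() << [&] { return conn->Close(); };
    }
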
diff --git a/include/xgboost/data.h b/include/xgboost/data.h
index c449164ca572..05e2cb0080f0 100644
--- a/include/xgboost/data.h
+++ b/include/xgboost/data.h
@@ -19,7 +19,6 @@
#include
#include
#include
-#include
#include
#include
#include
@@ -137,14 +136,6 @@ class MetaInfo {
* \param fo The output stream.
*/
void SaveBinary(dmlc::Stream* fo) const;
- /*!
- * \brief Set information in the meta info.
- * \param key The key of the information.
- * \param dptr The data pointer of the source array.
- * \param dtype The type of the source data.
- * \param num Number of elements in the source array.
- */
- void SetInfo(Context const& ctx, const char* key, const void* dptr, DataType dtype, size_t num);
/*!
* \brief Set information in the meta info with array interface.
* \param key The key of the information.
@@ -320,7 +311,7 @@ struct BatchParam {
struct HostSparsePageView {
using Inst = common::Span;
- common::Span offset;
+ common::Span offset;
common::Span data;
Inst operator[](size_t i) const {
@@ -338,7 +329,7 @@ struct HostSparsePageView {
class SparsePage {
public:
// Offset for each row.
- HostDeviceVector offset;
+ HostDeviceVector offset;
/*! \brief the data of the segments */
HostDeviceVector data;
@@ -522,10 +513,6 @@ class DMatrix {
DMatrix() = default;
/*! \brief meta information of the dataset */
virtual MetaInfo& Info() = 0;
- virtual void SetInfo(const char* key, const void* dptr, DataType dtype, size_t num) {
- auto const& ctx = *this->Ctx();
- this->Info().SetInfo(ctx, key, dptr, dtype, num);
- }
virtual void SetInfo(const char* key, std::string const& interface_str) {
auto const& ctx = *this->Ctx();
this->Info().SetInfo(ctx, key, StringView{interface_str});
diff --git a/include/xgboost/json.h b/include/xgboost/json.h
index 77ca6a510c96..1416b8899785 100644
--- a/include/xgboost/json.h
+++ b/include/xgboost/json.h
@@ -60,9 +60,7 @@ class Value {
virtual Json& operator[](int ind);
virtual bool operator==(Value const& rhs) const = 0;
-#if !defined(__APPLE__)
virtual Value& operator=(Value const& rhs) = delete;
-#endif // !defined(__APPLE__)
std::string TypeStr() const;
@@ -105,6 +103,7 @@ class JsonString : public Value {
std::string& GetString() & { return str_; }
bool operator==(Value const& rhs) const override;
+ Value& operator=(Value const& rhs) override = delete;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kString;
@@ -134,6 +133,7 @@ class JsonArray : public Value {
std::vector& GetArray() & { return vec_; }
bool operator==(Value const& rhs) const override;
+ Value& operator=(Value const& rhs) override = delete;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kArray;
@@ -158,6 +158,7 @@ class JsonTypedArray : public Value {
JsonTypedArray(JsonTypedArray&& that) noexcept : Value{kind}, vec_{std::move(that.vec_)} {}
bool operator==(Value const& rhs) const override;
+ Value& operator=(Value const& rhs) override = delete;
void Set(size_t i, T v) { vec_[i] = v; }
size_t Size() const { return vec_.size(); }
@@ -216,6 +217,7 @@ class JsonObject : public Value {
Map& GetObject() & { return object_; }
bool operator==(Value const& rhs) const override;
+ Value& operator=(Value const& rhs) override = delete;
static bool IsClassOf(Value const* value) { return value->Type() == ValueKind::kObject; }
~JsonObject() override = default;
@@ -249,6 +251,7 @@ class JsonNumber : public Value {
Float& GetNumber() & { return number_; }
bool operator==(Value const& rhs) const override;
+ Value& operator=(Value const& rhs) override = delete;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kNumber;
@@ -287,6 +290,7 @@ class JsonInteger : public Value {
: Value{ValueKind::kInteger}, integer_{that.integer_} {}
bool operator==(Value const& rhs) const override;
+ Value& operator=(Value const& rhs) override = delete;
Int const& GetInteger() && { return integer_; }
Int const& GetInteger() const & { return integer_; }
@@ -307,6 +311,7 @@ class JsonNull : public Value {
void Save(JsonWriter* writer) const override;
bool operator==(Value const& rhs) const override;
+ Value& operator=(Value const& rhs) override = delete;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kNull;
@@ -336,6 +341,7 @@ class JsonBoolean : public Value {
bool& GetBoolean() & { return boolean_; }
bool operator==(Value const& rhs) const override;
+ Value& operator=(Value const& rhs) override = delete;
static bool IsClassOf(Value const* value) {
return value->Type() == ValueKind::kBoolean;
diff --git a/include/xgboost/linalg.h b/include/xgboost/linalg.h
index 581b2f0804c9..cb7668f4cdd1 100644
--- a/include/xgboost/linalg.h
+++ b/include/xgboost/linalg.h
@@ -190,13 +190,14 @@ constexpr auto ArrToTuple(T (&arr)[N]) {
// uint division optimization inspired by the CIndexer in cupy. Division operation is
// slow on both CPU and GPU, especially 64 bit integer. So here we first try to avoid 64
// bit when the index is smaller, then try to avoid division when it's exp of 2.
-template
+template
LINALG_HD auto UnravelImpl(I idx, common::Span shape) {
- size_t index[D]{0};
+ std::size_t index[D]{0};
static_assert(std::is_signed::value,
"Don't change the type without changing the for loop.");
+ auto const sptr = shape.data();
for (int32_t dim = D; --dim > 0;) {
- auto s = static_cast>>(shape[dim]);
+ auto s = static_cast>>(sptr[dim]);
if (s & (s - 1)) {
auto t = idx / s;
index[dim] = idx - t * s;
@@ -295,6 +296,9 @@ class TensorView {
using ShapeT = std::size_t[kDim];
using StrideT = ShapeT;
+ using element_type = T; // NOLINT
+ using value_type = std::remove_cv_t; // NOLINT
+
private:
StrideT stride_{1};
ShapeT shape_{0};
@@ -314,7 +318,7 @@ class TensorView {
}
template
- LINALG_HD size_t MakeSliceDim(size_t new_shape[D], size_t new_stride[D],
+ LINALG_HD size_t MakeSliceDim(std::size_t new_shape[D], std::size_t new_stride[D],
detail::RangeTag &&range) const {
static_assert(new_dim < D);
static_assert(old_dim < kDim);
@@ -528,9 +532,10 @@ class TensorView {
LINALG_HD auto Stride(size_t i) const { return stride_[i]; }
/**
- * \brief Number of items in the tensor.
+ * @brief Number of items in the tensor.
*/
[[nodiscard]] LINALG_HD std::size_t Size() const { return size_; }
+ [[nodiscard]] bool Empty() const { return Size() == 0; }
/**
* \brief Whether this is a contiguous array, both C and F contiguous returns true.
*/
@@ -741,6 +746,14 @@ auto ArrayInterfaceStr(TensorView const &t) {
return str;
}
+template <typename T>
+auto Make1dInterface(T const *vec, std::size_t len) {
+ Context ctx;
+ auto t = linalg::MakeTensorView(&ctx, common::Span{vec, len}, len);
+ auto str = linalg::ArrayInterfaceStr(t);
+ return str;
+}
+
/**
* \brief A tensor storage. To use it for other functionality like slicing one needs to
* obtain a view first. This way we can use it on both host and device.
@@ -865,7 +878,9 @@ class Tensor {
auto HostView() { return this->View(DeviceOrd::CPU()); }
auto HostView() const { return this->View(DeviceOrd::CPU()); }
- [[nodiscard]] size_t Size() const { return data_.Size(); }
+ [[nodiscard]] std::size_t Size() const { return data_.Size(); }
+ [[nodiscard]] bool Empty() const { return Size() == 0; }
+
auto Shape() const { return common::Span{shape_}; }
auto Shape(size_t i) const { return shape_[i]; }
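
The new Make1dInterface() helper added above wraps a contiguous host buffer in a 1-d array-interface JSON string, which is the format the array-based setters in this code base consume. A small usage sketch:

    #include <xgboost/linalg.h>

    #include <iostream>
    #include <vector>

    int main() {
      std::vector<float> weights{0.5f, 1.0f, 2.0f};
      auto iface = xgboost::linalg::Make1dInterface(weights.data(), weights.size());
      std::cout << iface << std::endl;  // prints the array-interface JSON for the buffer
      return 0;
    }
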
diff --git a/include/xgboost/span.h b/include/xgboost/span.h
index be8640f73695..7471c2e44ed6 100644
--- a/include/xgboost/span.h
+++ b/include/xgboost/span.h
@@ -30,9 +30,8 @@
#define XGBOOST_SPAN_H_
#include
-#include
-#include // size_t
+#include // size_t
#include
#include
#include // numeric_limits
@@ -73,8 +72,7 @@
#endif // defined(_MSC_VER) && _MSC_VER < 1910
-namespace xgboost {
-namespace common {
+namespace xgboost::common {
#if defined(__CUDA_ARCH__)
// Usual logging facility is not available inside device code.
@@ -701,14 +699,14 @@ class IterSpan {
return {data() + _offset, _count == dynamic_extent ? size() - _offset : _count};
}
[[nodiscard]] XGBOOST_DEVICE constexpr iterator begin() const noexcept { // NOLINT
- return {this, 0};
+ return it_;
}
[[nodiscard]] XGBOOST_DEVICE constexpr iterator end() const noexcept { // NOLINT
- return {this, size()};
+ return it_ + size();
}
};
-} // namespace common
-} // namespace xgboost
+} // namespace xgboost::common
+
#if defined(_MSC_VER) &&_MSC_VER < 1910
#undef constexpr
diff --git a/include/xgboost/tree_model.h b/include/xgboost/tree_model.h
index 4c475da2ea29..32b93c5cacaf 100644
--- a/include/xgboost/tree_model.h
+++ b/include/xgboost/tree_model.h
@@ -1,5 +1,5 @@
/**
- * Copyright 2014-2023 by Contributors
+ * Copyright 2014-2024, XGBoost Contributors
* \file tree_model.h
* \brief model structure for tree
* \author Tianqi Chen
@@ -688,6 +688,9 @@ class RegTree : public Model {
}
return (*this)[nidx].DefaultLeft();
}
+ [[nodiscard]] bst_node_t DefaultChild(bst_node_t nidx) const {
+ return this->DefaultLeft(nidx) ? this->LeftChild(nidx) : this->RightChild(nidx);
+ }
[[nodiscard]] bool IsRoot(bst_node_t nidx) const {
if (IsMultiTarget()) {
return nidx == kRoot;
diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py
index 865d07fe8b0f..693546862b63 100755
--- a/jvm-packages/create_jni.py
+++ b/jvm-packages/create_jni.py
@@ -23,6 +23,7 @@
"USE_NCCL": "OFF",
"JVM_BINDINGS": "ON",
"LOG_CAPI_INVOCATION": "OFF",
+ "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
}
@@ -97,10 +98,6 @@ def native_build(args):
args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
- # if enviorment set rabit_mock
- if os.getenv("RABIT_MOCK", None) is not None:
- args.append("-DRABIT_MOCK:BOOL=ON")
-
 # if environment set GPU_ARCH_FLAG
gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None)
if gpu_arch_flag is not None:
@@ -162,12 +159,6 @@ def native_build(args):
maybe_makedirs(output_folder)
cp("../lib/" + library_name, output_folder)
- print("copying pure-Python tracker")
- cp(
- "../python-package/xgboost/tracker.py",
- "{}/src/main/resources".format(xgboost4j),
- )
-
print("copying train/test files")
maybe_makedirs("{}/src/test/resources".format(xgboost4j_spark))
with cd("../demo/CLI/regression"):
diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml
index 23ab70734ac6..17afbe48d2cc 100644
--- a/jvm-packages/pom.xml
+++ b/jvm-packages/pom.xml
@@ -33,21 +33,21 @@
UTF-8
1.8
1.8
- 1.18.0
+ 1.19.0
4.13.2
3.4.1
3.4.1
2.12.18
2.12
- 3.3.6
+ 3.4.0
5
OFF
OFF
23.12.1
23.12.1
cuda12
- 3.2.17
- 2.11.0
+ 3.2.18
+ 2.12.0
@@ -123,7 +123,7 @@
org.apache.maven.plugins
maven-jar-plugin
- 3.3.0
+ 3.4.1
empty-javadoc-jar
@@ -152,7 +152,7 @@
org.apache.maven.plugins
maven-gpg-plugin
- 3.1.0
+ 3.2.4
sign-artifacts
@@ -166,7 +166,7 @@
org.apache.maven.plugins
maven-source-plugin
- 3.3.0
+ 3.3.1
attach-sources
@@ -204,7 +204,7 @@
org.apache.maven.plugins
maven-assembly-plugin
- 3.6.0
+ 3.7.1
jar-with-dependencies
@@ -275,7 +275,7 @@
org.apache.maven.plugins
maven-deploy-plugin
- 3.1.1
+ 3.1.2
internal.repo::default::file://${project.build.directory}/mvn-repo
@@ -410,7 +410,7 @@
net.alchim31.maven
scala-maven-plugin
- 4.8.1
+ 4.9.0
compile
@@ -445,7 +445,7 @@
org.apache.maven.plugins
maven-surefire-plugin
- 3.2.2
+ 3.2.5
false
false
@@ -473,7 +473,7 @@
net.alchim31.maven
scala-maven-plugin
- 4.8.1
+ 4.9.0
-Xms64m
@@ -487,12 +487,17 @@
com.esotericsoftware
kryo
- 5.5.0
+ 5.6.0
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ 2.14.2
commons-logging
commons-logging
- 1.3.0
+ 1.3.2
org.scalatest
diff --git a/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java b/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java
index 7a5e3ac68815..99608b927489 100644
--- a/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java
+++ b/jvm-packages/xgboost4j-flink/src/main/java/ml/dmlc/xgboost4j/java/flink/XGBoost.java
@@ -54,9 +54,9 @@ private static class MapFunction
private final Map params;
private final int round;
- private final Map workerEnvs;
+ private final Map workerEnvs;
- public MapFunction(Map params, int round, Map workerEnvs) {
+ public MapFunction(Map params, int round, Map workerEnvs) {
this.params = params;
this.round = round;
this.workerEnvs = workerEnvs;
@@ -174,9 +174,9 @@ public static XGBoostModel train(DataSet> dtrain,
int numBoostRound) throws Exception {
final RabitTracker tracker =
new RabitTracker(dtrain.getExecutionEnvironment().getParallelism());
- if (tracker.start(0L)) {
+ if (tracker.start()) {
return dtrain
- .mapPartition(new MapFunction(params, numBoostRound, tracker.getWorkerEnvs()))
+ .mapPartition(new MapFunction(params, numBoostRound, tracker.workerArgs()))
.reduce((x, y) -> x)
.collect()
.get(0);
diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml
index fc55dd15618c..25b44d6b2d2d 100644
--- a/jvm-packages/xgboost4j-gpu/pom.xml
+++ b/jvm-packages/xgboost4j-gpu/pom.xml
@@ -72,7 +72,7 @@
org.apache.maven.plugins
maven-javadoc-plugin
- 3.6.2
+ 3.6.3
protected
true
@@ -88,7 +88,7 @@
exec-maven-plugin
org.codehaus.mojo
- 3.1.0
+ 3.2.0
native
@@ -113,7 +113,7 @@
org.apache.maven.plugins
maven-jar-plugin
- 3.3.0
+ 3.4.0
diff --git a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuPreXGBoost.scala b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuPreXGBoost.scala
index d34802805d79..7e83dc6f17b0 100644
--- a/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuPreXGBoost.scala
+++ b/jvm-packages/xgboost4j-spark-gpu/src/main/scala/ml/dmlc/xgboost4j/scala/rapids/spark/GpuPreXGBoost.scala
@@ -160,7 +160,7 @@ object GpuPreXGBoost extends PreXGBoostProvider {
// Check columns and build column data batch
val trainingData = GpuUtils.buildColumnDataBatch(feturesCols,
- labelName, weightName, marginName, "", castedDF)
+ labelName, weightName, marginName, groupName, castedDF)
// eval map
val evalDataMap = evalSets.map {
diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
index 5a1af886fc3d..e17c68355c5b 100644
--- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
+++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014-2023 by Contributors
+ Copyright (c) 2014-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@ import scala.collection.mutable
import scala.util.Random
import scala.collection.JavaConverters._
-import ml.dmlc.xgboost4j.java.{Communicator, IRabitTracker, XGBoostError, RabitTracker => PyRabitTracker}
+import ml.dmlc.xgboost4j.java.{Communicator, ITracker, XGBoostError, RabitTracker}
import ml.dmlc.xgboost4j.scala.ExternalCheckpointManager
import ml.dmlc.xgboost4j.scala.{XGBoost => SXGBoost, _}
import ml.dmlc.xgboost4j.{LabeledPoint => XGBLabeledPoint}
@@ -38,21 +38,17 @@ import org.apache.spark.sql.SparkSession
/**
* Rabit tracker configurations.
*
- * @param workerConnectionTimeout The timeout for all workers to connect to the tracker.
- * Set timeout length to zero to disable timeout.
- * Use a finite, non-zero timeout value to prevent tracker from
- * hanging indefinitely (in milliseconds)
- * (supported by "scala" implementation only.)
- * @param hostIp The Rabit Tracker host IP address which is only used for python implementation.
+ * @param timeout The number of seconds before timeout waiting for workers to connect and
+ *                for the tracker to shut down.
+ * @param hostIp The Rabit Tracker host IP address.
* This is only needed if the host IP cannot be automatically guessed.
- * @param pythonExec The python executed path for Rabit Tracker,
- * which is only used for python implementation.
+ * @param port The port number for the tracker to listen to. Use a system allocated one by
+ * default.
*/
-case class TrackerConf(workerConnectionTimeout: Long,
- hostIp: String = "", pythonExec: String = "")
+case class TrackerConf(timeout: Int, hostIp: String = "", port: Int = 0)
object TrackerConf {
- def apply(): TrackerConf = TrackerConf(0L)
+ def apply(): TrackerConf = TrackerConf(0)
}
private[scala] case class XGBoostExecutionInputParams(trainTestRatio: Double, seed: Long)
@@ -421,7 +417,7 @@ object XGBoost extends XGBoostStageLevel {
private def buildDistributedBooster(
buildWatches: () => Watches,
xgbExecutionParam: XGBoostExecutionParams,
- rabitEnv: java.util.Map[String, String],
+ rabitEnv: java.util.Map[String, Object],
obj: ObjectiveTrait,
eval: EvalTrait,
prevBooster: Booster): Iterator[(Booster, Map[String, Array[Float]])] = {
@@ -430,7 +426,6 @@ object XGBoost extends XGBoostStageLevel {
val taskId = TaskContext.getPartitionId().toString
val attempt = TaskContext.get().attemptNumber.toString
rabitEnv.put("DMLC_TASK_ID", taskId)
- rabitEnv.put("DMLC_NUM_ATTEMPT", attempt)
val numRounds = xgbExecutionParam.numRounds
val makeCheckpoint = xgbExecutionParam.checkpointParam.isDefined && taskId.toInt == 0
@@ -481,16 +476,15 @@ object XGBoost extends XGBoostStageLevel {
}
 /** visible for testing */
- private[scala] def getTracker(nWorkers: Int, trackerConf: TrackerConf): IRabitTracker = {
- val tracker: IRabitTracker = new PyRabitTracker(
- nWorkers, trackerConf.hostIp, trackerConf.pythonExec
- )
+ private[scala] def getTracker(nWorkers: Int, trackerConf: TrackerConf): ITracker = {
+ val tracker: ITracker = new RabitTracker(
+ nWorkers, trackerConf.hostIp, trackerConf.port, trackerConf.timeout)
tracker
}
- private def startTracker(nWorkers: Int, trackerConf: TrackerConf): IRabitTracker = {
+ private def startTracker(nWorkers: Int, trackerConf: TrackerConf): ITracker = {
val tracker = getTracker(nWorkers, trackerConf)
- require(tracker.start(trackerConf.workerConnectionTimeout), "FAULT: Failed to start tracker")
+ require(tracker.start(), "FAULT: Failed to start tracker")
tracker
}
@@ -525,8 +519,8 @@ object XGBoost extends XGBoostStageLevel {
// Train for every ${savingRound} rounds and save the partially completed booster
val tracker = startTracker(xgbExecParams.numWorkers, xgbExecParams.trackerConf)
val (booster, metrics) = try {
- tracker.getWorkerEnvs().putAll(xgbRabitParams)
- val rabitEnv = tracker.getWorkerEnvs
+ tracker.workerArgs().putAll(xgbRabitParams)
+ val rabitEnv = tracker.workerArgs
val boostersAndMetrics = trainingRDD.barrier().mapPartitions { iter => {
var optionWatches: Option[() => Watches] = None
@@ -548,11 +542,6 @@ object XGBoost extends XGBoostStageLevel {
// of the training task fails the training stage can retry. ResultStage won't retry when
// it fails.
val (booster, metrics) = boostersAndMetricsWithRes.repartition(1).collect()(0)
- val trackerReturnVal = tracker.waitFor(0L)
- logger.info(s"Rabit returns with exit code $trackerReturnVal")
- if (trackerReturnVal != 0) {
- throw new XGBoostError("XGBoostModel training failed.")
- }
(booster, metrics)
} finally {
tracker.stop()
diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/GeneralParams.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/GeneralParams.scala
index b85f4dc8b3ad..fafbd816a265 100644
--- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/GeneralParams.scala
+++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/params/GeneralParams.scala
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014-2022 by Contributors
+ Copyright (c) 2014-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -145,28 +145,28 @@ private[spark] trait GeneralParams extends Params {
* Rabit tracker configurations. The parameter must be provided as an instance of the
* TrackerConf class, which has the following definition:
*
- * case class TrackerConf(workerConnectionTimeout: Duration, trainingTimeout: Duration,
- * trackerImpl: String)
+ * case class TrackerConf(timeout: Int, hostIp: String, port: Int)
*
* See below for detailed explanations.
*
- * - trackerImpl: Select the implementation of Rabit tracker.
- * default: "python"
- *
- * Choice between "python" or "scala". The former utilizes the Java wrapper of the
- * Python Rabit tracker (in dmlc_core), and does not support timeout settings.
- * The "scala" version removes Python components, and fully supports timeout settings.
- *
- * - workerConnectionTimeout: the maximum wait time for all workers to connect to the tracker.
- * default: 0 millisecond (no timeout)
+ * - timeout : The maximum wait time, in seconds, for all workers to connect to the tracker.
+ * default: 0 (no timeout)
*
+ * This timeout applies to constructing the communication group and to waiting for the
+ * tracker to shut down when instructed to; it doesn't apply to communication once the
+ * group is up and running.
* The timeout value should take the time of data loading and pre-processing into account,
- * due to the lazy execution of Spark's operations. Alternatively, you may force Spark to
+ * due to potential lazy execution. Alternatively, you may force Spark to
* perform data transformation before calling XGBoost.train(), so that this timeout truly
* reflects the connection delay. Set a reasonable timeout value to prevent model
 * training/testing from hanging indefinitely, possibly due to network issues.
* Note that zero timeout value means to wait indefinitely (equivalent to Duration.Inf).
- * Ignored if the tracker implementation is "python".
+ *
+ * - hostIp : The Rabit Tracker host IP address. This is only needed if the host IP
+ * cannot be automatically guessed.
+ *
+ * - port : The port number for the tracker to listen to. Use a system allocated one by
+ * default.
*/
final val trackerConf = new TrackerConfParam(this, "trackerConf", "Rabit tracker configurations")
setDefault(trackerConf, TrackerConf())
diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala
index 5445cd1bf6a1..108053af5d76 100644
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/CommunicatorRobustnessSuite.scala
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2014-2022 by Contributors
+ Copyright (c) 2014-2024 by Contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -20,8 +20,7 @@ import java.util.concurrent.LinkedBlockingDeque
import scala.util.Random
-import ml.dmlc.xgboost4j.java.{Communicator, RabitTracker => PyRabitTracker}
-import ml.dmlc.xgboost4j.java.IRabitTracker.TrackerStatus
+import ml.dmlc.xgboost4j.java.{Communicator, RabitTracker}
import ml.dmlc.xgboost4j.scala.DMatrix
import org.scalatest.funsuite.AnyFunSuite
@@ -33,50 +32,6 @@ class CommunicatorRobustnessSuite extends AnyFunSuite with PerTest {
xgbParamsFactory.buildXGBRuntimeParams
}
- test("Customize host ip and python exec for Rabit tracker") {
- val hostIp = "192.168.22.111"
- val pythonExec = "/usr/bin/python3"
-
- val paramMap = Map(
- "num_workers" -> numWorkers,
- "tracker_conf" -> TrackerConf(0L, hostIp))
- val xgbExecParams = getXGBoostExecutionParams(paramMap)
- val tracker = XGBoost.getTracker(xgbExecParams.numWorkers, xgbExecParams.trackerConf)
- tracker match {
- case pyTracker: PyRabitTracker =>
- val cmd = pyTracker.getRabitTrackerCommand
- assert(cmd.contains(hostIp))
- assert(cmd.startsWith("python"))
- case _ => assert(false, "expected python tracker implementation")
- }
-
- val paramMap1 = Map(
- "num_workers" -> numWorkers,
- "tracker_conf" -> TrackerConf(0L, "", pythonExec))
- val xgbExecParams1 = getXGBoostExecutionParams(paramMap1)
- val tracker1 = XGBoost.getTracker(xgbExecParams1.numWorkers, xgbExecParams1.trackerConf)
- tracker1 match {
- case pyTracker: PyRabitTracker =>
- val cmd = pyTracker.getRabitTrackerCommand
- assert(cmd.startsWith(pythonExec))
- assert(!cmd.contains(hostIp))
- case _ => assert(false, "expected python tracker implementation")
- }
-
- val paramMap2 = Map(
- "num_workers" -> numWorkers,
- "tracker_conf" -> TrackerConf(0L, hostIp, pythonExec))
- val xgbExecParams2 = getXGBoostExecutionParams(paramMap2)
- val tracker2 = XGBoost.getTracker(xgbExecParams2.numWorkers, xgbExecParams2.trackerConf)
- tracker2 match {
- case pyTracker: PyRabitTracker =>
- val cmd = pyTracker.getRabitTrackerCommand
- assert(cmd.startsWith(pythonExec))
- assert(cmd.contains(s" --host-ip=${hostIp}"))
- case _ => assert(false, "expected python tracker implementation")
- }
- }
-
test("test Java RabitTracker wrapper's exception handling: it should not hang forever.") {
/*
Deliberately create new instances of SparkContext in each unit test to avoid reusing the
@@ -88,9 +43,9 @@ class CommunicatorRobustnessSuite extends AnyFunSuite with PerTest {
*/
val rdd = sc.parallelize(1 to numWorkers, numWorkers).cache()
- val tracker = new PyRabitTracker(numWorkers)
- tracker.start(0)
- val trackerEnvs = tracker.getWorkerEnvs
+ val tracker = new RabitTracker(numWorkers)
+ tracker.start()
+ val trackerEnvs = tracker.workerArgs
val workerCount: Int = numWorkers
/*
@@ -99,22 +54,8 @@ class CommunicatorRobustnessSuite extends AnyFunSuite with PerTest {
thrown: the thread running the dummy spark job (sparkThread) catches the exception and
delegates it to the UnCaughtExceptionHandler, which is the Rabit tracker itself.
- The Java RabitTracker class reacts to exceptions by killing the spawned process running
- the Python tracker. If at least one Rabit worker has yet connected to the tracker before
- it is killed, the resulted connection failure will trigger the Rabit worker to call
- "exit(-1);" in the native C++ code, effectively ending the dummy Spark task.
-
- In cluster (standalone or YARN) mode of Spark, tasks are run in containers and thus are
- isolated from each other. That is, one task calling "exit(-1);" has no effect on other tasks
- running in separate containers. However, as unit tests are run in Spark local mode, in which
- tasks are executed by threads belonging to the same process, one thread calling "exit(-1);"
- ultimately kills the entire process, which also happens to host the Spark driver, causing
- the entire Spark application to crash.
-
To prevent unit tests from crashing, deterministic delays were introduced to make sure that
the exception is thrown at last, ideally after all worker connections have been established.
- For the same reason, the Java RabitTracker class delays the killing of the Python tracker
- process to ensure that pending worker connections are handled.
*/
val dummyTasks = rdd.mapPartitions { iter =>
Communicator.init(trackerEnvs)
@@ -137,7 +78,32 @@ class CommunicatorRobustnessSuite extends AnyFunSuite with PerTest {
sparkThread.setUncaughtExceptionHandler(tracker)
sparkThread.start()
- assert(tracker.waitFor(0) != 0)
+ }
+
+ test("Communicator allreduce works.") {
+ val rdd = sc.parallelize(1 to numWorkers, numWorkers).cache()
+ val tracker = new RabitTracker(numWorkers)
+ tracker.start()
+ val trackerEnvs = tracker.workerArgs
+
+ val workerCount: Int = numWorkers
+
+ rdd.mapPartitions { iter =>
+ val index = iter.next()
+ Communicator.init(trackerEnvs)
+ val a = Array(1.0f, 2.0f, 3.0f)
+ System.out.println(a.mkString(", "))
+ val b = Communicator.allReduce(a, Communicator.OpType.SUM)
+ for (i <- 0 to 2) {
+ assert(a(i) * workerCount == b(i))
+ }
+ val c = Communicator.allReduce(a, Communicator.OpType.MIN);
+ for (i <- 0 to 2) {
+ assert(a(i) == c(i))
+ }
+ Communicator.shutdown()
+ Iterator(index)
+ }.collect()
}
test("should allow the dataframe containing communicator calls to be partially evaluated for" +
diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ParameterSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ParameterSuite.scala
index f187f7394ffa..20a95f2a23e4 100644
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ParameterSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/ParameterSuite.scala
@@ -23,7 +23,6 @@ import org.apache.spark.SparkException
import org.apache.spark.ml.param.ParamMap
class ParameterSuite extends AnyFunSuite with PerTest with BeforeAndAfterAll {
-
test("XGBoost and Spark parameters synchronize correctly") {
val xgbParamMap = Map("eta" -> "1", "objective" -> "binary:logistic",
"objective_type" -> "classification")
@@ -50,7 +49,6 @@ class ParameterSuite extends AnyFunSuite with PerTest with BeforeAndAfterAll {
intercept[SparkException] {
xgb.fit(trainingDF)
}
-
}
test("fail training elegantly with unsupported eval metrics") {
diff --git a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostCommunicatorRegressionSuite.scala b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostCommunicatorRegressionSuite.scala
index 86b82e63ce33..136d39e8bc0f 100644
--- a/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostCommunicatorRegressionSuite.scala
+++ b/jvm-packages/xgboost4j-spark/src/test/scala/ml/dmlc/xgboost4j/scala/spark/XGBoostCommunicatorRegressionSuite.scala
@@ -47,11 +47,6 @@ class XGBoostCommunicatorRegressionSuite extends AnyFunSuite with PerTest {
val model2 = new XGBoostClassifier(xgbSettings ++ Map("rabit_ring_reduce_threshold" -> 1))
.fit(training)
- assert(Communicator.communicatorEnvs.asScala.size > 3)
- Communicator.communicatorEnvs.asScala.foreach( item => {
- if (item._1.toString == "rabit_reduce_ring_mincount") assert(item._2 == "1")
- })
-
val prediction2 = model2.transform(testDF).select("prediction").collect()
// check parity w/o rabit cache
prediction1.zip(prediction2).foreach { case (Row(p1: Double), Row(p2: Double)) =>
@@ -70,10 +65,6 @@ class XGBoostCommunicatorRegressionSuite extends AnyFunSuite with PerTest {
val model2 = new XGBoostRegressor(xgbSettings ++ Map("rabit_ring_reduce_threshold" -> 1)
).fit(training)
- assert(Communicator.communicatorEnvs.asScala.size > 3)
- Communicator.communicatorEnvs.asScala.foreach( item => {
- if (item._1.toString == "rabit_reduce_ring_mincount") assert(item._2 == "1")
- })
// check the equality of single instance prediction
val prediction2 = model2.transform(testDF).select("prediction").collect()
// check parity w/o rabit cache
@@ -81,25 +72,4 @@ class XGBoostCommunicatorRegressionSuite extends AnyFunSuite with PerTest {
assert(math.abs(p1 - p2) < predictionErrorMin)
}
}
-
- test("test rabit timeout fail handle") {
- val training = buildDataFrame(Classification.train)
- // mock rank 0 failure during 8th allreduce synchronization
- Communicator.mockList = Array("0,8,0,0").toList.asJava
-
- intercept[SparkException] {
- new XGBoostClassifier(Map(
- "eta" -> "0.1",
- "max_depth" -> "10",
- "verbosity" -> "1",
- "objective" -> "binary:logistic",
- "num_round" -> 5,
- "num_workers" -> numWorkers,
- "rabit_timeout" -> 0))
- .fit(training)
- }
-
- Communicator.mockList = Array.empty.toList.asJava
- }
-
}
diff --git a/jvm-packages/xgboost4j-tester/generate_pom.py b/jvm-packages/xgboost4j-tester/generate_pom.py
index b9c274c28a4d..ad729b3a64cb 100644
--- a/jvm-packages/xgboost4j-tester/generate_pom.py
+++ b/jvm-packages/xgboost4j-tester/generate_pom.py
@@ -22,7 +22,7 @@
{scala_version}
3.2.15
{scala_binary_version}
- 5.5.0
+ 5.6.0
@@ -51,6 +51,11 @@
commons-logging
1.2
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ 2.14.2
+
org.scalatest
scalatest_${{scala.binary.version}}
diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml
index 7eb18691995b..5a83a400c50b 100644
--- a/jvm-packages/xgboost4j/pom.xml
+++ b/jvm-packages/xgboost4j/pom.xml
@@ -60,7 +60,7 @@
org.apache.maven.plugins
maven-javadoc-plugin
- 3.6.2
+ 3.6.3
protected
true
@@ -76,7 +76,7 @@
exec-maven-plugin
org.codehaus.mojo
- 3.1.0
+ 3.2.0
native
@@ -99,7 +99,7 @@
org.apache.maven.plugins
maven-jar-plugin
- 3.3.0
+ 3.4.1
diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Communicator.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Communicator.java
index 795e7d99e8fe..ee1bc7b4a5a9 100644
--- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Communicator.java
+++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Communicator.java
@@ -7,6 +7,9 @@
import java.util.List;
import java.util.Map;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
/**
* Collective communicator global class for synchronization.
*
@@ -30,8 +33,9 @@ public int getOperand() {
}
public enum DataType implements Serializable {
- INT8(0, 1), UINT8(1, 1), INT32(2, 4), UINT32(3, 4),
- INT64(4, 8), UINT64(5, 8), FLOAT32(6, 4), FLOAT64(7, 8);
+ FLOAT16(0, 2), FLOAT32(1, 4), FLOAT64(2, 8),
+ INT8(4, 1), INT16(5, 2), INT32(6, 4), INT64(7, 8),
+ UINT8(8, 1), UINT16(9, 2), UINT32(10, 4), UINT64(11, 8);
private final int enumOp;
private final int size;
@@ -56,30 +60,20 @@ private static void checkCall(int ret) throws XGBoostError {
}
}
- // used as way to test/debug passed communicator init parameters
- public static Map<String, String> communicatorEnvs;
- public static List<String> mockList = new LinkedList<>();
-
/**
* Initialize the collective communicator on current working thread.
*
* @param envs The additional environment variables to pass to the communicator.
* @throws XGBoostError
*/
- public static void init(Map<String, String> envs) throws XGBoostError {
- communicatorEnvs = envs;
- String[] args = new String[envs.size() * 2 + mockList.size() * 2];
- int idx = 0;
- for (java.util.Map.Entry<String, String> e : envs.entrySet()) {
- args[idx++] = e.getKey();
- args[idx++] = e.getValue();
- }
- // pass list of rabit mock strings eg mock=0,1,0,0
- for (String mock : mockList) {
- args[idx++] = "mock";
- args[idx++] = mock;
+ public static void init(Map<String, Object> envs) throws XGBoostError {
+ ObjectMapper mapper = new ObjectMapper();
+ try {
+ String jconfig = mapper.writeValueAsString(envs);
+ checkCall(XGBoostJNI.CommunicatorInit(jconfig));
+ } catch (JsonProcessingException ex) {
+ throw new XGBoostError("Failed to read arguments for the communicator.", ex);
}
- checkCall(XGBoostJNI.CommunicatorInit(args));
}
/**
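Note: with this change, Communicator.init() forwards the worker arguments to the native layer as a single JSON document instead of a flat key/value string array. The standalone sketch below is not part of the patch; the key names, values, and class name are illustrative assumptions. It only demonstrates the Jackson serialization step that the patched init() performs:

    import java.util.HashMap;
    import java.util.Map;

    import com.fasterxml.jackson.core.JsonProcessingException;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public class CommunicatorConfigSketch {
      public static void main(String[] args) throws JsonProcessingException {
        // Hypothetical worker arguments; in practice these come from the tracker.
        Map<String, Object> envs = new HashMap<>();
        envs.put("dmlc_tracker_uri", "127.0.0.1"); // assumed key name, illustration only
        envs.put("dmlc_tracker_port", 9091);       // non-string values become JSON numbers
        envs.put("dmlc_task_id", "0");

        // Same serialization step as the patched Communicator.init().
        ObjectMapper mapper = new ObjectMapper();
        String jconfig = mapper.writeValueAsString(envs);
        System.out.println(jconfig); // e.g. {"dmlc_tracker_port":9091,"dmlc_task_id":"0",...}
      }
    }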
diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/IRabitTracker.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ITracker.java
similarity index 56%
rename from jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/IRabitTracker.java
rename to jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ITracker.java
index 984fb80e6dd8..1bfef677d45c 100644
--- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/IRabitTracker.java
+++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/ITracker.java
@@ -1,14 +1,13 @@
package ml.dmlc.xgboost4j.java;
import java.util.Map;
-import java.util.concurrent.TimeUnit;
/**
- * Interface for Rabit tracker implementations with three public methods:
+ * Interface for tracker implementations with three public methods:
*
- * - start(timeout): Start the Rabit tracker awaiting for worker connections, with a given
- * timeout value (in milliseconds.)
- * - getWorkerEnvs(): Return the environment variables needed to initialize Rabit clients.
+ * - start(timeout): Start the tracker, awaiting worker connections, with a given
+ * timeout value (in seconds).
+ * - workerArgs(): Return the arguments needed to initialize Rabit clients.
* - waitFor(timeout): Wait for the task execution by the worker nodes for at most `timeout`
* milliseconds.
*
@@ -21,7 +20,7 @@
* The Rabit tracker handles connections from distributed workers, assigns ranks to workers, and
* brokers connections between workers.
*/
-public interface IRabitTracker extends Thread.UncaughtExceptionHandler {
+public interface ITracker extends Thread.UncaughtExceptionHandler {
enum TrackerStatus {
SUCCESS(0), INTERRUPTED(1), TIMEOUT(2), FAILURE(3);
@@ -36,9 +35,11 @@ public int getStatusCode() {
}
}
- Map<String, String> getWorkerEnvs();
- boolean start(long workerConnectionTimeout);
- void stop();
- // taskExecutionTimeout has no effect in current version of XGBoost.
- int waitFor(long taskExecutionTimeout);
+ Map<String, Object> workerArgs() throws XGBoostError;
+
+ boolean start() throws XGBoostError;
+
+ void stop() throws XGBoostError;
+
+ void waitFor(long taskExecutionTimeout) throws XGBoostError;
}
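Note: a minimal driver-side sketch of the renamed ITracker lifecycle, assuming the RabitTracker implementation updated below; the class name, host IP, port, and timeout values are illustrative, and error handling is reduced to the essentials:

    import java.util.Map;

    import ml.dmlc.xgboost4j.java.ITracker;
    import ml.dmlc.xgboost4j.java.RabitTracker;
    import ml.dmlc.xgboost4j.java.XGBoostError;

    public class TrackerLifecycleSketch {
      public static void main(String[] args) throws XGBoostError {
        // Two workers; host IP, port, and timeout values are placeholders.
        ITracker tracker = new RabitTracker(2, "127.0.0.1", 0, 300);
        try {
          if (!tracker.start()) {
            throw new XGBoostError("Failed to start the tracker.");
          }
          // Arguments each worker passes to Communicator.init() before training
          // (assuming a Map<String, Object> return type, per the interface above).
          Map<String, Object> workerArgs = tracker.workerArgs();
          System.out.println(workerArgs);
          // ... submit the distributed training job here ...
          tracker.waitFor(0L); // wait for the workers; timeout semantics per the implementation
        } finally {
          tracker.stop();
        }
      }
    }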
diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/RabitTracker.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/RabitTracker.java
index 0a05b3de0d7f..914a493cc8d1 100644
--- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/RabitTracker.java
+++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/RabitTracker.java
@@ -1,101 +1,40 @@
package ml.dmlc.xgboost4j.java;
-import java.io.*;
-import java.util.HashMap;
import java.util.Map;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicReference;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Java implementation of the Rabit tracker to coordinate distributed workers.
- * As a wrapper of the Python Rabit tracker, this implementation does not handle timeout for both
- * start() and waitFor() methods (i.e., the timeout is infinite.)
- *
- * For systems lacking Python environment, or for timeout functionality, consider using the Scala
- * Rabit tracker (ml.dmlc.xgboost4j.scala.rabit.RabitTracker) which does not depend on Python, and
- * provides timeout support.
*
* The tracker must be started on driver node before running distributed jobs.
*/
-public class RabitTracker implements IRabitTracker {
+public class RabitTracker implements ITracker {
// Maybe per tracker logger?
private static final Log logger = LogFactory.getLog(RabitTracker.class);
- // tracker python file.
- private static String tracker_py = null;
- private static TrackerProperties trackerProperties = TrackerProperties.getInstance();
- // environment variable to be pased.
- private Map<String, String> envs = new HashMap<String, String>();
- // number of workers to be submitted.
- private int numWorkers;
- private String hostIp = "";
- private String pythonExec = "";
- private AtomicReference<Process> trackerProcess = new AtomicReference<Process>();
-
- static {
- try {
- initTrackerPy();
- } catch (IOException ex) {
- logger.error("load tracker library failed.");
- logger.error(ex);
- }
- }
-
- /**
- * Tracker logger that logs output from tracker.
- */
- private class TrackerProcessLogger implements Runnable {
- public void run() {
-
- Log trackerProcessLogger = LogFactory.getLog(TrackerProcessLogger.class);
- BufferedReader reader = new BufferedReader(new InputStreamReader(
- trackerProcess.get().getErrorStream()));
- String line;
- try {
- while ((line = reader.readLine()) != null) {
- trackerProcessLogger.info(line);
- }
- trackerProcess.get().waitFor();
- int exitValue = trackerProcess.get().exitValue();
- if (exitValue != 0) {
- trackerProcessLogger.error("Tracker Process ends with exit code " + exitValue);
- } else {
- trackerProcessLogger.info("Tracker Process ends with exit code " + exitValue);
- }
- } catch (IOException ex) {
- trackerProcessLogger.error(ex.toString());
- } catch (InterruptedException ie) {
- // we should not get here as RabitTracker is accessed in the main thread
- ie.printStackTrace();
- logger.error("the RabitTracker thread is terminated unexpectedly");
- }
- }
- }
+ private long handle = 0;
+ private Thread tracker_daemon;
- private static void initTrackerPy() throws IOException {
- try {
- tracker_py = NativeLibLoader.createTempFileFromResource("/tracker.py");
- } catch (IOException ioe) {
- logger.trace("cannot access tracker python script");
- throw ioe;
- }
+ public RabitTracker(int numWorkers) throws XGBoostError {
+ this(numWorkers, "");
}
- public RabitTracker(int numWorkers)
+ public RabitTracker(int numWorkers, String hostIp)
throws XGBoostError {
+ this(numWorkers, hostIp, 0, 300);
+ }
+ public RabitTracker(int numWorkers, String hostIp, int port, int timeout) throws XGBoostError {
if (numWorkers < 1) {
throw new XGBoostError("numWorkers must be greater equal to one");
}
- this.numWorkers = numWorkers;
- }
- public RabitTracker(int numWorkers, String hostIp, String pythonExec)
- throws XGBoostError {
- this(numWorkers);
- this.hostIp = hostIp;
- this.pythonExec = pythonExec;
+ long[] out = new long[1];
+ XGBoostJNI.checkCall(XGBoostJNI.TrackerCreate(hostIp, numWorkers, port, 0, timeout, out));
+ this.handle = out[0];
}
public void uncaughtException(Thread t, Throwable e) {
@@ -105,7 +44,7 @@ public void uncaughtException(Thread t, Throwable e) {
} catch (InterruptedException ex) {
logger.error(ex);
} finally {
- trackerProcess.get().destroy();
+ this.tracker_daemon.interrupt();
}
}
@@ -113,115 +52,43 @@ public void uncaughtException(Thread t, Throwable e) {
* Get environments that can be used to pass to worker.
* @return The environment settings.
*/
- public Map<String, String> getWorkerEnvs() {
- return envs;
- }
-
- private void loadEnvs(InputStream ins) throws IOException {
- try {
- BufferedReader reader = new BufferedReader(new InputStreamReader(ins));
- assert reader.readLine().trim().equals("DMLC_TRACKER_ENV_START");
- String line;
- while ((line = reader.readLine()) != null) {
- if (line.trim().equals("DMLC_TRACKER_ENV_END")) {
- break;
- }
- String[] sep = line.split("=");
- if (sep.length == 2) {
- envs.put(sep[0], sep[1]);
- }
- }
- reader.close();
- } catch (IOException ioe){
- logger.error("cannot get runtime configuration from tracker process");
- ioe.printStackTrace();
- throw ioe;
- }
- }
-
- /** visible for testing */
- public String getRabitTrackerCommand() {
- StringBuilder sb = new StringBuilder();
- if (pythonExec == null || pythonExec.isEmpty()) {
- sb.append("python ");
- } else {
- sb.append(pythonExec + " ");
- }
- sb.append(" " + tracker_py + " ");
- sb.append(" --log-level=DEBUG" + " ");
- sb.append(" --num-workers=" + numWorkers + " ");
-
- // we first check the property then check the parameter
- String hostIpFromProperties = trackerProperties.getHostIp();
- if(hostIpFromProperties != null && !hostIpFromProperties.isEmpty()) {
- logger.debug("Using provided host-ip: " + hostIpFromProperties + " from properties");
- sb.append(" --host-ip=" + hostIpFromProperties + " ");
- } else if (hostIp != null & !hostIp.isEmpty()) {
- logger.debug("Using the parametr host-ip: " + hostIp);
- sb.append(" --host-ip=" + hostIp + " ");
- }
- return sb.toString();
- }
-
- private boolean startTrackerProcess() {
+ public Map<String, Object> workerArgs() throws XGBoostError {
+ // fixme: timeout
+ String[] args = new String[1];
+ XGBoostJNI.checkCall(XGBoostJNI.TrackerWorkerArgs(this.handle, 0, args));
+ ObjectMapper mapper = new ObjectMapper();
+ TypeReference