Implement multi-target for hist.
Initial commit.

Predictor.

Compile.

fixes.

Cleanup.

Moving code around.

Start working on cat features.

Start working on model IO.

Fix.

Revert.

cleanup.

Rebase.

Reverse cleanup.

rename.

Fix rebase.

small cleanup.

inc

Merge it into reg tree.

Strategy.

Extract the cat matrix.

Use array in predictor.

Use array in scalar.

Merge two kernels.

QDM.

inplace predict.

cleanup.

naming.

cleanup.

cleanup.

sampler.

copy.

cleanup.

compile test.

Hide the tree.

Hide from the partitioner.

Hide init root.

layer to trees.

check.

Remove old sampling func.

leaf partition.

use linalg.

remove grad stats.

ro5

reverse.

Don't support prediction cache for now.

col sampler.

Cleanup.

Cleanup.

Cleanup histogram.

t

Cleanup evaluation.

ic.

Cleanup.

start working on io.

is valid.

basic io.

dispatch.

Basic IO.

Cleanup node sum.

cleanup.

Extract the updater.

Merge it into quantile hist.

cleanup.

Cleanup.

restore checks.

Cleanup.

remove num_target.

fix tests.

Fix.

fixes.

Type deduction.

R package.

Predict leaf.

Predict leaf.

cleanup.

Add a test to sampling.

check.

cleanup.

cleanup.

parallel.

Cleanup

Fix root.

column-major.

fewer right.

Cleanup.

Initial work on merging the updaters.

Fix.

Merge update tree.

Consistent naming.

HD.

Unify sampling.

Fix build.

Fix build.

CUDA build.

Fix GPU SHAP tests.

fix.
trivialfis committed Jan 19, 2023
1 parent 26c9882 commit 6ad1e57
Showing 65 changed files with 2,561 additions and 1,249 deletions.
1 change: 1 addition & 0 deletions R-package/src/Makevars.in
@@ -59,6 +59,7 @@ OBJECTS= \
 $(PKGROOT)/src/tree/fit_stump.o \
 $(PKGROOT)/src/tree/tree_model.o \
 $(PKGROOT)/src/tree/tree_updater.o \
+$(PKGROOT)/src/tree/multi_target_tree_model.o \
 $(PKGROOT)/src/tree/updater_approx.o \
 $(PKGROOT)/src/tree/updater_colmaker.o \
 $(PKGROOT)/src/tree/updater_prune.o \
1 change: 1 addition & 0 deletions R-package/src/Makevars.win
@@ -58,6 +58,7 @@ OBJECTS= \
 $(PKGROOT)/src/tree/param.o \
 $(PKGROOT)/src/tree/fit_stump.o \
 $(PKGROOT)/src/tree/tree_model.o \
+$(PKGROOT)/src/tree/multi_target_tree_model.o \
 $(PKGROOT)/src/tree/tree_updater.o \
 $(PKGROOT)/src/tree/updater_approx.o \
 $(PKGROOT)/src/tree/updater_colmaker.o \
17 changes: 14 additions & 3 deletions demo/guide-python/multioutput_regression.py
@@ -44,10 +44,19 @@ def rmse_model(plot_result: bool):
     """Draw a circle with 2-dim coordinate as target variables."""
     X, y = gen_circle()
     # Train a regressor on it
-    reg = xgb.XGBRegressor(tree_method="hist", n_estimators=64)
+    reg = xgb.XGBRegressor(
+        tree_method="hist",
+        n_estimators=16,
+        n_jobs=16,
+        max_depth=8,
+        multi_strategy="mono",
+        subsample=0.6,
+    )
     reg.fit(X, y, eval_set=[(X, y)])
+    # reg.save_model("model.json")

     y_predt = reg.predict(X)
+    # print("y_predt:", y_predt, y)
     if plot_result:
         plot_predt(y, y_predt, "multi")

@@ -81,13 +90,15 @@ def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
     X, y = gen_circle()
     Xy = xgb.DMatrix(X, y)
     results: Dict[str, Dict[str, List[float]]] = {}
-    # Make sure the `num_target` is passed to XGBoost when custom objective is used.
+    # Make sure the `num_class` is passed to XGBoost when custom objective is used.
     # When builtin objective is used, XGBoost can figure out the number of targets
     # automatically.
     booster = xgb.train(
         {
             "tree_method": "hist",
-            "num_target": y.shape[1],
+            "num_class": y.shape[1],
+            "multi_strategy": "mono",
+            "objective": "reg:squarederror",  # fixme
         },
         dtrain=Xy,
         num_boost_round=100,
30 changes: 17 additions & 13 deletions include/xgboost/base.h
@@ -1,5 +1,5 @@
 /*!
- * Copyright (c) 2015 by Contributors
+ * Copyright (c) 2015-2023 by Contributors
  * \file base.h
  * \brief defines configuration macros of xgboost.
  */
@@ -10,6 +10,7 @@
 #include <dmlc/omp.h>
 #include <cmath>
 #include <iostream>
+#include <type_traits>
 #include <vector>
 #include <string>
 #include <utility>
@@ -110,19 +111,19 @@
 namespace xgboost {

 /*! \brief unsigned integer type used for feature index. */
-using bst_uint = uint32_t;  // NOLINT
+using bst_uint = std::uint32_t;  // NOLINT
 /*! \brief integer type. */
-using bst_int = int32_t;  // NOLINT
+using bst_int = std::int32_t;  // NOLINT
 /*! \brief unsigned long integers */
-using bst_ulong = uint64_t;  // NOLINT
+using bst_ulong = std::uint64_t;  // NOLINT
 /*! \brief float type, used for storing statistics */
 using bst_float = float;  // NOLINT
 /*! \brief Categorical value type. */
-using bst_cat_t = int32_t;  // NOLINT
+using bst_cat_t = std::int32_t;  // NOLINT
 /*! \brief Type for data column (feature) index. */
-using bst_feature_t = uint32_t;  // NOLINT
+using bst_feature_t = std::uint32_t;  // NOLINT
 /*! \brief Type for histogram bin index. */
-using bst_bin_t = int32_t;  // NOLINT
+using bst_bin_t = std::int32_t;  // NOLINT
 /*! \brief Type for data row index.
  *
  * Be careful `std::size_t' is implementation-defined. Meaning that the binary
@@ -131,11 +132,11 @@ using bst_bin_t = int32_t;  // NOLINT
  */
 using bst_row_t = std::size_t;  // NOLINT
 /*! \brief Type for tree node index. */
-using bst_node_t = int32_t;  // NOLINT
+using bst_node_t = std::int32_t;  // NOLINT
 /*! \brief Type for ranking group index. */
-using bst_group_t = uint32_t;  // NOLINT
-/*! \brief Type for indexing target variables. */
-using bst_target_t = std::size_t;  // NOLINT
+using bst_group_t = std::uint32_t;  // NOLINT
+/*! \brief Type for indexing into output targets. */
+using bst_target_t = std::uint32_t;  // NOLINT

 namespace detail {
 /*! \brief Implementation of gradient statistics pair. Template specialisation
@@ -171,11 +172,14 @@ class GradientPairInternal {
   }

   // Copy constructor if of same value type, marked as default to be trivially_copyable
-  GradientPairInternal(const GradientPairInternal<T> &g) = default;
+  GradientPairInternal(GradientPairInternal const &g) = default;
+  GradientPairInternal(GradientPairInternal &&g) = default;
+  GradientPairInternal &operator=(GradientPairInternal const &that) = default;
+  GradientPairInternal &operator=(GradientPairInternal &&that) = default;

   // Copy constructor if different value type - use getters and setters to
   // perform conversion
-  template <typename T2>
+  template <typename T2, std::enable_if_t<!std::is_same<T, T2>::value>* = nullptr>
   XGBOOST_DEVICE explicit GradientPairInternal(const GradientPairInternal<T2> &g) {
     SetGrad(g.GetGrad());
     SetHess(g.GetHess());
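A note on the constrained converting constructor above: the enable_if guard removes the cross-type constructor from the overload set whenever the two value types are the same, so same-type construction is always handled by the defaulted, trivial copy constructor. Below is a minimal standalone sketch of the pattern — `Pair` is a stand-in name, not XGBoost code — that compiles on its own:

#include <type_traits>

template <typename T>
class Pair {
  T grad_{0};
  T hess_{0};

 public:
  Pair() = default;
  Pair(T grad, T hess) : grad_{grad}, hess_{hess} {}

  // Defaulted special members keep Pair<T> trivially copyable, which is what
  // permits raw-memory copies of gradient buffers (e.g. between host and device).
  Pair(Pair const &) = default;
  Pair(Pair &&) = default;
  Pair &operator=(Pair const &) = default;
  Pair &operator=(Pair &&) = default;

  // The converting constructor participates in overload resolution only when
  // the value types differ, so it is never a candidate for same-type copies.
  template <typename U, std::enable_if_t<!std::is_same<T, U>::value> * = nullptr>
  explicit Pair(Pair<U> const &that)
      : grad_{static_cast<T>(that.GetGrad())}, hess_{static_cast<T>(that.GetHess())} {}

  T GetGrad() const { return grad_; }
  T GetHess() const { return hess_; }
};

static_assert(std::is_trivially_copyable<Pair<float>>::value,
              "defaulted special members keep the pair trivially copyable");

int main() {
  Pair<double> acc{1.0, 2.0};  // high-precision accumulator
  Pair<float> compact{acc};    // cross-type: goes through the constrained template
  Pair<float> copy{compact};   // same-type: trivial copy constructor
  return copy.GetGrad() == 1.0f ? 0 : 1;
}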
33 changes: 26 additions & 7 deletions include/xgboost/learner.h
@@ -16,6 +16,7 @@
 #include <xgboost/predictor.h>
 #include <xgboost/task.h>

+#include <cstddef>
 #include <map>
 #include <memory>
 #include <string>
@@ -162,6 +163,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
   */
   virtual int32_t BoostedRounds() const = 0;
   virtual uint32_t Groups() const = 0;
+  virtual bst_target_t Targets() const = 0;

   void LoadModel(Json const& in) override = 0;
   void SaveModel(Json* out) const override = 0;
@@ -305,11 +307,21 @@ struct LearnerModelParam {
   linalg::Tensor<float, 1> base_score_;

  public:
-  /* \brief number of features */
-  uint32_t num_feature { 0 };
-  /* \brief number of classes, if it is multi-class classification */
-  uint32_t num_output_group { 0 };
-  /* \brief Current task, determined by objective. */
+  /**
+   * \brief The number of features.
+   */
+  bst_feature_t num_feature{0};
+  /**
+   * \brief The number of classes or targets if the current strategy is composite.
+   */
+  uint32_t num_output_group{0};
+  /**
+   * \brief The number of output targets.
+   */
+  bst_target_t num_target{0};
+  /**
+   * \brief Current task, determined by objective.
+   */
   ObjInfo task{ObjInfo::kRegression};

   LearnerModelParam() = default;
@@ -319,13 +331,20 @@
                     linalg::Tensor<float, 1> base_margin, ObjInfo t);
   LearnerModelParam(LearnerModelParamLegacy const& user_param, ObjInfo t);
   LearnerModelParam(bst_feature_t n_features, linalg::Tensor<float, 1> base_margin,
-                    uint32_t n_groups)
-      : base_score_{std::move(base_margin)}, num_feature{n_features}, num_output_group{n_groups} {}
+                    uint32_t n_groups, bst_target_t n_targets)
+      : base_score_{std::move(base_margin)},
+        num_feature{n_features},
+        num_output_group{n_groups},
+        num_target{n_targets} {}

   linalg::TensorView<float const, 1> BaseScore(Context const* ctx) const;
   linalg::TensorView<float const, 1> BaseScore(int32_t device) const;

   void Copy(LearnerModelParam const& that);
+  bool IsVectorLeaf() const { return num_output_group == 1 && num_target > 1; }
+  bst_target_t OutputLength() const {
+    return this->IsVectorLeaf() ? this->num_target : this->num_output_group;
+  }

   /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
   bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
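The new IsVectorLeaf()/OutputLength() pair is how callers decide the prediction shape: a model is "vector leaf" when it has a single output group but multiple targets, and OutputLength() collapses the two shape parameters into one number. A hypothetical caller sizing its prediction buffer might look like the sketch below — the struct is a simplified stand-in for LearnerModelParam, and `n_rows` is an assumed input, not anything from this patch:

#include <cstddef>
#include <cstdint>
#include <vector>

struct ModelShape {  // simplified stand-in for LearnerModelParam
  std::uint32_t num_output_group{1};  // classes: one tree per class per round
  std::uint32_t num_target{1};        // targets: one vector leaf per node

  bool IsVectorLeaf() const { return num_output_group == 1 && num_target > 1; }
  std::uint32_t OutputLength() const {
    return IsVectorLeaf() ? num_target : num_output_group;
  }
};

int main() {
  ModelShape shape;
  shape.num_target = 2;  // e.g. the 2-dim circle target in the demo above

  std::size_t n_rows = 1024;  // assumed number of rows in the input matrix
  // One value per row per output, regardless of which strategy produced them.
  std::vector<float> predt(n_rows * shape.OutputLength());
  return predt.size() == n_rows * 2 ? 0 : 1;
}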
(Diff truncated; the remaining changed files are not shown.)
