Skip to content

Commit

Permalink
[MT-TREE] Support prediction cache and model slicing. (#8968)
Browse files Browse the repository at this point in the history
- Fix prediction range.
- Support prediction cache in mt-hist.
- Support model slicing.
- Make the booster a Python iterable by defining `__iter__`.
- Cleanup removed/deprecated parameters.
- A new field in the output model `iteration_indptr` for pointing to the ranges of trees for each iteration.
  • Loading branch information
trivialfis authored Mar 27, 2023
1 parent c2b3a13 commit acc110c
Show file tree
Hide file tree
Showing 30 changed files with 502 additions and 343 deletions.
3 changes: 0 additions & 3 deletions demo/json-model/json_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,6 @@ def __init__(self, model: dict) -> None:

# Load the trees
self.num_trees = int(model_shape["num_trees"])
self.leaf_size = int(model_shape["size_leaf_vector"])
# Right now XGBoost doesn't support vector leaf yet
assert self.leaf_size == 0, str(self.leaf_size)

trees: List[Tree] = []
for i in range(self.num_trees):
Expand Down
23 changes: 2 additions & 21 deletions doc/model.schema
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,7 @@
"type": "object",
"properties": {
"tree_param": {
"type": "object",
"properties": {
"num_nodes": {
"type": "string"
},
"size_leaf_vector": {
"type": "string"
},
"num_feature": {
"type": "string"
}
},
"required": [
"num_nodes",
"num_feature",
"size_leaf_vector"
]
"$ref": "#/definitions/tree_param"
},
"id": {
"type": "integer"
Expand Down Expand Up @@ -170,14 +154,11 @@
},
"num_parallel_tree": {
"type": "string"
},
"size_leaf_vector": {
"type": "string"
}
},
"required": [
"num_trees",
"size_leaf_vector"
"num_parallel_tree"
]
},
"tree_param": {
Expand Down
12 changes: 11 additions & 1 deletion include/xgboost/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,18 @@ using bst_row_t = std::size_t; // NOLINT
using bst_node_t = std::int32_t; // NOLINT
/*! \brief Type for ranking group index. */
using bst_group_t = std::uint32_t; // NOLINT
/*! \brief Type for indexing into output targets. */
/**
* \brief Type for indexing into output targets.
*/
using bst_target_t = std::uint32_t; // NOLINT
/**
 * \brief Type for indexing boosted layers.
 */
using bst_layer_t = std::int32_t; // NOLINT
/**
* \brief Type for indexing trees.
*/
using bst_tree_t = std::int32_t; // NOLINT

namespace detail {
/*! \brief Implementation of gradient statistics pair. Template specialisation
Expand Down
49 changes: 23 additions & 26 deletions include/xgboost/gbm.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,16 @@ class GradientBooster : public Model, public Configurable {
* \param fo output stream
*/
virtual void Save(dmlc::Stream* fo) const = 0;
/*!
/**
* \brief Slice a model using boosting index. The slice m:n indicates taking all trees
* that were fit during the boosting rounds m, (m+1), (m+2), ..., (n-1).
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
* \param out Output gradient booster
* \param begin Beginning of boosted tree layer used for prediction.
* \param end End of booster layer. 0 means do not limit trees.
* \param out Output gradient booster
*/
virtual void Slice(int32_t /*layer_begin*/, int32_t /*layer_end*/, int32_t /*step*/,
virtual void Slice(bst_layer_t /*begin*/, bst_layer_t /*end*/, bst_layer_t /*step*/,
GradientBooster* /*out*/, bool* /*out_of_bound*/) const {
LOG(FATAL) << "Slice is not supported by current booster.";
LOG(FATAL) << "Slice is not supported by the current booster.";
}
/*! \brief Return number of boosted rounds.
*/
Expand All @@ -88,34 +88,31 @@ class GradientBooster : public Model, public Configurable {
virtual void DoBoost(DMatrix* p_fmat, HostDeviceVector<GradientPair>* in_gpair,
PredictionCacheEntry*, ObjFunction const* obj) = 0;

/*!
* \brief generate predictions for given feature matrix
* \param dmat feature matrix
/**
* \brief Generate predictions for given feature matrix
*
* \param dmat The feature matrix.
* \param out_preds output vector to hold the predictions
* \param training Whether the prediction value is used for training. For dart booster
* drop out is performed during training.
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
* \param begin Beginning of boosted tree layer used for prediction.
* \param end End of booster layer. 0 means do not limit trees.
*/
virtual void PredictBatch(DMatrix* dmat,
PredictionCacheEntry* out_preds,
bool training,
unsigned layer_begin,
unsigned layer_end) = 0;
virtual void PredictBatch(DMatrix* dmat, PredictionCacheEntry* out_preds, bool training,
bst_layer_t begin, bst_layer_t end) = 0;

/*!
/**
* \brief Inplace prediction.
*
* \param p_fmat A proxy DMatrix that contains the data and related
* meta info.
* \param missing Missing value in the data.
* \param [in,out] out_preds The output preds.
* \param layer_begin (Optional) Beginning of boosted tree layer used for prediction.
* \param layer_end (Optional) End of booster layer. 0 means do not limit trees.
 * \param p_fmat       A proxy DMatrix that contains the data and related meta info.
* \param missing Missing value in the data.
* \param [in,out] out_preds The output preds.
* \param begin (Optional) Beginning of boosted tree layer used for prediction.
* \param end (Optional) End of booster layer. 0 means do not limit trees.
*/
virtual void InplacePredict(std::shared_ptr<DMatrix>, float, PredictionCacheEntry*, uint32_t,
uint32_t) const {
LOG(FATAL) << "Inplace predict is not supported by current booster.";
virtual void InplacePredict(std::shared_ptr<DMatrix>, float, PredictionCacheEntry*, bst_layer_t,
bst_layer_t) const {
LOG(FATAL) << "Inplace predict is not supported by the current booster.";
}
/*!
* \brief online prediction function, predict score for one instance at a time
Expand Down
8 changes: 4 additions & 4 deletions include/xgboost/learner.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#define XGBOOST_LEARNER_H_

#include <dmlc/io.h> // for Serializable
#include <xgboost/base.h> // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair
#include <xgboost/base.h> // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair, ..
#include <xgboost/context.h> // for Context
#include <xgboost/linalg.h> // for Tensor, TensorView
#include <xgboost/metric.h> // for Metric
Expand Down Expand Up @@ -229,7 +229,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
*/
virtual void GetFeatureTypes(std::vector<std::string>* ft) const = 0;

/*!
/**
* \brief Slice the model.
*
* See InplacePredict for layer parameters.
Expand All @@ -239,8 +239,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
*
* \return a sliced model.
*/
virtual Learner *Slice(int32_t begin_layer, int32_t end_layer, int32_t step,
bool *out_of_bound) = 0;
virtual Learner* Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step,
bool* out_of_bound) = 0;
/*!
* \brief dump the model in the requested format
* \param fmap feature map that may help give interpretations of feature
Expand Down
4 changes: 2 additions & 2 deletions include/xgboost/tree_updater.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ class TreeUpdater : public Configurable {
* the prediction cache. If true, the prediction cache will have been
* updated by the time this function returns.
*/
virtual bool UpdatePredictionCache(const DMatrix * /*data*/,
linalg::VectorView<float> /*out_preds*/) {
virtual bool UpdatePredictionCache(const DMatrix* /*data*/,
linalg::MatrixView<float> /*out_preds*/) {
return false;
}

Expand Down
5 changes: 5 additions & 0 deletions python-package/xgboost/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
Any,
Callable,
Dict,
Generator,
Iterable,
List,
Optional,
Expand Down Expand Up @@ -1756,6 +1757,10 @@ def __getitem__(self, val: Union[int, tuple, slice]) -> "Booster":
sliced.handle = sliced_handle
return sliced

def __iter__(self) -> Generator["Booster", None, None]:
for i in range(0, self.num_boosted_rounds()):
yield self[i]

def save_config(self) -> str:
"""Output internal parameter configuration of Booster as a JSON
string.
Expand Down
2 changes: 1 addition & 1 deletion src/gbm/gblinear.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class GBLinear : public GradientBooster {
}

void PredictBatch(DMatrix* p_fmat, PredictionCacheEntry* predts, bool /*training*/,
uint32_t layer_begin, uint32_t) override {
bst_layer_t layer_begin, bst_layer_t) override {
monitor_.Start("PredictBatch");
LinearCheckLayer(layer_begin);
auto* out_preds = &predts->predictions;
Expand Down
Loading

0 comments on commit acc110c

Please sign in to comment.