From fdc582ea6ba13faf15ee6707c7c7542790c8821d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 24 Jul 2021 23:05:48 -0500 Subject: [PATCH] [docs] document CLI behavior when label_column is omitted (#4485) --- docs/Parameters.rst | 2 ++ examples/binary_classification/train.conf | 3 +++ examples/lambdarank/train.conf | 3 +++ examples/multiclass_classification/train.conf | 3 +++ examples/parallel_learning/train.conf | 3 +++ examples/regression/train.conf | 3 +++ examples/xendcg/train.conf | 3 +++ include/LightGBM/config.h | 1 + 8 files changed, 21 insertions(+) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 793d3f5bec89..637ab8c6d549 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -762,6 +762,8 @@ Dataset Parameters - add a prefix ``name:`` for column name, e.g. ``label=name:is_click`` + - if omitted, the first column in the training data is used as the label + - **Note**: works only in case of loading data directly from file - ``weight_column`` :raw-html:`🔗︎`, default = ``""``, type = int or string, aliases: ``weight`` diff --git a/examples/binary_classification/train.conf b/examples/binary_classification/train.conf index 11f07bf9d8c0..f1ae6af54c1f 100644 --- a/examples/binary_classification/train.conf +++ b/examples/binary_classification/train.conf @@ -26,6 +26,9 @@ metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric is_training_metric = true +# column in data to use as label +label_column = 0 + # number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 diff --git a/examples/lambdarank/train.conf b/examples/lambdarank/train.conf index 5f51115eaa2b..d2b7825be7ec 100644 --- a/examples/lambdarank/train.conf +++ b/examples/lambdarank/train.conf @@ -29,6 +29,9 @@ metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric is_training_metric = true +# column in data to use as label +label_column = 0 + # number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 diff --git a/examples/multiclass_classification/train.conf b/examples/multiclass_classification/train.conf index 05ed99262755..8d2bf28733b5 100644 --- a/examples/multiclass_classification/train.conf +++ b/examples/multiclass_classification/train.conf @@ -41,6 +41,9 @@ metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric is_training_metric = true +# column in data to use as label +label_column = 0 + # number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 diff --git a/examples/parallel_learning/train.conf b/examples/parallel_learning/train.conf index e9792c4673e6..3b40778fa50f 100644 --- a/examples/parallel_learning/train.conf +++ b/examples/parallel_learning/train.conf @@ -26,6 +26,9 @@ metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric is_training_metric = true +# column in data to use as label +label_column = 0 + # number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 diff --git a/examples/regression/train.conf b/examples/regression/train.conf index 7fac419a5ba4..ce25d0ecae47 100644 --- a/examples/regression/train.conf +++ b/examples/regression/train.conf @@ -26,6 +26,9 @@ metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric is_training_metric = true +# column in data to use as label +label_column = 0 + # number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 diff --git a/examples/xendcg/train.conf b/examples/xendcg/train.conf index 6840ae4dbab9..0dafb931d319 100644 --- a/examples/xendcg/train.conf +++ b/examples/xendcg/train.conf @@ -29,6 +29,9 @@ metric_freq = 1 # true if need output metric for training data, alias: tranining_metric, train_metric is_training_metric = true +# column in data to use as label +label_column = 0 + # number of bins for feature bucket, 255 is a recommend setting, it can save memories, and also has good accuracy. max_bin = 255 diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 62cd6db9b41e..38ea18df60dd 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -657,6 +657,7 @@ struct Config { // desc = used to specify the label column // desc = use number for index, e.g. ``label=0`` means column\_0 is the label // desc = add a prefix ``name:`` for column name, e.g. ``label=name:is_click`` + // desc = if omitted, the first column in the training data is used as the label // desc = **Note**: works only in case of loading data directly from file std::string label_column = "";