diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 8b3bd610d80f..6a5729f7e8d5 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -1269,6 +1269,8 @@ The initial score file corresponds with data file line by line, and has per scor And if the name of data file is ``train.txt``, the initial score file should be named as ``train.txt.init`` and placed in the same folder as the data file. In this case, LightGBM will auto load initial score file if it exists. +If a binary data file exists for the raw data file ``train.txt`` (for example, ``train.txt.bin``), then the initial score file should be named ``train.txt.bin.init``. + Weight Data ~~~~~~~~~~~ diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index 051b60c6a942..e94eefb979e3 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -147,6 +147,9 @@ class Metadata { queries_[idx] = static_cast(value); } + /*! \brief Load initial scores from the file named data_filename + ".init" */ + void LoadInitialScore(const std::string& data_filename); + /*! * \brief Get weights, if not exists, will return nullptr * \return Pointer of weights @@ -223,8 +226,6 @@ class Metadata { #endif // USE_CUDA_EXP private: - /*! \brief Load initial scores from file */ - void LoadInitialScore(); /*! \brief Load wights from file */ void LoadWeights(); /*! 
\brief Load query boundaries from file */ diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index 5ff39859d4b4..a4e22956cbeb 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -272,6 +272,11 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac is_load_from_binary = true; Log::Info("Load from binary file %s", bin_filename.c_str()); dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), rank, num_machines, &num_global_data, &used_data_indices)); + + // check whether there is an initial score file when loading from a binary data file + // the initial score file should have the suffix ".bin.init" + dataset->metadata_.LoadInitialScore(bin_filename); + dataset->device_type_ = config_.device_type; dataset->gpu_device_id_ = config_.gpu_device_id; #ifdef USE_CUDA_EXP @@ -338,6 +343,9 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename, } else { // load data from binary file dataset.reset(LoadFromBinFile(filename, bin_filename.c_str(), 0, 1, &num_global_data, &used_data_indices)); + // check whether there is an initial score file when loading from a binary data file + // the initial score file should have the suffix ".bin.init" + dataset->metadata_.LoadInitialScore(bin_filename); } // not need to check validation data // check meta data diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp index edd001ee2fa1..185487629022 100644 --- a/src/io/metadata.cpp +++ b/src/io/metadata.cpp @@ -29,7 +29,7 @@ void Metadata::Init(const char* data_filename) { LoadQueryBoundaries(); LoadWeights(); LoadQueryWeights(); - LoadInitialScore(); + LoadInitialScore(data_filename_); } Metadata::~Metadata() { @@ -418,10 +418,10 @@ void Metadata::LoadWeights() { weight_load_from_file_ = true; } -void Metadata::LoadInitialScore() { +void Metadata::LoadInitialScore(const std::string& data_filename) { num_init_score_ = 0; - std::string init_score_filename(data_filename_); - init_score_filename = 
std::string(data_filename_); + std::string init_score_filename(data_filename); + init_score_filename = std::string(data_filename); // default init_score file name init_score_filename.append(".init"); TextReader reader(init_score_filename.c_str(), false);