From 67cc4c9c2ed2d536195c34bf232efe737afb046b Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Sun, 27 Dec 2020 01:51:08 +0300 Subject: [PATCH 01/42] add link to optuna examples (#3680) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4ca7640f4514..0a9cc08b9afb 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ Next you may want to read: - [**Parameters**](https://github.com/microsoft/LightGBM/blob/master/docs/Parameters.rst) is an exhaustive list of customization you can make. - [**Parallel Learning**](https://github.com/microsoft/LightGBM/blob/master/docs/Parallel-Learning-Guide.rst) and [**GPU Learning**](https://github.com/microsoft/LightGBM/blob/master/docs/GPU-Tutorial.rst) can speed up computation. - [**Laurae++ interactive documentation**](https://sites.google.com/view/lauraepp/parameters) is a detailed guide for hyperparameters. -- [**Optuna Hyperparameter Tuner**](https://medium.com/optuna/lightgbm-tuner-new-optuna-integration-for-hyperparameter-optimization-8b7095e99258) provides automated tuning for LightGBM hyperparameters. +- [**Optuna Hyperparameter Tuner**](https://medium.com/optuna/lightgbm-tuner-new-optuna-integration-for-hyperparameter-optimization-8b7095e99258) provides automated tuning for LightGBM hyperparameters ([code examples](https://github.com/optuna/optuna/blob/master/examples/)). Documentation for contributors: From e95468cbdc5d0732b33f5ffcd61124cb2965d129 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Mon, 28 Dec 2020 07:02:51 +0300 Subject: [PATCH 02/42] [ci] speed up files downloading for Windows CI (#3682) * speed up files downloading for Windows CI * run CI * revert changing CI branch --- .ci/install_opencl.ps1 | 31 ++++++++++++------------------- .ci/test_r_package_windows.ps1 | 1 + .ci/test_windows.ps1 | 1 + .vsts-ci.yml | 7 ------- 4 files changed, 14 insertions(+), 26 deletions(-) diff --git a/.ci/install_opencl.ps1 b/.ci/install_opencl.ps1 index f6d4e54ebecf..fcf87cf88406 100644 --- a/.ci/install_opencl.ps1 +++ b/.ci/install_opencl.ps1 @@ -1,30 +1,23 @@ Write-Output "Installing OpenCL CPU platform" -$cache = "$env:PIPELINE_WORKSPACE\opencl_windows-amd_cpu-v3_0_130_135" $installer = "AMD-APP-SDKInstaller-v3.0.130.135-GA-windows-F-x64.exe" -if ($env:OPENCL_INSTALLER_FOUND -ne 'true') { - # Pipeline cache miss; download OpenCL platform installer executable into workspace cache - Write-Output "Downloading OpenCL platform installer" - Invoke-WebRequest -OutFile "$installer" -Uri "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" +Write-Output "Downloading OpenCL platform installer" +$ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed +Invoke-WebRequest -OutFile "$installer" -Uri "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/$installer" - Write-Output "Caching OpenCL platform installer" - New-Item $cache -ItemType Directory | Out-Null - Move-Item -Path "$installer" -Destination "$cache\$installer" | Out-Null - - if (Test-Path "$cache\$installer") { - Write-Output "Successfully downloaded OpenCL platform installer" - } else { - Write-Output "Unable to download OpenCL platform installer" - Write-Output "Setting EXIT" - $host.SetShouldExit(-1) - Exit -1 - } +if (Test-Path "$installer") { + Write-Output "Successfully downloaded OpenCL platform installer" +} else { + Write-Output "Unable to download OpenCL platform installer" + Write-Output "Setting EXIT" + $host.SetShouldExit(-1) + Exit -1 } -# Install 
OpenCL platform from installer executable expected in workspace cache +# Install OpenCL platform from installer executable Write-Output "Running OpenCL installer" -Invoke-Command -ScriptBlock {Start-Process "$cache\$installer" -ArgumentList '/S /V"/quiet /norestart /passive /log opencl.log"' -Wait} +Invoke-Command -ScriptBlock { Start-Process "$installer" -ArgumentList '/S /V"/quiet /norestart /passive /log opencl.log"' -Wait } $property = Get-ItemProperty -Path Registry::HKEY_LOCAL_MACHINE\SOFTWARE\Khronos\OpenCL\Vendors if ($property -eq $null) { diff --git a/.ci/test_r_package_windows.ps1 b/.ci/test_r_package_windows.ps1 index 6f96284ec15e..e49a638b10b7 100644 --- a/.ci/test_r_package_windows.ps1 +++ b/.ci/test_r_package_windows.ps1 @@ -5,6 +5,7 @@ function Download-File-With-Retries { [string]$url, [string]$destfile ) + $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed do { Write-Output "Downloading ${url}" sleep 5; diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index ea2e5f99bdfe..9e53141add0e 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -40,6 +40,7 @@ elseif ($env:TASK -eq "sdist") { cd dist; pip install @(Get-ChildItem *.gz) -v ; Check-Output $? $env:JAVA_HOME = $env:JAVA_HOME_8_X64 # there is pre-installed Zulu OpenJDK-8 somewhere + $ProgressPreference = "SilentlyContinue" # progress bar bug extremely slows down download speed Invoke-WebRequest -Uri "https://github.com/microsoft/LightGBM/releases/download/v2.0.12/swigwin-3.0.12.zip" -OutFile $env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip -UserAgent "NativeHost" Add-Type -AssemblyName System.IO.Compression.FileSystem [System.IO.Compression.ZipFile]::ExtractToDirectory("$env:BUILD_SOURCESDIRECTORY/swig/swigwin.zip", "$env:BUILD_SOURCESDIRECTORY/swig") diff --git a/.vsts-ci.yml b/.vsts-ci.yml index d6e13e482cb9..6fbf6c5a5767 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -124,13 +124,6 @@ jobs: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" Write-Host "##vso[task.setvariable variable=AZURE]true" displayName: 'Set Variables' - - task: Cache@2 - inputs: - key: '"opencl.windows" | "amd.cpu" | "v3.0.130.135" | "1"' - path: $(Pipeline.Workspace)/opencl_windows-amd_cpu-v3_0_130_135 - cacheHitVar: OPENCL_INSTALLER_FOUND - condition: eq(variables['TASK'], 'bdist') - displayName: 'Cache OpenCL Installer' - script: | cmd /c "powershell -ExecutionPolicy Bypass -File %BUILD_SOURCESDIRECTORY%/.ci/install_opencl.ps1" condition: eq(variables['TASK'], 'bdist') From be1202d5fc64eaf7b408cb78cbfd02ff7be21bb2 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 28 Dec 2020 15:16:59 +0000 Subject: [PATCH 03/42] [ci] remove unused CI checks (#3688) --- .ci/setup.sh | 4 ++-- .ci/test.sh | 4 ++-- .vsts-ci.yml | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/.ci/setup.sh b/.ci/setup.sh index 6f2597a8f8b9..752158ff3952 100755 --- a/.ci/setup.sh +++ b/.ci/setup.sh @@ -43,13 +43,13 @@ else # Linux chmod +x cmake.sh ./cmake.sh --prefix=/usr/local --exclude-subdir fi - if [[ $TRAVIS == "true" ]] || [[ $GITHUB_ACTIONS == "true" ]]; then + if [[ $SETUP_CONDA != "false" ]]; then wget -q -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh fi fi if [[ "${TASK:0:9}" != "r-package" ]]; then - if [[ $TRAVIS == "true" ]] || [[ $OS_NAME == "macos" ]]; then + if [[ $SETUP_CONDA != "false" ]]; then sh conda.sh -b -p $CONDA fi conda config --set always_yes yes --set changeps1 no diff --git a/.ci/test.sh b/.ci/test.sh index 
a32be65a93ed..63c6e4af6c05 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -18,7 +18,7 @@ source activate $CONDA_ENV cd $BUILD_DIRECTORY -if [[ $TRAVIS == "true" ]] && [[ $TASK == "check-docs" ]]; then +if [[ $TASK == "check-docs" ]]; then cd $BUILD_DIRECTORY/docs conda install -q -y -n $CONDA_ENV -c conda-forge doxygen pip install --user -r requirements.txt rstcheck git+git://github.com/linkchecker/linkchecker.git@b9390c9ef63f7e1e210b48bc7fe97f76e8d39501 @@ -41,7 +41,7 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "check-docs" ]]; then exit 0 fi -if [[ $TRAVIS == "true" ]] && [[ $TASK == "lint" ]]; then +if [[ $TASK == "lint" ]]; then conda install -q -y -n $CONDA_ENV \ pycodestyle \ pydocstyle \ diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 6fbf6c5a5767..e08694c7bac2 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -8,6 +8,7 @@ trigger: pr: - master variables: + AZURE: 'true' PYTHON_VERSION: 3.8 CONDA_ENV: test-env resources: @@ -20,6 +21,7 @@ jobs: ########################################### variables: COMPILER: gcc + SETUP_CONDA: 'false' pool: vmImage: 'ubuntu-latest' container: ubuntu1404 @@ -46,7 +48,6 @@ jobs: - script: | echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" echo "##vso[task.setvariable variable=OS_NAME]linux" - echo "##vso[task.setvariable variable=AZURE]true" echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" echo "##vso[task.prependpath]$CONDA/bin" AMDAPPSDK_PATH=$BUILD_SOURCESDIRECTORY/AMDAPPSDK @@ -86,7 +87,6 @@ jobs: - script: | echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" echo "##vso[task.setvariable variable=OS_NAME]macos" - echo "##vso[task.setvariable variable=AZURE]true" echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" CONDA=$AGENT_HOMEDIRECTORY/miniconda echo "##vso[task.setvariable variable=CONDA]$CONDA" @@ -122,7 +122,6 @@ jobs: steps: - powershell: | Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" - Write-Host "##vso[task.setvariable variable=AZURE]true" displayName: 'Set Variables' - script: | cmd /c "powershell -ExecutionPolicy Bypass -File %BUILD_SOURCESDIRECTORY%/.ci/install_opencl.ps1" From 5a4608467d930aa46f8ef5d7ceb6ecfb8643b220 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Tue, 29 Dec 2020 01:24:38 +0300 Subject: [PATCH 04/42] small code and docs refactoring (#3681) * small code and docs refactoring * Update CMakeLists.txt * Update .vsts-ci.yml * Update test.sh * continue * continue * revert stable sort for all-unique values --- CMakeLists.txt | 9 ++---- docs/Parameters.rst | 16 ++++++----- include/LightGBM/c_api.h | 16 +++++------ include/LightGBM/config.h | 15 +++++----- src/c_api.cpp | 2 +- src/io/config.cpp | 4 +-- src/io/dataset_loader.cpp | 1 - src/io/tree.cpp | 8 ++++-- src/treelearner/linear_tree_learner.cpp | 2 +- src/treelearner/linear_tree_learner.h | 2 +- src/treelearner/tree_learner.cpp | 2 +- tests/python_package_test/test_basic.py | 22 +++++++-------- tests/python_package_test/test_engine.py | 35 +++++++++++++----------- 13 files changed, 70 insertions(+), 64 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index de0dd6573199..d32beae1c4e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,12 +127,6 @@ endif(USE_CUDA) if(USE_OPENMP) find_package(OpenMP REQUIRED) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -else() - # Ignore unknown #pragma warning - if((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - OR (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-Wno-unknown-pragmas") - endif() endif(USE_OPENMP) if(USE_GPU) @@ -272,6 +266,9 @@ if(UNIX OR MINGW OR CYGWIN) if(USE_SWIG) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") endif() + if(NOT USE_OPENMP) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unknown-pragmas -Wno-unused-private-field") + endif() endif() if(WIN32 AND MINGW) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 1287c1354331..004e87e96532 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -119,7 +119,7 @@ Core Parameters - ``linear_tree`` :raw-html:`🔗︎`, default = ``false``, type = bool - - fit piecewise linear gradient boosting tree, only works with cpu and serial tree learner + - fit piecewise linear gradient boosting tree - tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant @@ -127,15 +127,17 @@ Core Parameters - categorical features are used for splits as normal but are not used in the linear models - - missing values must be encoded as ``np.nan`` (Python) or ``NA`` (cli), not ``0`` + - missing values must be encoded as ``np.nan`` (Python) or ``NA`` (CLI), not ``0`` - it is recommended to rescale data before training so that features have similar mean and standard deviation - - not yet supported in R-package + - **Note**: only works with CPU and ``serial`` tree learner - - ``regression_l1`` objective is not supported with linear tree boosting + - **Note**: not yet supported in R-package - - setting ``linear_tree = True`` significantly increases the memory use of LightGBM + - **Note**: ``regression_l1`` objective is not supported with linear tree boosting + + - **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM - ``data`` :raw-html:`🔗︎`, default = ``""``, type = string, aliases: ``train``, ``train_data``, ``train_data_file``, ``data_filename`` @@ -406,7 +408,7 @@ Learning Control Parameters - ``linear_lambda`` :raw-html:`🔗︎`, default = ``0.0``, type = double, constraints: ``linear_lambda >= 0.0`` - - Linear tree regularisation, the parameter `lambda` in Eq 3 of + - linear tree regularization, corresponds to the parameter ``lambda`` in Eq. 3 of `Gradient Boosting with Piece-Wise Linear Regression Trees `__ - ``min_gain_to_split`` :raw-html:`🔗︎`, default = ``0.0``, type = double, aliases: ``min_split_gain``, constraints: ``min_gain_to_split >= 0.0`` @@ -580,7 +582,7 @@ Learning Control Parameters - if ``path_smooth > 0`` then ``min_data_in_leaf`` must be at least ``2`` - - larger values give stronger regularisation + - larger values give stronger regularization - the weight of each node is ``(n / path_smooth) * w + w_p / (n / path_smooth + 1)``, where ``n`` is the number of samples in the node, ``w`` is the optimal node weight to minimise the loss (approximately ``-sum_gradients / sum_hessians``), and ``w_p`` is the weight of the parent node diff --git a/include/LightGBM/c_api.h b/include/LightGBM/c_api.h index c16143d82160..57713ba1be2b 100644 --- a/include/LightGBM/c_api.h +++ b/include/LightGBM/c_api.h @@ -389,14 +389,6 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetGetNumData(DatasetHandle handle, LIGHTGBM_C_EXPORT int LGBM_DatasetGetNumFeature(DatasetHandle handle, int* out); -/*! -* \brief Get boolean representing whether booster is fitting linear trees. -* \param handle Handle of dataset -* \param[out] out The address to hold linear indicator -* \return 0 when succeed, -1 when failure happens -*/ -LIGHTGBM_C_EXPORT int LGBM_BoosterGetLinear(BoosterHandle handle, bool* out); - /*! 
* \brief Add features from ``source`` to ``target``. * \param target The handle of the dataset to add features to @@ -408,6 +400,14 @@ LIGHTGBM_C_EXPORT int LGBM_DatasetAddFeaturesFrom(DatasetHandle target, // --- start Booster interfaces +/*! +* \brief Get boolean representing whether booster is fitting linear trees. +* \param handle Handle of booster +* \param[out] out The address to hold linear trees indicator +* \return 0 when succeed, -1 when failure happens +*/ +LIGHTGBM_C_EXPORT int LGBM_BoosterGetLinear(BoosterHandle handle, bool* out); + /*! * \brief Create a new boosting learner. * \param train_data Training dataset diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 43f8edd99bf8..f1678c478394 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -148,15 +148,16 @@ struct Config { // descl2 = **Note**: internally, LightGBM uses ``gbdt`` mode for the first ``1 / learning_rate`` iterations std::string boosting = "gbdt"; - // desc = fit piecewise linear gradient boosting tree, only works with cpu and serial tree learner + // desc = fit piecewise linear gradient boosting tree // descl2 = tree splits are chosen in the usual way, but the model at each leaf is linear instead of constant // descl2 = the linear model at each leaf includes all the numerical features in that leaf's branch // descl2 = categorical features are used for splits as normal but are not used in the linear models - // descl2 = missing values must be encoded as ``np.nan`` (Python) or ``NA`` (cli), not ``0`` + // descl2 = missing values must be encoded as ``np.nan`` (Python) or ``NA`` (CLI), not ``0`` // descl2 = it is recommended to rescale data before training so that features have similar mean and standard deviation - // descl2 = not yet supported in R-package - // descl2 = ``regression_l1`` objective is not supported with linear tree boosting - // descl2 = setting ``linear_tree = True`` significantly increases the memory use of LightGBM + // descl2 = **Note**: only works with CPU and ``serial`` tree learner + // descl2 = **Note**: not yet supported in R-package + // descl2 = **Note**: ``regression_l1`` objective is not supported with linear tree boosting + // descl2 = **Note**: setting ``linear_tree=true`` significantly increases the memory use of LightGBM bool linear_tree = false; // alias = train, train_data, train_data_file, data_filename @@ -378,7 +379,7 @@ struct Config { double lambda_l2 = 0.0; // check = >=0.0 - // desc = Linear tree regularisation, the parameter `lambda` in Eq 3 of + // desc = linear tree regularization, corresponds to the parameter ``lambda`` in Eq. 
3 of `Gradient Boosting with Piece-Wise Linear Regression Trees `__ double linear_lambda = 0.0; // alias = min_split_gain @@ -530,7 +531,7 @@ struct Config { // desc = helps prevent overfitting on leaves with few samples // desc = if set to zero, no smoothing is applied // desc = if ``path_smooth > 0`` then ``min_data_in_leaf`` must be at least ``2`` - // desc = larger values give stronger regularisation + // desc = larger values give stronger regularization // descl2 = the weight of each node is ``(n / path_smooth) * w + w_p / (n / path_smooth + 1)``, where ``n`` is the number of samples in the node, ``w`` is the optimal node weight to minimise the loss (approximately ``-sum_gradients / sum_hessians``), and ``w_p`` is the weight of the parent node // descl2 = note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth double path_smooth = 0; diff --git a/src/c_api.cpp b/src/c_api.cpp index 4cf9a5e5072a..019f59dfa549 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -290,7 +290,7 @@ class Booster { "the `min_data_in_leaf`."); } if (new_param.count("linear_tree") && (new_config.linear_tree != old_config.linear_tree)) { - Log:: Fatal("Cannot change between gbdt_linear boosting and other boosting types after Dataset handle has been constructed."); + Log::Fatal("Cannot change linear_tree after constructed Dataset handle."); } } diff --git a/src/io/config.cpp b/src/io/config.cpp index ce034505ee4b..331e5f4088d3 100644 --- a/src/io/config.cpp +++ b/src/io/config.cpp @@ -340,9 +340,9 @@ void Config::CheckParamConflict() { Log::Warning("CUDA currently requires double precision calculations."); gpu_use_dp = true; } - // linear tree learner must be serial type and cpu device + // linear tree learner must be serial type and run on cpu device if (linear_tree) { - if (device_type == std::string("gpu")) { + if (device_type != std::string("cpu")) { device_type = "cpu"; Log::Warning("Linear tree learner only works with CPU."); } diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index 24c9637bfa56..4d83aef057d5 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -600,7 +600,6 @@ Dataset* DatasetLoader::LoadFromBinFile(const char* data_filename, const char* b } mem_ptr = buffer.data(); const float* tmp_ptr_raw_row = reinterpret_cast(mem_ptr); - std::vector curr_row(dataset->num_numeric_features_, 0); for (int j = 0; j < dataset->num_features(); ++j) { int feat_ind = dataset->numeric_feature_map_[j]; if (feat_ind >= 0) { diff --git a/src/io/tree.cpp b/src/io/tree.cpp index 91eb0a0ab116..04bd520465a3 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -697,7 +697,9 @@ Tree::Tree(const char* str, size_t* used_len) { is_linear_ = static_cast(is_linear_int); } - if ((num_leaves_ <= 1) && !is_linear_) { return; } + if ((num_leaves_ <= 1) && !is_linear_) { + return; + } if (key_vals.count("left_child")) { left_child_ = CommonC::StringToArrayFast(key_vals["left_child"], num_leaves_ - 1); @@ -780,7 +782,9 @@ Tree::Tree(const char* str, size_t* used_len) { leaf_features_inner_.resize(num_leaves_); if (num_feat.size() > 0) { int total_num_feat = 0; - for (size_t i = 0; i < num_feat.size(); ++i) { total_num_feat += num_feat[i]; } + for (size_t i = 0; i < num_feat.size(); ++i) { + total_num_feat += num_feat[i]; + } std::vector all_leaf_features; if (key_vals.count("leaf_features")) { all_leaf_features = Common::StringToArrayFast(key_vals["leaf_features"], total_num_feat); diff --git 
a/src/treelearner/linear_tree_learner.cpp b/src/treelearner/linear_tree_learner.cpp index 58482b8eef9b..8a358733dd2a 100644 --- a/src/treelearner/linear_tree_learner.cpp +++ b/src/treelearner/linear_tree_learner.cpp @@ -1,5 +1,5 @@ /*! - * Copyright (c) 2016 Microsoft Corporation. All rights reserved. + * Copyright (c) 2020 Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See LICENSE file in the project root for license information. */ #include "linear_tree_learner.h" diff --git a/src/treelearner/linear_tree_learner.h b/src/treelearner/linear_tree_learner.h index 88bc7f91ede1..cc522e1bc426 100644 --- a/src/treelearner/linear_tree_learner.h +++ b/src/treelearner/linear_tree_learner.h @@ -1,5 +1,5 @@ /*! - * Copyright (c) 2016 Microsoft Corporation. All rights reserved. + * Copyright (c) 2020 Microsoft Corporation. All rights reserved. * Licensed under the MIT License. See LICENSE file in the project root for license information. */ #ifndef LIGHTGBM_TREELEARNER_LINEAR_TREE_LEARNER_H_ diff --git a/src/treelearner/tree_learner.cpp b/src/treelearner/tree_learner.cpp index 04184932a335..a386a2129bcc 100644 --- a/src/treelearner/tree_learner.cpp +++ b/src/treelearner/tree_learner.cpp @@ -6,9 +6,9 @@ #include "cuda_tree_learner.h" #include "gpu_tree_learner.h" +#include "linear_tree_learner.h" #include "parallel_tree_learner.h" #include "serial_tree_learner.h" -#include "linear_tree_learner.h" namespace LightGBM { diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index ed410e90088b..9229d23f3809 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -118,18 +118,18 @@ def test_save_and_load_linear(self): X_train[:X_train.shape[0] // 2, 0] = 0 y_train[:X_train.shape[0] // 2] = 1 params = {'linear_tree': True} - train_data = lgb.Dataset(X_train, label=y_train, params=params) - est = lgb.train(params, train_data, num_boost_round=10, categorical_feature=[0]) - pred1 = est.predict(X_train) - train_data.save_binary('temp_dataset.bin') + train_data_1 = lgb.Dataset(X_train, label=y_train, params=params) + est_1 = lgb.train(params, train_data_1, num_boost_round=10, categorical_feature=[0]) + pred_1 = est_1.predict(X_train) + train_data_1.save_binary('temp_dataset.bin') train_data_2 = lgb.Dataset('temp_dataset.bin') - est = lgb.train(params, train_data_2, num_boost_round=10) - pred2 = est.predict(X_train) - np.testing.assert_allclose(pred1, pred2) - est.save_model('temp_model.txt') - est2 = lgb.Booster(model_file='temp_model.txt') - pred3 = est2.predict(X_train) - np.testing.assert_allclose(pred2, pred3) + est_2 = lgb.train(params, train_data_2, num_boost_round=10) + pred_2 = est_2.predict(X_train) + np.testing.assert_allclose(pred_1, pred_2) + est_2.save_model('temp_model.txt') + est_3 = lgb.Booster(model_file='temp_model.txt') + pred_3 = est_3.predict(X_train) + np.testing.assert_allclose(pred_2, pred_3) def test_subset_group(self): X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)), diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index faed1fce28b4..30ecddc7c106 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2232,6 +2232,7 @@ def test_dataset_update_params(self): "group_column": 0, "ignore_column": 0, "min_data_in_leaf": 10, + "linear_tree": False, "verbose": -1} unchangeable_params = {"max_bin": 150, "max_bin_by_feature": [30, 5], @@ 
-2252,7 +2253,8 @@ def test_dataset_update_params(self): "group_column": 1, "ignore_column": 1, "forcedbins_filename": "/some/path/forcedbins.json", - "min_data_in_leaf": 2} + "min_data_in_leaf": 2, + "linear_tree": True} X = np.random.random((100, 2)) y = np.random.random(100) @@ -2420,45 +2422,46 @@ def test_interaction_constraints(self): [1] + list(range(2, num_features))]), train_data, num_boost_round=10) - def test_linear(self): - # check that setting boosting=gbdt_linear fits better than boosting=gbdt when data has linear relationship + def test_linear_trees(self): + # check that setting linear_tree=True fits better than ordinary trees when data has linear relationship np.random.seed(0) x = np.arange(0, 100, 0.1) y = 2 * x + np.random.normal(0, 0.1, len(x)) - lgb_train = lgb.Dataset(x[:, np.newaxis], label=y) + x = x[:, np.newaxis] + lgb_train = lgb.Dataset(x, label=y) params = {'verbose': -1, 'metric': 'mse', 'seed': 0, 'num_leaves': 2} est = lgb.train(params, lgb_train, num_boost_round=10) - pred1 = est.predict(x[:, np.newaxis]) - lgb_train = lgb.Dataset(x[:, np.newaxis], label=y) + pred1 = est.predict(x) + lgb_train = lgb.Dataset(x, label=y) res = {} est = lgb.train(dict(params, linear_tree=True), lgb_train, num_boost_round=10, evals_result=res, valid_sets=[lgb_train], valid_names=['train']) - pred2 = est.predict(x[:, np.newaxis]) + pred2 = est.predict(x) np.testing.assert_allclose(res['train']['l2'][-1], mean_squared_error(y, pred2), atol=10**(-1)) self.assertLess(mean_squared_error(y, pred2), mean_squared_error(y, pred1)) # test again with nans in data x[:10] = np.nan - lgb_train = lgb.Dataset(x[:, np.newaxis], label=y) + lgb_train = lgb.Dataset(x, label=y) est = lgb.train(params, lgb_train, num_boost_round=10) - pred1 = est.predict(x[:, np.newaxis]) - lgb_train = lgb.Dataset(x[:, np.newaxis], label=y) + pred1 = est.predict(x) + lgb_train = lgb.Dataset(x, label=y) res = {} est = lgb.train(dict(params, linear_tree=True), lgb_train, num_boost_round=10, evals_result=res, valid_sets=[lgb_train], valid_names=['train']) - pred2 = est.predict(x[:, np.newaxis]) + pred2 = est.predict(x) np.testing.assert_allclose(res['train']['l2'][-1], mean_squared_error(y, pred2), atol=10**(-1)) self.assertLess(mean_squared_error(y, pred2), mean_squared_error(y, pred1)) # test again with bagging res = {} est = lgb.train(dict(params, linear_tree=True, subsample=0.8, bagging_freq=1), lgb_train, num_boost_round=10, evals_result=res, valid_sets=[lgb_train], valid_names=['train']) - pred = est.predict(x[:, np.newaxis]) + pred = est.predict(x) np.testing.assert_allclose(res['train']['l2'][-1], mean_squared_error(y, pred), atol=10**(-1)) # test with a feature that has only one non-nan value - x = np.concatenate([np.ones([x.shape[0], 1]), x[:, np.newaxis]], 1) + x = np.concatenate([np.ones([x.shape[0], 1]), x], 1) x[500:, 1] = np.nan y[500:] += 10 lgb_train = lgb.Dataset(x, label=y) @@ -2486,11 +2489,11 @@ def test_linear(self): p2 = est2.predict(x) self.assertLess(np.mean(np.abs(p1 - p2)), 2) # test refit: different results training on different data - est2 = est.refit(x[:100, :], label=y[:100]) - p3 = est2.predict(x) + est3 = est.refit(x[:100, :], label=y[:100]) + p3 = est3.predict(x) self.assertGreater(np.mean(np.abs(p2 - p1)), np.abs(np.max(p3 - p1))) # test when num_leaves - 1 < num_features and when num_leaves - 1 > num_features - X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2) + X_train, _, y_train, _ = 
train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2) params = {'linear_tree': True, 'verbose': -1, 'metric': 'mse', From 68a40c790bebfb47f6d9ac16fc4304eaf714bbf3 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 29 Dec 2020 00:25:39 +0000 Subject: [PATCH 05/42] [docs] add doc on min_data_in_leaf approximation (fixes #3634) (#3690) * [docs] add doc on min_data_in_leaf approximation (fixes #3634) * Fix capital letter Co-authored-by: Nikita Titov --- docs/Parameters.rst | 2 ++ include/LightGBM/config.h | 1 + 2 files changed, 3 insertions(+) diff --git a/docs/Parameters.rst b/docs/Parameters.rst index 004e87e96532..235dbb9040a2 100644 --- a/docs/Parameters.rst +++ b/docs/Parameters.rst @@ -284,6 +284,8 @@ Learning Control Parameters - minimal number of data in one leaf. Can be used to deal with over-fitting + - **Note**: this is an approximation based on the Hessian, so occasionally you may observe splits which produce leaf nodes that have less than this many observations + - ``min_sum_hessian_in_leaf`` :raw-html:`🔗︎`, default = ``1e-3``, type = double, aliases: ``min_sum_hessian_per_leaf``, ``min_sum_hessian``, ``min_hessian``, ``min_child_weight``, constraints: ``min_sum_hessian_in_leaf >= 0.0`` - minimal sum hessian in one leaf. Like ``min_data_in_leaf``, it can be used to deal with over-fitting diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index f1678c478394..b8dd3817407c 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -274,6 +274,7 @@ struct Config { // alias = min_data_per_leaf, min_data, min_child_samples // check = >=0 // desc = minimal number of data in one leaf. Can be used to deal with over-fitting + // desc = **Note**: this is an approximation based on the Hessian, so occasionally you may observe splits which produce leaf nodes that have less than this many observations int min_data_in_leaf = 20; // alias = min_sum_hessian_per_leaf, min_sum_hessian, min_hessian, min_child_weight From 6cb968af2eecd79f9a1b78b2f5db3b5acf75d515 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 29 Dec 2020 23:16:34 +0000 Subject: [PATCH 06/42] [python-package] remove unused Eigen files, compile with EIGEN_MPL2_ONLY (fixes #3684) (#3685) * [python-package] remove unused Eigen files (fixes #3684) * more changes * add EIGEN_MPL2_ONLY in VS solution file * fix VS project * remove EIGEN_MPL2_ONLY define in linear_tree_learner Co-authored-by: Nikita Titov --- CMakeLists.txt | 3 +++ python-package/MANIFEST.in | 24 +++++++++++++++++++++++- src/treelearner/linear_tree_learner.cpp | 2 -- windows/LightGBM.vcxproj | 13 +++++++++---- windows/LightGBM.vcxproj.filters | 8 +++++++- 5 files changed, 42 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d32beae1c4e1..53301511ba7b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -93,6 +93,9 @@ endif(USE_SWIG) SET(EIGEN_DIR "${PROJECT_SOURCE_DIR}/eigen") include_directories(${EIGEN_DIR}) +# See https://gitlab.com/libeigen/eigen/-/blob/master/COPYING.README +ADD_DEFINITIONS(-DEIGEN_MPL2_ONLY) + if(__BUILD_FOR_R) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules") find_package(LibR REQUIRED) diff --git a/python-package/MANIFEST.in b/python-package/MANIFEST.in index cc0ab0626f91..bb8a3afdd9a6 100644 --- a/python-package/MANIFEST.in +++ b/python-package/MANIFEST.in @@ -10,7 +10,29 @@ include compile/compute/CMakeLists.txt recursive-include compile/compute/cmake * recursive-include compile/compute/include * recursive-include 
compile/compute/meta * -recursive-include compile/eigen * +include compile/eigen/CMakeLists.txt +include compile/eigen/Eigen/CMakeLists.txt +include compile/eigen/Eigen/Cholesky +include compile/eigen/Eigen/Core +include compile/eigen/Eigen/Dense +include compile/eigen/Eigen/Eigenvalues +include compile/eigen/Eigen/Geometry +include compile/eigen/Eigen/Householder +include compile/eigen/Eigen/Jacobi +include compile/eigen/Eigen/LU +include compile/eigen/Eigen/QR +include compile/eigen/Eigen/SVD +recursive-include compile/eigen/Eigen/src/Cholesky * +recursive-include compile/eigen/Eigen/src/Core * +recursive-include compile/eigen/Eigen/src/Eigenvalues * +recursive-include compile/eigen/Eigen/src/Geometry * +recursive-include compile/eigen/Eigen/src/Householder * +recursive-include compile/eigen/Eigen/src/Jacobi * +recursive-include compile/eigen/Eigen/src/LU * +recursive-include compile/eigen/Eigen/src/misc * +recursive-include compile/eigen/Eigen/src/plugins * +recursive-include compile/eigen/Eigen/src/QR * +recursive-include compile/eigen/Eigen/src/SVD * include compile/external_libs/fast_double_parser/CMakeLists.txt include compile/external_libs/fast_double_parser/LICENSE include compile/external_libs/fast_double_parser/LICENSE.BSL diff --git a/src/treelearner/linear_tree_learner.cpp b/src/treelearner/linear_tree_learner.cpp index 8a358733dd2a..134892a13ef7 100644 --- a/src/treelearner/linear_tree_learner.cpp +++ b/src/treelearner/linear_tree_learner.cpp @@ -7,8 +7,6 @@ #include #ifndef LGB_R_BUILD -// preprocessor definition ensures we use only MPL2-licensed code -#define EIGEN_MPL2_ONLY #include #endif // !LGB_R_BUILD diff --git a/windows/LightGBM.vcxproj b/windows/LightGBM.vcxproj index 5c01a8cdb932..5b9190fb2fdb 100644 --- a/windows/LightGBM.vcxproj +++ b/windows/LightGBM.vcxproj @@ -1,4 +1,4 @@ - + @@ -99,9 +99,14 @@ ..\include;$(VC_IncludePath);$(WindowsSDK_IncludePath); lib_lightgbm + + + EIGEN_MPL2_ONLY + + - USE_MPI + USE_MPI;%(PreprocessorDefinitions) Level4 true Neither @@ -124,7 +129,7 @@ - USE_SOCKET + USE_SOCKET;%(PreprocessorDefinitions) Level4 true Neither @@ -144,7 +149,7 @@ - USE_SOCKET + USE_SOCKET;%(PreprocessorDefinitions) Level4 true Neither diff --git a/windows/LightGBM.vcxproj.filters b/windows/LightGBM.vcxproj.filters index b721af3fb187..0f48c7564580 100644 --- a/windows/LightGBM.vcxproj.filters +++ b/windows/LightGBM.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -225,6 +225,9 @@ src\treelearner + + src\treelearner + @@ -320,5 +323,8 @@ src\io + + src\treelearner + \ No newline at end of file From 967b45c68665488ac872f77848f516d84a70726c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 29 Dec 2020 23:46:57 +0000 Subject: [PATCH 07/42] [ci] fix deprecated 'brew cask install' call (#3692) --- .ci/test_r_package.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh index 9b83d2a8b775..f59d4efe1acc 100755 --- a/.ci/test_r_package.sh +++ b/.ci/test_r_package.sh @@ -59,7 +59,6 @@ if [[ $OS_NAME == "linux" ]]; then qpdf \ || exit -1 - if [[ $R_BUILD_TYPE == "cran" ]]; then sudo apt-get install \ --no-install-recommends \ @@ -77,7 +76,7 @@ if [[ $OS_NAME == "macos" ]]; then brew install \ checkbashisms \ qpdf - brew cask install basictex + brew install --cask basictex export PATH="/Library/TeX/texbin:$PATH" sudo tlmgr --verify-repo=none update --self sudo tlmgr --verify-repo=none install inconsolata helvetic From f9a6b11ca80b4749b74e879b74efb78136147c70 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sat, 2 Jan 
2021 22:44:51 +0000 Subject: [PATCH 08/42] [R-package] fix incorrect passing builds in valgrind checks (fixes #3704) (#3705) * try to fix valgrind checks * fix redirection * uncomment other CIs --- .ci/test_r_package_valgrind.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.ci/test_r_package_valgrind.sh b/.ci/test_r_package_valgrind.sh index fa4d6b3e6357..f17b3bc8b9be 100755 --- a/.ci/test_r_package_valgrind.sh +++ b/.ci/test_r_package_valgrind.sh @@ -10,10 +10,11 @@ RDvalgrind \ --vanilla \ -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes" \ -f testthat.R \ - 2>&1 > ${ALL_LOGS_FILE} || exit -1 + > ${ALL_LOGS_FILE} 2>&1 || exit -1 cat ${ALL_LOGS_FILE} +echo "writing valgrind output to ${VALGRIND_LOGS_FILE}" cat ${ALL_LOGS_FILE} | grep -E "^\=" > ${VALGRIND_LOGS_FILE} bytes_definitely_lost=$( @@ -22,8 +23,8 @@ bytes_definitely_lost=$( | sed 's/^.*definitely lost\: \(.*\) bytes.*$/\1/' \ | tr -d "," ) +echo "valgrind found ${bytes_definitely_lost} bytes definitely lost" if [[ ${bytes_definitely_lost} -gt 0 ]]; then - echo "valgrind found ${bytes_definitely_lost} bytes definitely lost" exit -1 fi @@ -33,8 +34,8 @@ bytes_indirectly_lost=$( | sed 's/^.*indirectly lost\: \(.*\) bytes.*$/\1/' \ | tr -d "," ) +echo "valgrind found ${bytes_indirectly_lost} bytes indirectly lost" if [[ ${bytes_indirectly_lost} -gt 0 ]]; then - echo "valgrind found ${bytes_indirectly_lost} bytes indirectly lost" exit -1 fi @@ -60,8 +61,8 @@ bytes_possibly_lost=$( | sed 's/^.*possibly lost\: \(.*\) bytes.*$/\1/' \ | tr -d "," ) +echo "valgrind found ${bytes_possibly_lost} bytes possibly lost" if [[ ${bytes_possibly_lost} -gt 336 ]]; then - echo "valgrind found ${bytes_possibly_lost} bytes possibly lost" exit -1 fi From 26671aa3e34cde5cbdea511b79b904cbdfbb729d Mon Sep 17 00:00:00 2001 From: sisco0 Date: Sat, 2 Jan 2021 19:04:18 -0600 Subject: [PATCH 09/42] fix warning (#3678) Compile warnings have been fixed --- include/LightGBM/dataset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index b1ee7a40dd40..a17e21a1d903 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -340,7 +340,7 @@ class Dataset { if (has_raw_) { int feat_ind = numeric_feature_map_[feature_idx]; if (feat_ind >= 0) { - raw_data_[feat_ind][row_idx] = feature_values[i]; + raw_data_[feat_ind][row_idx] = static_cast(feature_values[i]); } } } From d7a384fa0480f019f0f4b89210755d07857b5ae4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 3 Jan 2021 01:14:01 +0000 Subject: [PATCH 10/42] [R-package] remove unused R_AS_INT64 (#3686) --- R-package/src/R_object_helper.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/R-package/src/R_object_helper.h b/R-package/src/R_object_helper.h index e14a2ac6697b..ce8a326a82f4 100644 --- a/R-package/src/R_object_helper.h +++ b/R-package/src/R_object_helper.h @@ -108,8 +108,6 @@ typedef union { VECTOR_SER s; double align; } SEXPREC_ALIGN; #define R_AS_INT(x) (*(reinterpret_cast DATAPTR(x))) -#define R_AS_INT64(x) (*(reinterpret_cast DATAPTR(x))) - #define R_IS_NULL(x) ((*reinterpret_cast(x)).sxpinfo.type == 0) // 64bit pointer From 532fa914e681924bc1a32aec5469f155b4ec0b4c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 3 Jan 2021 04:26:17 +0000 Subject: [PATCH 11/42] [R-package] allow access to params in Booster (#3662) * [R-package] allow access to params in Booster * remove unnecessary whitespace * fix test on resetting params * remove pytest_cache * Update 
R-package/tests/testthat/test_custom_objective.R --- R-package/R/lgb.Booster.R | 12 ++- R-package/R/readRDS.lgb.Booster.R | 1 + R-package/tests/testthat/test_lgb.Booster.R | 104 ++++++++++++++++++++ src/c_api.cpp | 8 +- tests/python_package_test/test_engine.py | 34 +++++++ 5 files changed, 153 insertions(+), 6 deletions(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index c3a0de0bc9dd..192f3ed201cf 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -6,6 +6,7 @@ Booster <- R6::R6Class( best_iter = -1L, best_score = NA_real_, + params = list(), record_evals = list(), # Finalize will free up the handles @@ -134,6 +135,8 @@ Booster <- R6::R6Class( } + self$params <- params + }, # Set training data name @@ -187,17 +190,20 @@ Booster <- R6::R6Class( # Reset parameters of booster reset_parameter = function(params, ...) { - # Append parameters - params <- append(params, list(...)) + if (methods::is(self$params, "list")) { + params <- modifyList(self$params, params) + } + + params <- modifyList(params, list(...)) params_str <- lgb.params2str(params = params) - # Reset parameters lgb.call( fun_name = "LGBM_BoosterResetParameter_R" , ret = NULL , private$handle , params_str ) + self$params <- params return(invisible(self)) diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index 2f10767052c3..603e455e5769 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -44,6 +44,7 @@ readRDS.lgb.Booster <- function(file = "", refhook = NULL) { # Restore best iteration and recorded evaluations object2$best_iter <- object$best_iter object2$record_evals <- object$record_evals + object2$params <- object$params # Return newly loaded object return(object2) diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index e4d75661755e..02923f776fc2 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -386,6 +386,75 @@ test_that("Booster$update() throws an informative error if you provide a non-Dat }, regexp = "lgb.Booster.update: Only can use lgb.Dataset", fixed = TRUE) }) +test_that("Booster should store parameters and Booster$reset_parameter() should update them", { + data(agaricus.train, package = "lightgbm") + dtrain <- lgb.Dataset( + agaricus.train$data + , label = agaricus.train$label + ) + # testing that this works for some cases that could break it: + # - multiple metrics + # - using "metric", "boosting", "num_class" in params + params <- list( + objective = "multiclass" + , max_depth = 4L + , bagging_fraction = 0.8 + , metric = c("multi_logloss", "multi_error") + , boosting = "gbdt" + , num_class = 5L + ) + bst <- Booster$new( + params = params + , train_set = dtrain + ) + expect_identical(bst$params, params) + + params[["bagging_fraction"]] <- 0.9 + ret_bst <- bst$reset_parameter(params = params) + expect_identical(ret_bst$params, params) + expect_identical(bst$params, params) +}) + +test_that("Booster$params should include dataset params, before and after Booster$reset_parameter()", { + data(agaricus.train, package = "lightgbm") + dtrain <- lgb.Dataset( + agaricus.train$data + , label = agaricus.train$label + , params = list( + max_bin = 17L + ) + ) + params <- list( + objective = "binary" + , max_depth = 4L + , bagging_fraction = 0.8 + ) + bst <- Booster$new( + params = params + , train_set = dtrain + ) + expect_identical( + bst$params + , list( + objective = "binary" + , max_depth = 4L + , 
bagging_fraction = 0.8 + , max_bin = 17L + ) + ) + + params[["bagging_fraction"]] <- 0.9 + ret_bst <- bst$reset_parameter(params = params) + expected_params <- list( + objective = "binary" + , max_depth = 4L + , bagging_fraction = 0.9 + , max_bin = 17L + ) + expect_identical(ret_bst$params, expected_params) + expect_identical(bst$params, expected_params) +}) + context("save_model") test_that("Saving a model with different feature importance types works", { @@ -626,3 +695,38 @@ test_that("lgb.cv() correctly handles passing through params to the model file", } }) + +context("saveRDS.lgb.Booster() and readRDS.lgb.Booster()") + +test_that("params (including dataset params) should be stored in .rds file for Booster", { + data(agaricus.train, package = "lightgbm") + dtrain <- lgb.Dataset( + agaricus.train$data + , label = agaricus.train$label + , params = list( + max_bin = 17L + ) + ) + params <- list( + objective = "binary" + , max_depth = 4L + , bagging_fraction = 0.8 + ) + bst <- Booster$new( + params = params + , train_set = dtrain + ) + bst_file <- tempfile(fileext = ".rds") + saveRDS.lgb.Booster(bst, file = bst_file) + + bst_from_file <- readRDS.lgb.Booster(file = bst_file) + expect_identical( + bst_from_file$params + , list( + objective = "binary" + , max_depth = 4L + , bagging_fraction = 0.8 + , max_bin = 17L + ) + ) +}) diff --git a/src/c_api.cpp b/src/c_api.cpp index 019f59dfa549..2a09c03f41f3 100644 --- a/src/c_api.cpp +++ b/src/c_api.cpp @@ -297,13 +297,15 @@ class Booster { void ResetConfig(const char* parameters) { UNIQUE_LOCK(mutex_) auto param = Config::Str2Map(parameters); - if (param.count("num_class")) { + Config new_config; + new_config.Set(param); + if (param.count("num_class") && new_config.num_class != config_.num_class) { Log::Fatal("Cannot change num_class during training"); } - if (param.count("boosting")) { + if (param.count("boosting") && new_config.boosting != config_.boosting) { Log::Fatal("Cannot change boosting during training"); } - if (param.count("metric")) { + if (param.count("metric") && new_config.metric != config_.metric) { Log::Fatal("Cannot change metric during training"); } CheckDatasetResetConfig(config_, param); diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 30ecddc7c106..1b30fd24eb8e 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2609,3 +2609,37 @@ def test_average_precision_metric(self): lgb_X = lgb.Dataset(X, label=y) lgb.train(params, lgb_X, num_boost_round=1, valid_sets=[lgb_X], evals_result=res) self.assertAlmostEqual(res['training']['average_precision'][-1], 1) + + def test_reset_params_works_with_metric_num_class_and_boosting(self): + X, y = load_breast_cancer(return_X_y=True) + params = { + 'objective': 'multiclass', + 'max_depth': 4, + 'bagging_fraction': 0.8, + 'metric': ['multi_logloss', 'multi_error'], + 'boosting': 'gbdt', + 'num_class': 5 + } + dtrain = lgb.Dataset(X, y, params={"max_bin": 150}) + + bst = lgb.Booster( + params=params, + train_set=dtrain + ) + expected_params = { + 'objective': 'multiclass', + 'max_depth': 4, + 'bagging_fraction': 0.8, + 'metric': ['multi_logloss', 'multi_error'], + 'boosting': 'gbdt', + 'num_class': 5, + 'max_bin': 150 + } + assert bst.params == expected_params + + params['bagging_fraction'] = 0.9 + ret_bst = bst.reset_parameter(params) + + expected_params['bagging_fraction'] = 0.9 + assert bst.params == expected_params + assert ret_bst.params == expected_params From 
85b9daa96b87da3d6543a2703757a103ec932fde Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 3 Jan 2021 16:58:34 +0000 Subject: [PATCH 12/42] [R-package] remove broken default for `file` in readRDS / saveRDS functions (#3664) * [R-package] remove broken default file in readRDS / saveRDS functions * empty commit --- .gitignore | 8 ++++++-- R-package/R/readRDS.lgb.Booster.R | 2 +- R-package/R/saveRDS.lgb.Booster.R | 2 +- R-package/man/readRDS.lgb.Booster.Rd | 2 +- R-package/man/saveRDS.lgb.Booster.Rd | 2 +- 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index d161df99e465..2cc7c91212aa 100644 --- a/.gitignore +++ b/.gitignore @@ -404,6 +404,9 @@ python-package/lightgbm/VERSION.txt **/autom4te.cache/ conftest* R-package/config.status +!R-package/data/agaricus.test.rda +!R-package/data/agaricus.train.rda +!R-package/data/bank.rda R-package/docs R-package/src/CMakeLists.txt R-package/src/Makevars @@ -422,9 +425,7 @@ miktex*.zip # Files created by R examples and tests **/lgb-Dataset.data -**/lgb-model.rds **/lgb.Dataset.data -**/model.rds **/model.txt **/lgb-model.txt @@ -432,6 +433,9 @@ miktex*.zip .Rproj.user **/.Rapp.history **/.Rhistory +*.rda +*.RData +*.rds # Files generated by aspell **/*.bak diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index 603e455e5769..df48d4807f2d 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -31,7 +31,7 @@ #' new_model <- readRDS.lgb.Booster(model_file) #' } #' @export -readRDS.lgb.Booster <- function(file = "", refhook = NULL) { +readRDS.lgb.Booster <- function(file, refhook = NULL) { object <- readRDS(file = file, refhook = refhook) diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index 8c03d8879d60..5c4194d75e31 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -42,7 +42,7 @@ #' } #' @export saveRDS.lgb.Booster <- function(object, - file = "", + file, ascii = FALSE, version = NULL, compress = TRUE, diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd index aff69a01b313..add489441986 100644 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -4,7 +4,7 @@ \alias{readRDS.lgb.Booster} \title{readRDS for \code{lgb.Booster} models} \usage{ -readRDS.lgb.Booster(file = "", refhook = NULL) +readRDS.lgb.Booster(file, refhook = NULL) } \arguments{ \item{file}{a connection or the name of the file where the R object is saved to or read from.} diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd index f267293f9d96..ac9a0386c02d 100644 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -6,7 +6,7 @@ \usage{ saveRDS.lgb.Booster( object, - file = "", + file, ascii = FALSE, version = NULL, compress = TRUE, From aae4fe401b316e156bf1d4aa8bbc213a606aab5e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 3 Jan 2021 19:17:48 +0000 Subject: [PATCH 13/42] [R-package] add support for non-ASCII feature names (fixes #2983) (#3647) * [R-package] add support for non-ASCII feature names (fixes #2983) * fix Windows --- R-package/tests/testthat/test_basic.R | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index ba6fc864fcfe..d2c20debdc48 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -1,5 
+1,7 @@ context("lightgbm()") +ON_WINDOWS <- .Platform$OS.type == "windows" + data(agaricus.train, package = "lightgbm") data(agaricus.test, package = "lightgbm") train <- agaricus.train @@ -1168,7 +1170,6 @@ test_that("lgb.train() works with early stopping for regression with a metric th test_that("lgb.train() supports non-ASCII feature names", { - testthat::skip("UTF-8 feature names are not fully supported in the R package") dtrain <- lgb.Dataset( data = matrix(rnorm(400L), ncol = 4L) , label = rnorm(100L) @@ -1185,10 +1186,21 @@ test_that("lgb.train() supports non-ASCII feature names", { ) expect_true(lgb.is.Booster(bst)) dumped_model <- jsonlite::fromJSON(bst$dump_model()) - expect_identical( - dumped_model[["feature_names"]] - , feature_names - ) + + # UTF-8 strings are not well-supported on Windows + # * https://developer.r-project.org/Blog/public/2020/05/02/utf-8-support-on-windows/ + # * https://developer.r-project.org/Blog/public/2020/07/30/windows/utf-8-build-of-r-and-cran-packages/index.html + if (!ON_WINDOWS) { + expect_identical( + dumped_model[["feature_names"]] + , feature_names + ) + } else { + expect_identical( + dumped_model[["feature_names"]] + , iconv(feature_names, to = "UTF-8") + ) + } }) test_that("when early stopping is not activated, best_iter and best_score come from valids and not training data", { From 28e9875e1851b4d11590e7947e40fbc4b6d90cbe Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Mon, 4 Jan 2021 02:31:50 +0300 Subject: [PATCH 14/42] remove line in MANIFEST for nonexistent file (#3711) --- python-package/MANIFEST.in | 1 - 1 file changed, 1 deletion(-) diff --git a/python-package/MANIFEST.in b/python-package/MANIFEST.in index bb8a3afdd9a6..1f5f5851068c 100644 --- a/python-package/MANIFEST.in +++ b/python-package/MANIFEST.in @@ -11,7 +11,6 @@ recursive-include compile/compute/cmake * recursive-include compile/compute/include * recursive-include compile/compute/meta * include compile/eigen/CMakeLists.txt -include compile/eigen/Eigen/CMakeLists.txt include compile/eigen/Eigen/Cholesky include compile/eigen/Eigen/Core include compile/eigen/Eigen/Dense From d1014ea64e577921112160b2fe3cb018174861d7 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 4 Jan 2021 02:43:59 +0000 Subject: [PATCH 15/42] [docs] Remove Gitter and Slack (fixes #3689) (#3710) --- README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/README.md b/README.md index 0a9cc08b9afb..4637f4430169 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,6 @@ Light Gradient Boosting Machine [![Python Versions](https://img.shields.io/pypi/pyversions/lightgbm.svg?logo=python&logoColor=white)](https://pypi.org/project/lightgbm) [![PyPI Version](https://img.shields.io/pypi/v/lightgbm.svg?logo=pypi&logoColor=white)](https://pypi.org/project/lightgbm) [![CRAN Version](https://www.r-pkg.org/badges/version/lightgbm)](https://cran.r-project.org/package=lightgbm) -[![Join Gitter at https://gitter.im/Microsoft/LightGBM](https://badges.gitter.im/Microsoft/LightGBM.svg)](https://gitter.im/Microsoft/LightGBM?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -[![Slack](https://lightgbm-slack-autojoin.herokuapp.com/badge.svg)](https://lightgbm-slack-autojoin.herokuapp.com) LightGBM is a gradient boosting framework that uses tree based learning algorithms. 
It is designed to be distributed and efficient with the following advantages: @@ -104,9 +102,6 @@ Support ------- - Ask a question [on Stack Overflow with the `lightgbm` tag](https://stackoverflow.com/questions/ask?tags=lightgbm), we monitor this for new questions. -- Discuss on the [LightGBM Gitter](https://gitter.im/Microsoft/LightGBM). -- Discuss on the [LightGBM Slack team](https://lightgbm.slack.com). - - Use [this invite link](https://lightgbm-slack-autojoin.herokuapp.com/) to join the team. - Open **bug reports** and **feature requests** (not questions) on [GitHub issues](https://github.com/microsoft/LightGBM/issues). How to Contribute From 69798c3eb67fd3f3fe3f64d341564e481bcbaf2b Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Mon, 4 Jan 2021 21:11:24 +0300 Subject: [PATCH 16/42] [python][tests] small Python tests cleanup (#3715) --- .gitignore | 2 +- tests/python_package_test/test_basic.py | 4 +-- tests/python_package_test/test_engine.py | 31 +++++++++--------------- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 2cc7c91212aa..959462864825 100644 --- a/.gitignore +++ b/.gitignore @@ -423,7 +423,7 @@ lightgbm.Rcheck/ miktex*.zip *.def -# Files created by R examples and tests +# Files created by R and Python examples and tests **/lgb-Dataset.data **/lgb.Dataset.data **/model.txt diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 9229d23f3809..4855e8e844b2 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -126,8 +126,8 @@ def test_save_and_load_linear(self): est_2 = lgb.train(params, train_data_2, num_boost_round=10) pred_2 = est_2.predict(X_train) np.testing.assert_allclose(pred_1, pred_2) - est_2.save_model('temp_model.txt') - est_3 = lgb.Booster(model_file='temp_model.txt') + est_2.save_model('model.txt') + est_3 = lgb.Booster(model_file='model.txt') pred_3 = est_3.predict(X_train) np.testing.assert_allclose(pred_2, pred_3) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index 1b30fd24eb8e..b266fee25128 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -2612,7 +2612,8 @@ def test_average_precision_metric(self): def test_reset_params_works_with_metric_num_class_and_boosting(self): X, y = load_breast_cancer(return_X_y=True) - params = { + dataset_params = {"max_bin": 150} + booster_params = { 'objective': 'multiclass', 'max_depth': 4, 'bagging_fraction': 0.8, @@ -2620,26 +2621,18 @@ def test_reset_params_works_with_metric_num_class_and_boosting(self): 'boosting': 'gbdt', 'num_class': 5 } - dtrain = lgb.Dataset(X, y, params={"max_bin": 150}) - + dtrain = lgb.Dataset(X, y, params=dataset_params) bst = lgb.Booster( - params=params, + params=booster_params, train_set=dtrain ) - expected_params = { - 'objective': 'multiclass', - 'max_depth': 4, - 'bagging_fraction': 0.8, - 'metric': ['multi_logloss', 'multi_error'], - 'boosting': 'gbdt', - 'num_class': 5, - 'max_bin': 150 - } - assert bst.params == expected_params - params['bagging_fraction'] = 0.9 - ret_bst = bst.reset_parameter(params) + expected_params = dict(dataset_params, **booster_params) + self.assertDictEqual(bst.params, expected_params) + + booster_params['bagging_fraction'] += 0.1 + new_bst = bst.reset_parameter(booster_params) - expected_params['bagging_fraction'] = 0.9 - assert bst.params == expected_params - assert ret_bst.params == expected_params + expected_params = 
dict(dataset_params, **booster_params) + self.assertDictEqual(bst.params, expected_params) + self.assertDictEqual(new_bst.params, expected_params) From 5d79ff20d1b7ae226531e2445b17d747b253a637 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Mon, 4 Jan 2021 22:27:47 +0300 Subject: [PATCH 17/42] [ci] remove flaky MinGW job for R (#3723) * comment out flaky MinGW job for R * remove instead of comment out --- .github/workflows/r_package.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 2793928e359e..e0d5e92833ad 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -65,12 +65,6 @@ jobs: compiler: clang r_version: 4.0 build_type: cmake - - os: windows-latest - task: r-package - compiler: MINGW - toolchain: MINGW - r_version: 3.6 - build_type: cmake - os: windows-latest task: r-package compiler: MINGW From fc1b930026a2a376d8d9174c2c7662b7185885b5 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 4 Jan 2021 20:44:02 +0000 Subject: [PATCH 18/42] [ci] remove maxParallel limits on Azure DevOps (#3725) --- .vsts-ci.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index e08694c7bac2..a9467b87999e 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -26,7 +26,6 @@ jobs: vmImage: 'ubuntu-latest' container: ubuntu1404 strategy: - maxParallel: 6 matrix: regular: TASK: regular @@ -74,7 +73,6 @@ jobs: pool: vmImage: 'macOS-10.14' strategy: - maxParallel: 3 matrix: regular: TASK: regular @@ -110,7 +108,6 @@ jobs: pool: vmImage: 'vs2017-win2016' strategy: - maxParallel: 3 matrix: regular: TASK: regular From 98e6f719c5111a8bc631877c3d3d9c55e360ae5d Mon Sep 17 00:00:00 2001 From: James Lamb Date: Mon, 4 Jan 2021 21:43:11 +0000 Subject: [PATCH 19/42] [docs] [R-package] improve R-package docs on testing (#3724) --- R-package/README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R-package/README.md b/R-package/README.md index b0c6096b8834..1f07e8177302 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -207,7 +207,7 @@ Please visit [demo](https://github.com/microsoft/LightGBM/tree/master/R-package/ Testing ------- -The R package's unit tests are run automatically on every commit, via integrations like [Travis CI](https://travis-ci.org/microsoft/LightGBM/) and [Azure DevOps](https://dev.azure.com/lightgbm-ci/lightgbm-ci/_build). Adding new tests in `R-package/tests/testthat` is a valuable way to improve the reliability of the R package. +The R package's unit tests are run automatically on every commit, via integrations like [GitHub Actions](https://github.com/microsoft/LightGBM/actions). Adding new tests in `R-package/tests/testthat` is a valuable way to improve the reliability of the R package. When adding tests, you may want to use test coverage to identify untested areas and to check if the tests you've added are covering all branches of the intended code. 
@@ -215,13 +215,11 @@ The example below shows how to generate code coverage for the R package on a mac ```shell # Install -export CXX=/usr/local/bin/g++-8 -export CC=/usr/local/bin/gcc-8 -Rscript build_r.R --skip-install +sh build-cran-package.sh # Get coverage Rscript -e " \ - coverage <- covr::package_coverage('./lightgbm_r', quiet=FALSE); + coverage <- covr::package_coverage('./lightgbm_r', type = 'tests', quiet = FALSE); print(coverage); covr::report(coverage, file = file.path(getwd(), 'coverage.html'), browse = TRUE); " From 3fde0ce9334336c4da01a04580552769bbcc892e Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Tue, 5 Jan 2021 01:59:58 +0300 Subject: [PATCH 20/42] [docs][R-package] change obsolete wordings in R README (#3728) --- R-package/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/README.md b/R-package/README.md index 1f07e8177302..7edd14717955 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -211,7 +211,7 @@ The R package's unit tests are run automatically on every commit, via integratio When adding tests, you may want to use test coverage to identify untested areas and to check if the tests you've added are covering all branches of the intended code. -The example below shows how to generate code coverage for the R package on a macOS or Linux setup, using `gcc-8` to compile `LightGBM`. To adjust for your environment, swap out the 'Install' step with [the relevant code from the instructions above](#install). +The example below shows how to generate code coverage for the R package on a macOS or Linux setup. To adjust for your environment, refer to [the customization step described above](#custom-installation-linux-mac). ```shell # Install From 3fd48864da10baa6b96abc1bde219e4956e4f0a7 Mon Sep 17 00:00:00 2001 From: Guangyu Zeng Date: Tue, 5 Jan 2021 10:54:52 +0800 Subject: [PATCH 21/42] [R-package] Add explicit return statement to R functions. 
(#3703) * add explicit return statement to functions in callback.R * add explicit return statement to functions in lgb.Booster.R * add explicit return statement to functions in lgb.Dataset.R * add explicit return statement to functions in lgb.Predictor.R * add explicit return statement to functions in lgb.cv.R * add explicit return statement to functions in lgb.interprete.R * add explicit return statement to functions in lgb.plot.importance.R * add explicit return statement to functions in saveRDS.lgb.Booster.R * add explicit return statement to functions in utils.R * add explicit return statement to functions in lgb.plot.interpretation.R * add explicit return statement to functions in build_r.R * fix typo * return(self) -> return(invisible(self)) * fix some inconsistent indention * fix test failure * add another return() statement * fix linting errors --- R-package/R/aliases.R | 150 +++++++++++++------------- R-package/R/callback.R | 53 +++++---- R-package/R/lgb.Booster.R | 102 +++++++++++------- R-package/R/lgb.Dataset.R | 108 ++++++++++--------- R-package/R/lgb.Predictor.R | 14 ++- R-package/R/lgb.cv.R | 13 ++- R-package/R/lgb.interprete.R | 8 +- R-package/R/lgb.plot.importance.R | 2 +- R-package/R/lgb.plot.interpretation.R | 1 + R-package/R/metrics.R | 52 ++++----- R-package/R/saveRDS.lgb.Booster.R | 4 + R-package/R/utils.R | 13 ++- build_r.R | 1 + 13 files changed, 298 insertions(+), 223 deletions(-) diff --git a/R-package/R/aliases.R b/R-package/R/aliases.R index 8176125b6f2f..cb2288fedb15 100644 --- a/R-package/R/aliases.R +++ b/R-package/R/aliases.R @@ -7,74 +7,76 @@ # [return] A named list, where each key is a parameter relevant to lgb.DataSet and each value is a character # vector of corresponding aliases. .DATASET_PARAMETERS <- function() { - return(list( - "bin_construct_sample_cnt" = c( - "bin_construct_sample_cnt" - , "subsample_for_bin" + return( + list( + "bin_construct_sample_cnt" = c( + "bin_construct_sample_cnt" + , "subsample_for_bin" + ) + , "categorical_feature" = c( + "categorical_feature" + , "cat_feature" + , "categorical_column" + , "cat_column" + ) + , "data_random_seed" = c( + "data_random_seed" + , "data_seed" + ) + , "enable_bundle" = c( + "enable_bundle" + , "is_enable_bundle" + , "bundle" + ) + , "feature_pre_filter" = "feature_pre_filter" + , "forcedbins_filename" = "forcedbins_filename" + , "group_column" = c( + "group_column" + , "group" + , "group_id" + , "query_column" + , "query" + , "query_id" + ) + , "header" = c( + "header" + , "has_header" + ) + , "ignore_column" = c( + "ignore_column" + , "ignore_feature" + , "blacklist" + ) + , "is_enable_sparse" = c( + "is_enable_sparse" + , "is_sparse" + , "enable_sparse" + , "sparse" + ) + , "label_column" = c( + "label_column" + , "label" + ) + , "max_bin" = "max_bin" + , "max_bin_by_feature" = "max_bin_by_feature" + , "min_data_in_bin" = "min_data_in_bin" + , "pre_partition" = c( + "pre_partition" + , "is_pre_partition" + ) + , "two_round" = c( + "two_round" + , "two_round_loading" + , "use_two_round_loading" + ) + , "use_missing" = "use_missing" + , "weight_column" = c( + "weight_column" + , "weight" + ) + , "zero_as_missing" = "zero_as_missing" ) - , "categorical_feature" = c( - "categorical_feature" - , "cat_feature" - , "categorical_column" - , "cat_column" - ) - , "data_random_seed" = c( - "data_random_seed" - , "data_seed" - ) - , "enable_bundle" = c( - "enable_bundle" - , "is_enable_bundle" - , "bundle" - ) - , "feature_pre_filter" = "feature_pre_filter" - , "forcedbins_filename" = 
"forcedbins_filename" - , "group_column" = c( - "group_column" - , "group" - , "group_id" - , "query_column" - , "query" - , "query_id" - ) - , "header" = c( - "header" - , "has_header" - ) - , "ignore_column" = c( - "ignore_column" - , "ignore_feature" - , "blacklist" - ) - , "is_enable_sparse" = c( - "is_enable_sparse" - , "is_sparse" - , "enable_sparse" - , "sparse" - ) - , "label_column" = c( - "label_column" - , "label" - ) - , "max_bin" = "max_bin" - , "max_bin_by_feature" = "max_bin_by_feature" - , "min_data_in_bin" = "min_data_in_bin" - , "pre_partition" = c( - "pre_partition" - , "is_pre_partition" - ) - , "two_round" = c( - "two_round" - , "two_round_loading" - , "use_two_round_loading" - ) - , "use_missing" = "use_missing" - , "weight_column" = c( - "weight_column" - , "weight" - ) - , "zero_as_missing" = "zero_as_missing" - )) + ) } # [description] List of respected parameter aliases. Wrapped in a function to take advantage of @@ -115,10 +117,12 @@ # [returns] # A character vector .NO_METRIC_STRINGS <- function() { - return(c( - "na" - , "None" - , "null" - , "custom" - )) + return( + c( + "na" + , "None" + , "null" + , "custom" + ) + ) } diff --git a/R-package/R/callback.R b/R-package/R/callback.R index 53462b010ba7..8d3f58222507 100644 --- a/R-package/R/callback.R +++ b/R-package/R/callback.R @@ -74,6 +74,8 @@ cb.reset.parameters <- function(new_params) { } + return(invisible(NULL)) + } callback <- function(env) { @@ -95,15 +97,17 @@ cb.reset.parameters <- function(new_params) { }) if (!is.null(env$model)) { - env$model$reset_parameter(params = pars) + return(env$model$reset_parameter(params = pars)) } + return(invisible(NULL)) + } attr(callback, "call") <- match.call() attr(callback, "is_pre_iteration") <- TRUE attr(callback, "name") <- "cb.reset.parameters" - callback + return(callback) } # Format the evaluation metric string @@ -116,9 +120,9 @@ format.eval.string <- function(eval_res, eval_err = NULL) { # Check for empty evaluation error if (!is.null(eval_err)) { - sprintf("%s\'s %s:%g+%g", eval_res$data_name, eval_res$name, eval_res$value, eval_err) + return(sprintf("%s\'s %s:%g+%g", eval_res$data_name, eval_res$name, eval_res$value, eval_err)) } else { - sprintf("%s\'s %s:%g", eval_res$data_name, eval_res$name, eval_res$value) + return(sprintf("%s\'s %s:%g", eval_res$data_name, eval_res$name, eval_res$value)) } } @@ -150,7 +154,7 @@ merge.eval.string <- function(env) { } - paste0(msg, collapse = " ") + return(paste0(msg, collapse = " ")) } @@ -180,13 +184,15 @@ cb.print.evaluation <- function(period = 1L) { } + return(invisible(NULL)) + } # Store attributes attr(callback, "call") <- match.call() attr(callback, "name") <- "cb.print.evaluation" - callback + return(callback) } @@ -253,13 +259,15 @@ cb.record.evaluation <- function() { } + return(invisible(NULL)) + } # Store attributes attr(callback, "call") <- match.call() attr(callback, "name") <- "cb.record.evaluation" - callback + return(callback) } @@ -311,6 +319,8 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose = } + return(invisible(NULL)) + } # Create callback @@ -392,18 +402,21 @@ cb.early.stop <- function(stopping_rounds, first_metric_only = FALSE, verbose = env$met_early_stop <- TRUE } } + + return(invisible(NULL)) + } attr(callback, "call") <- match.call() attr(callback, "name") <- "cb.early.stop" - callback + return(callback) } # Extract callback names from the list of callbacks callback.names <- function(cb_list) { - unlist(lapply(cb_list, attr, "name")) + 
return(unlist(lapply(cb_list, attr, "name"))) } add.cb <- function(cb_list, cb) { @@ -426,22 +439,24 @@ add.cb <- function(cb_list, cb) { } # Return element - cb_list + return(cb_list) } categorize.callbacks <- function(cb_list) { # Check for pre-iteration or post-iteration - list( - pre_iter = Filter(function(x) { - pre <- attr(x, "is_pre_iteration") - !is.null(pre) && pre - }, cb_list), - post_iter = Filter(function(x) { - pre <- attr(x, "is_pre_iteration") - is.null(pre) || !pre - }, cb_list) + return( + list( + pre_iter = Filter(function(x) { + pre <- attr(x, "is_pre_iteration") + !is.null(pre) && pre + }, cb_list), + post_iter = Filter(function(x) { + pre <- attr(x, "is_pre_iteration") + is.null(pre) || !pre + }, cb_list) + ) ) } diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 192f3ed201cf..dc912477cd76 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -21,6 +21,8 @@ Booster <- R6::R6Class( } + return(invisible(NULL)) + }, # Initialize will create a starter booster @@ -137,6 +139,8 @@ Booster <- R6::R6Class( self$params <- params + return(invisible(NULL)) + }, # Set training data name @@ -320,10 +324,12 @@ Booster <- R6::R6Class( current_iter = function() { cur_iter <- 0L - lgb.call( - fun_name = "LGBM_BoosterGetCurrentIteration_R" - , ret = cur_iter - , private$handle + return( + lgb.call( + fun_name = "LGBM_BoosterGetCurrentIteration_R" + , ret = cur_iter + , private$handle + ) ) }, @@ -332,10 +338,12 @@ Booster <- R6::R6Class( upper_bound = function() { upper_bound <- 0.0 - lgb.call( - fun_name = "LGBM_BoosterGetUpperBoundValue_R" - , ret = upper_bound - , private$handle + return( + lgb.call( + fun_name = "LGBM_BoosterGetUpperBoundValue_R" + , ret = upper_bound + , private$handle + ) ) }, @@ -344,10 +352,12 @@ Booster <- R6::R6Class( lower_bound = function() { lower_bound <- 0.0 - lgb.call( - fun_name = "LGBM_BoosterGetLowerBoundValue_R" - , ret = lower_bound - , private$handle + return( + lgb.call( + fun_name = "LGBM_BoosterGetLowerBoundValue_R" + , ret = lower_bound + , private$handle + ) ) }, @@ -397,17 +407,19 @@ Booster <- R6::R6Class( } # Evaluate data - private$inner_eval( - data_name = name - , data_idx = data_idx - , feval = feval + return( + private$inner_eval( + data_name = name + , data_idx = data_idx + , feval = feval + ) ) }, # Evaluation training data eval_train = function(feval = NULL) { - private$inner_eval(private$name_train_set, 1L, feval) + return(private$inner_eval(private$name_train_set, 1L, feval)) }, # Evaluation validation data @@ -463,12 +475,14 @@ Booster <- R6::R6Class( } # Return model string - return(lgb.call.return.str( - fun_name = "LGBM_BoosterSaveModelToString_R" - , private$handle - , as.integer(num_iteration) - , as.integer(feature_importance_type) - )) + return( + lgb.call.return.str( + fun_name = "LGBM_BoosterSaveModelToString_R" + , private$handle + , as.integer(num_iteration) + , as.integer(feature_importance_type) + ) + ) }, @@ -480,11 +494,13 @@ Booster <- R6::R6Class( num_iteration <- self$best_iter } - lgb.call.return.str( - fun_name = "LGBM_BoosterDumpModel_R" - , private$handle - , as.integer(num_iteration) - , as.integer(feature_importance_type) + return( + lgb.call.return.str( + fun_name = "LGBM_BoosterDumpModel_R" + , private$handle + , as.integer(num_iteration) + , as.integer(feature_importance_type) + ) ) }, @@ -510,7 +526,8 @@ Booster <- R6::R6Class( # Predict on new data predictor <- Predictor$new(private$handle, ...) 
- predictor$predict( + return( + predictor$predict( data = data , start_iteration = start_iteration , num_iteration = num_iteration @@ -519,13 +536,14 @@ Booster <- R6::R6Class( , predcontrib = predcontrib , header = header , reshape = reshape + ) ) }, # Transform into predictor to_predictor = function() { - Predictor$new(private$handle) + return(Predictor$new(private$handle)) }, # Used for save @@ -537,6 +555,8 @@ Booster <- R6::R6Class( # Overwrite model in object self$raw <- self$save_model_to_string(NULL) + return(invisible(NULL)) + } ), @@ -780,8 +800,9 @@ predict.lgb.Booster <- function(object, } # Return booster predictions - object$predict( - data = data + return( + object$predict( + data = data , start_iteration = start_iteration , num_iteration = num_iteration , rawscore = rawscore @@ -789,7 +810,8 @@ predict.lgb.Booster <- function(object, , predcontrib = predcontrib , header = header , reshape = reshape - , ... + , ... + ) ) } @@ -896,10 +918,12 @@ lgb.save <- function(booster, filename, num_iteration = NULL) { } # Store booster - invisible(booster$save_model( - filename = filename - , num_iteration = num_iteration - )) + return( + invisible(booster$save_model( + filename = filename + , num_iteration = num_iteration + )) + ) } @@ -941,7 +965,7 @@ lgb.dump <- function(booster, num_iteration = NULL) { } # Return booster at requested iteration - booster$dump_model(num_iteration = num_iteration) + return(booster$dump_model(num_iteration = num_iteration)) } @@ -1045,5 +1069,5 @@ lgb.get.eval.result <- function(booster, data_name, eval_name, iters = NULL, is_ iters <- iters - delta # Return requested result - as.numeric(result[iters]) + return(as.numeric(result[iters])) } diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index d7f7020c0a01..b9831c232f31 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -18,6 +18,8 @@ Dataset <- R6::R6Class( } + return(invisible(NULL)) + }, # Initialize will create a starter dataset @@ -85,6 +87,8 @@ Dataset <- R6::R6Class( private$info <- info private$version <- 0L + return(invisible(NULL)) + }, create_valid = function(data, @@ -325,16 +329,18 @@ Dataset <- R6::R6Class( num_col <- 0L # Get numeric data and numeric features - c( - lgb.call( - fun_name = "LGBM_DatasetGetNumData_R" - , ret = num_row - , private$handle - ), - lgb.call( - fun_name = "LGBM_DatasetGetNumFeature_R" - , ret = num_col - , private$handle + return( + c( + lgb.call( + fun_name = "LGBM_DatasetGetNumData_R" + , ret = num_row + , private$handle + ), + lgb.call( + fun_name = "LGBM_DatasetGetNumFeature_R" + , ret = num_col + , private$handle + ) ) ) @@ -342,7 +348,7 @@ Dataset <- R6::R6Class( # Check if dgCMatrix (sparse matrix column compressed) # NOTE: requires Matrix package - dim(private$raw_data) + return(dim(private$raw_data)) } else { @@ -368,12 +374,12 @@ Dataset <- R6::R6Class( , private$handle ) private$colnames <- as.character(base::strsplit(cnames, "\t")[[1L]]) - private$colnames + return(private$colnames) } else if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) { # Check if dgCMatrix (sparse matrix column compressed) - colnames(private$raw_data) + return(colnames(private$raw_data)) } else { @@ -470,7 +476,7 @@ Dataset <- R6::R6Class( } } - private$info[[name]] + return(private$info[[name]]) }, @@ -522,17 +528,19 @@ Dataset <- R6::R6Class( slice = function(idxset, ...) 
{ # Perform slicing - Dataset$new( - data = NULL - , params = private$params - , reference = self - , colnames = private$colnames - , categorical_feature = private$categorical_feature - , predictor = private$predictor - , free_raw_data = private$free_raw_data - , used_indices = sort(idxset, decreasing = FALSE) - , info = NULL - , ... + return( + Dataset$new( + data = NULL + , params = private$params + , reference = self + , colnames = private$colnames + , categorical_feature = private$categorical_feature + , predictor = private$predictor + , free_raw_data = private$free_raw_data + , used_indices = sort(idxset, decreasing = FALSE) + , info = NULL + , ... + ) ) }, @@ -679,7 +687,7 @@ Dataset <- R6::R6Class( if (lgb.is.null.handle(x = private$handle)) { self$construct() } - private$handle + return(private$handle) }, @@ -753,18 +761,20 @@ lgb.Dataset <- function(data, ...) { # Create new dataset - invisible(Dataset$new( - data = data - , params = params - , reference = reference - , colnames = colnames - , categorical_feature = categorical_feature - , predictor = NULL - , free_raw_data = free_raw_data - , used_indices = NULL - , info = info - , ... - )) + return( + invisible(Dataset$new( + data = data + , params = params + , reference = reference + , colnames = colnames + , categorical_feature = categorical_feature + , predictor = NULL + , free_raw_data = free_raw_data + , used_indices = NULL + , info = info + , ... + )) + ) } @@ -796,7 +806,7 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) { } # Create validation dataset - invisible(dataset$create_valid(data = data, info = info, ...)) + return(invisible(dataset$create_valid(data = data, info = info, ...))) } @@ -822,7 +832,7 @@ lgb.Dataset.construct <- function(dataset) { } # Construct the dataset - invisible(dataset$construct()) + return(invisible(dataset$construct())) } @@ -856,7 +866,7 @@ dim.lgb.Dataset <- function(x, ...) { stop("dim.lgb.Dataset: input data should be an lgb.Dataset object") } - x$dim() + return(x$dim()) } @@ -893,7 +903,7 @@ dimnames.lgb.Dataset <- function(x) { } # Return dimension names - list(NULL, x$get_colnames()) + return(list(NULL, x$get_colnames())) } @@ -932,7 +942,7 @@ dimnames.lgb.Dataset <- function(x) { # Set column names properly, and return x$set_colnames(colnames = value[[2L]]) - x + return(x) } @@ -970,7 +980,7 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) { } # Return sliced set - invisible(dataset$slice(idxset = idxset, ...)) + return(invisible(dataset$slice(idxset = idxset, ...))) } @@ -1020,7 +1030,7 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) { stop("getinfo.lgb.Dataset: input dataset should be an lgb.Dataset object") } - dataset$getinfo(name = name) + return(dataset$getinfo(name = name)) } @@ -1074,7 +1084,7 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) 
{ } # Set information - invisible(dataset$setinfo(name = name, info = info)) + return(invisible(dataset$setinfo(name = name, info = info))) } #' @name lgb.Dataset.set.categorical @@ -1106,7 +1116,7 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { } # Set categoricals - invisible(dataset$set_categorical_feature(categorical_feature = categorical_feature)) + return(invisible(dataset$set_categorical_feature(categorical_feature = categorical_feature))) } @@ -1138,7 +1148,7 @@ lgb.Dataset.set.reference <- function(dataset, reference) { } # Set reference - invisible(dataset$set_reference(reference = reference)) + return(invisible(dataset$set_reference(reference = reference))) } #' @name lgb.Dataset.save @@ -1171,5 +1181,5 @@ lgb.Dataset.save <- function(dataset, fname) { } # Store binary - invisible(dataset$save_binary(fname = fname)) + return(invisible(dataset$save_binary(fname = fname))) } diff --git a/R-package/R/lgb.Predictor.R b/R-package/R/lgb.Predictor.R index 55f4f6363751..ced408e410dd 100644 --- a/R-package/R/lgb.Predictor.R +++ b/R-package/R/lgb.Predictor.R @@ -23,6 +23,8 @@ Predictor <- R6::R6Class( } + return(invisible(NULL)) + }, # Initialize will create a starter model @@ -59,16 +61,20 @@ Predictor <- R6::R6Class( class(handle) <- "lgb.Booster.handle" private$handle <- handle + return(invisible(NULL)) + }, # Get current iteration current_iter = function() { cur_iter <- 0L - lgb.call( - fun_name = "LGBM_BoosterGetCurrentIteration_R" - , ret = cur_iter - , private$handle + return( + lgb.call( + fun_name = "LGBM_BoosterGetCurrentIteration_R" + , ret = cur_iter + , private$handle + ) ) }, diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 5d314e4cf089..31bb1b47b307 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -9,10 +9,11 @@ CVBooster <- R6::R6Class( boosters = list(), initialize = function(x) { self$boosters <- x + return(invisible(NULL)) }, reset_parameter = function(new_params) { for (x in boosters) { x$reset_parameter(new_params) } - self + return(invisible(self)) } ) ) @@ -563,7 +564,7 @@ lgb.stratified.folds <- function(y, k = 10L) { out <- split(seq(along = y), foldVector) names(out) <- NULL - out + return(out) } lgb.merge.cv.result <- function(msg, showsd = TRUE) { @@ -615,9 +616,11 @@ lgb.merge.cv.result <- function(msg, showsd = TRUE) { } # Return errors - list( - eval_list = ret_eval - , eval_err_list = ret_eval_err + return( + list( + eval_list = ret_eval + , eval_err_list = ret_eval_err + ) ) } diff --git a/R-package/R/lgb.interprete.R b/R-package/R/lgb.interprete.R index e51900d8b061..3a6cb7400691 100644 --- a/R-package/R/lgb.interprete.R +++ b/R-package/R/lgb.interprete.R @@ -137,9 +137,11 @@ single.tree.interprete <- function(tree_dt, , current_value = leaf_dt[["leaf_value"]] ) - data.table::data.table( - Feature = feature_seq - , Contribution = diff.default(value_seq) + return( + data.table::data.table( + Feature = feature_seq + , Contribution = diff.default(value_seq) + ) ) } diff --git a/R-package/R/lgb.plot.importance.R b/R-package/R/lgb.plot.importance.R index 7a971cb0813c..dfa9a138e984 100644 --- a/R-package/R/lgb.plot.importance.R +++ b/R-package/R/lgb.plot.importance.R @@ -91,6 +91,6 @@ lgb.plot.importance <- function(tree_imp, , las = 1L )] - invisible(tree_imp) + return(invisible(tree_imp)) } diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index a4d7ce8f4958..87c3e25b3bb7 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ 
b/R-package/R/lgb.plot.interpretation.R @@ -124,6 +124,7 @@ lgb.plot.interpretation <- function(tree_interpretation_dt, } } + return(invisible(NULL)) } #' @importFrom graphics barplot diff --git a/R-package/R/metrics.R b/R-package/R/metrics.R index b411eb4d5ad8..e7099e9483e0 100644 --- a/R-package/R/metrics.R +++ b/R-package/R/metrics.R @@ -7,29 +7,31 @@ # are returned from the C++ side. For example, if you use `metric = "mse"` in your code, # the metric name `"l2"` will be returned. .METRICS_HIGHER_BETTER <- function() { - return(c( - "l1" = FALSE - , "l2" = FALSE - , "mape" = FALSE - , "rmse" = FALSE - , "quantile" = FALSE - , "huber" = FALSE - , "fair" = FALSE - , "poisson" = FALSE - , "gamma" = FALSE - , "gamma_deviance" = FALSE - , "tweedie" = FALSE - , "ndcg" = TRUE - , "map" = TRUE - , "auc" = TRUE - , "average_precision" = TRUE - , "binary_logloss" = FALSE - , "binary_error" = FALSE - , "auc_mu" = TRUE - , "multi_logloss" = FALSE - , "multi_error" = FALSE - , "cross_entropy" = FALSE - , "cross_entropy_lambda" = FALSE - , "kullback_leibler" = FALSE - )) + return( + c( + "l1" = FALSE + , "l2" = FALSE + , "mape" = FALSE + , "rmse" = FALSE + , "quantile" = FALSE + , "huber" = FALSE + , "fair" = FALSE + , "poisson" = FALSE + , "gamma" = FALSE + , "gamma_deviance" = FALSE + , "tweedie" = FALSE + , "ndcg" = TRUE + , "map" = TRUE + , "auc" = TRUE + , "average_precision" = TRUE + , "binary_logloss" = FALSE + , "binary_error" = FALSE + , "auc_mu" = TRUE + , "multi_logloss" = FALSE + , "multi_error" = FALSE + , "cross_entropy" = FALSE + , "cross_entropy_lambda" = FALSE + , "kullback_leibler" = FALSE + ) + ) } diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index 5c4194d75e31..1a0a657fae62 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -66,6 +66,8 @@ saveRDS.lgb.Booster <- function(object, # Free model from memory object$raw <- NA + return(invisible(NULL)) + } else { saveRDS( @@ -77,6 +79,8 @@ saveRDS.lgb.Booster <- function(object, , refhook = refhook ) + return(invisible(NULL)) + } } diff --git a/R-package/R/utils.R b/R-package/R/utils.R index bce0de01e919..b2689a49ddda 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -1,9 +1,9 @@ lgb.is.Booster <- function(x) { - lgb.check.r6.class(object = x, name = "lgb.Booster") + return(lgb.check.r6.class(object = x, name = "lgb.Booster")) } lgb.is.Dataset <- function(x) { - lgb.check.r6.class(object = x, name = "lgb.Dataset") + return(lgb.check.r6.class(object = x, name = "lgb.Dataset")) } lgb.null.handle <- function() { @@ -15,7 +15,7 @@ lgb.null.handle <- function() { } lgb.is.null.handle <- function(x) { - is.null(x) || is.na(x) + return(is.null(x) || is.na(x)) } lgb.encode.char <- function(arr, len) { @@ -54,6 +54,9 @@ lgb.last_error <- function() { } stop("api error: ", lgb.encode.char(arr = err_msg, len = act_len)) + + return(invisible(NULL)) + } lgb.call <- function(fun_name, ret, ...) 
{ @@ -232,14 +235,14 @@ lgb.c_str <- function(x) { ret <- charToRaw(as.character(x)) ret <- c(ret, as.raw(0L)) - ret + return(ret) } lgb.check.r6.class <- function(object, name) { # Check for non-existence of R6 class or named class - all(c("R6", name) %in% class(object)) + return(all(c("R6", name) %in% class(object))) } diff --git a/build_r.R b/build_r.R index 74bd989f9e31..fbafadfcc319 100644 --- a/build_r.R +++ b/build_r.R @@ -53,6 +53,7 @@ install_libs_content <- .replace_flag("use_msys2", USING_MSYS2, install_libs_con if (!all(res)) { stop("Copying files failed!") } + return(invisible(NULL)) } # system() will not raise an R exception if the process called From 415c0cb57794d184ea8639aa5a98a83610639976 Mon Sep 17 00:00:00 2001 From: CharlesAuguste Date: Tue, 5 Jan 2021 23:05:53 +0000 Subject: [PATCH 22/42] fix test_monotone_constraints often fails on MPI builds (#3683) * Fix monotone constraint bug where split does not fulfill constraints. * Fix indent. --- src/treelearner/feature_histogram.hpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/treelearner/feature_histogram.hpp b/src/treelearner/feature_histogram.hpp index 2fd74fd2b000..a63239e31355 100644 --- a/src/treelearner/feature_histogram.hpp +++ b/src/treelearner/feature_histogram.hpp @@ -944,16 +944,20 @@ class FeatureHistogram { is_splittable_ = true; // better split point if (current_gain > best_gain) { + if (USE_MC) { + best_right_constraints = constraints->RightToBasicConstraint(); + best_left_constraints = constraints->LeftToBasicConstraint(); + if (best_right_constraints.min > best_right_constraints.max || + best_left_constraints.min > best_left_constraints.max) { + continue; + } + } best_left_count = left_count; best_sum_left_gradient = sum_left_gradient; best_sum_left_hessian = sum_left_hessian; // left is <= threshold, right is > threshold. 
so this is t-1 best_threshold = static_cast(t - 1 + offset); best_gain = current_gain; - if (USE_MC) { - best_right_constraints = constraints->RightToBasicConstraint(); - best_left_constraints = constraints->LeftToBasicConstraint(); - } } } } else { @@ -1033,15 +1037,19 @@ class FeatureHistogram { is_splittable_ = true; // better split point if (current_gain > best_gain) { + if (USE_MC) { + best_right_constraints = constraints->RightToBasicConstraint(); + best_left_constraints = constraints->LeftToBasicConstraint(); + if (best_right_constraints.min > best_right_constraints.max || + best_left_constraints.min > best_left_constraints.max) { + continue; + } + } best_left_count = left_count; best_sum_left_gradient = sum_left_gradient; best_sum_left_hessian = sum_left_hessian; best_threshold = static_cast(t + offset); best_gain = current_gain; - if (USE_MC) { - best_right_constraints = constraints->RightToBasicConstraint(); - best_left_constraints = constraints->LeftToBasicConstraint(); - } } } } From 753b0e9cf0cfe926190b9ffdab5812305f05109d Mon Sep 17 00:00:00 2001 From: Belinda Trotta Date: Thu, 7 Jan 2021 18:57:59 +1100 Subject: [PATCH 23/42] Fix compiler warnings caused by implicit type conversion (fixes #3677) (#3729) * Fix compiler warnings caused by implicit type conversion * Fix more warnings * Fix more warnings --- include/LightGBM/dataset.h | 4 +-- src/io/dataset_loader.cpp | 8 +++--- src/io/tree.cpp | 2 +- src/treelearner/linear_tree_learner.cpp | 38 ++++++++++++------------- src/treelearner/linear_tree_learner.h | 2 +- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index a17e21a1d903..90f48e70c744 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -361,7 +361,7 @@ class Dataset { if (has_raw_) { int feat_ind = numeric_feature_map_[feature_idx]; if (feat_ind >= 0) { - raw_data_[feat_ind][row_idx] = inner_data.second; + raw_data_[feat_ind][row_idx] = static_cast(inner_data.second); } } } @@ -374,7 +374,7 @@ class Dataset { if (has_raw_) { int feat_ind = numeric_feature_map_[feature_idx]; if (feat_ind >= 0) { - raw_data_[feat_ind][row_idx] = value; + raw_data_[feat_ind][row_idx] = static_cast(value); } } } diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index 4d83aef057d5..654003bb5225 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -1125,7 +1125,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines, Common::Vector2Ptr(&sample_values).data(), Common::VectorSize(sample_indices).data(), static_cast(sample_indices.size()), sample_data.size(), config_); if (dataset->has_raw()) { - dataset->ResizeRaw(sample_data.size()); + dataset->ResizeRaw(static_cast(sample_data.size())); } } @@ -1163,7 +1163,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector* text_dat int sub_feature = dataset->feature2subfeature_[feature_idx]; dataset->feature_groups_[group]->PushData(tid, sub_feature, i, inner_data.second); if (dataset->has_raw()) { - feature_row[feature_idx] = inner_data.second; + feature_row[feature_idx] = static_cast(inner_data.second); } } else { if (inner_data.first == weight_idx_) { @@ -1220,7 +1220,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector* text_dat int sub_feature = dataset->feature2subfeature_[feature_idx]; dataset->feature_groups_[group]->PushData(tid, sub_feature, i, inner_data.second); if (dataset->has_raw()) { - feature_row[feature_idx] = inner_data.second; + feature_row[feature_idx] = 
static_cast(inner_data.second); } } else { if (inner_data.first == weight_idx_) { @@ -1293,7 +1293,7 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser* int sub_feature = dataset->feature2subfeature_[feature_idx]; dataset->feature_groups_[group]->PushData(tid, sub_feature, start_idx + i, inner_data.second); if (dataset->has_raw()) { - feature_row[feature_idx] = inner_data.second; + feature_row[feature_idx] = static_cast(inner_data.second); } } else { if (inner_data.first == weight_idx_) { diff --git a/src/io/tree.cpp b/src/io/tree.cpp index 04bd520465a3..4f24b84db830 100644 --- a/src/io/tree.cpp +++ b/src/io/tree.cpp @@ -379,7 +379,7 @@ std::string Tree::ToString() const { << ArrayToString(leaf_const_, num_leaves_) << '\n'; std::vector num_feat(num_leaves_); for (int i = 0; i < num_leaves_; ++i) { - num_feat[i] = leaf_coeff_[i].size(); + num_feat[i] = static_cast(leaf_coeff_[i].size()); } str_buf << "num_features=" << ArrayToString(num_feat, num_leaves_) << '\n'; diff --git a/src/treelearner/linear_tree_learner.cpp b/src/treelearner/linear_tree_learner.cpp index 134892a13ef7..75535d8756e1 100644 --- a/src/treelearner/linear_tree_learner.cpp +++ b/src/treelearner/linear_tree_learner.cpp @@ -201,7 +201,7 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t std::vector> leaf_features; std::vector leaf_num_features; std::vector> raw_data_ptr; - int max_num_features = 0; + size_t max_num_features = 0; for (int i = 0; i < num_leaves; ++i) { std::vector raw_features; if (is_refit) { @@ -224,8 +224,8 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t } leaf_features.push_back(numerical_features); raw_data_ptr.push_back(data_ptr); - leaf_num_features.push_back(numerical_features.size()); - if (static_cast(numerical_features.size()) > max_num_features) { + leaf_num_features.push_back(static_cast(numerical_features.size())); + if (numerical_features.size() > max_num_features) { max_num_features = numerical_features.size(); } } @@ -233,16 +233,16 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t #pragma omp parallel for schedule(static) for (int i = 0; i < num_threads; ++i) { for (int leaf_num = 0; leaf_num < num_leaves; ++leaf_num) { - int num_feat = leaf_features[leaf_num].size(); - std::fill(XTHX_by_thread_[i][leaf_num].begin(), XTHX_by_thread_[i][leaf_num].begin() + (num_feat + 1) * (num_feat + 2) / 2, 0); - std::fill(XTg_by_thread_[i][leaf_num].begin(), XTg_by_thread_[i][leaf_num].begin() + num_feat + 1, 0); + size_t num_feat = leaf_features[leaf_num].size(); + std::fill(XTHX_by_thread_[i][leaf_num].begin(), XTHX_by_thread_[i][leaf_num].begin() + (num_feat + 1) * (num_feat + 2) / 2, 0.0f); + std::fill(XTg_by_thread_[i][leaf_num].begin(), XTg_by_thread_[i][leaf_num].begin() + num_feat + 1, 0.0f); } } #pragma omp parallel for schedule(static) for (int leaf_num = 0; leaf_num < num_leaves; ++leaf_num) { - int num_feat = leaf_features[leaf_num].size(); - std::fill(XTHX_[leaf_num].begin(), XTHX_[leaf_num].begin() + (num_feat + 1) * (num_feat + 2) / 2, 0); - std::fill(XTg_[leaf_num].begin(), XTg_[leaf_num].begin() + num_feat + 1, 0); + size_t num_feat = leaf_features[leaf_num].size(); + std::fill(XTHX_[leaf_num].begin(), XTHX_[leaf_num].begin() + (num_feat + 1) * (num_feat + 2) / 2, 0.0f); + std::fill(XTg_[leaf_num].begin(), XTg_[leaf_num].begin() + num_feat + 1, 0.0f); } std::vector> num_nonzero; for (int i = 0; i < num_threads; ++i) { @@ -283,11 +283,11 @@ void 
LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t } } curr_row[num_feat] = 1.0; - double h = hessians[i]; - double g = gradients[i]; + float h = static_cast(hessians[i]); + float g = static_cast(gradients[i]); int j = 0; for (int feat1 = 0; feat1 < num_feat + 1; ++feat1) { - double f1_val = curr_row[feat1]; + float f1_val = curr_row[feat1]; XTg_by_thread_[tid][leaf_num][feat1] += f1_val * g; f1_val *= h; for (int feat2 = feat1; feat2 < num_feat + 1; ++feat2) { @@ -304,11 +304,11 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t for (int tid = 0; tid < num_threads; ++tid) { #pragma omp parallel for schedule(static) for (int leaf_num = 0; leaf_num < num_leaves; ++leaf_num) { - int num_feat = leaf_features[leaf_num].size(); - for (int j = 0; j < (num_feat + 1) * (num_feat + 2) / 2; ++j) { + size_t num_feat = leaf_features[leaf_num].size(); + for (size_t j = 0; j < (num_feat + 1) * (num_feat + 2) / 2; ++j) { XTHX_[leaf_num][j] += XTHX_by_thread_[tid][leaf_num][j]; } - for (int feat1 = 0; feat1 < num_feat + 1; ++feat1) { + for (size_t feat1 = 0; feat1 < num_feat + 1; ++feat1) { XTg_[leaf_num][feat1] += XTg_by_thread_[tid][leaf_num][feat1]; } if (HAS_NAN) { @@ -337,12 +337,12 @@ void LinearTreeLearner::CalculateLinear(Tree* tree, bool is_refit, const score_t } continue; } - int num_feat = leaf_features[leaf_num].size(); + size_t num_feat = leaf_features[leaf_num].size(); Eigen::MatrixXd XTHX_mat(num_feat + 1, num_feat + 1); Eigen::MatrixXd XTg_mat(num_feat + 1, 1); - int j = 0; - for (int feat1 = 0; feat1 < num_feat + 1; ++feat1) { - for (int feat2 = feat1; feat2 < num_feat + 1; ++feat2) { + size_t j = 0; + for (size_t feat1 = 0; feat1 < num_feat + 1; ++feat1) { + for (size_t feat2 = feat1; feat2 < num_feat + 1; ++feat2) { XTHX_mat(feat1, feat2) = XTHX_[leaf_num][j]; XTHX_mat(feat2, feat1) = XTHX_mat(feat1, feat2); if ((feat1 == feat2) && (feat1 < num_feat)) { diff --git a/src/treelearner/linear_tree_learner.h b/src/treelearner/linear_tree_learner.h index cc522e1bc426..da0998b11730 100644 --- a/src/treelearner/linear_tree_learner.h +++ b/src/treelearner/linear_tree_learner.h @@ -72,7 +72,7 @@ class LinearTreeLearner: public SerialTreeLearner { for (int feat : tree->LeafFeaturesInner(leaf_num)) { feat_ptr[leaf_num].push_back(train_data_->raw_index(feat)); } - leaf_num_features[leaf_num] = feat_ptr[leaf_num].size(); + leaf_num_features[leaf_num] = static_cast(feat_ptr[leaf_num].size()); } OMP_INIT_EX(); #pragma omp parallel for schedule(static) if (num_data_ > 1024) From 31bc196aba4841efbeaccae72bcbae997e7228de Mon Sep 17 00:00:00 2001 From: shiyu1994 Date: Thu, 7 Jan 2021 16:00:03 +0800 Subject: [PATCH 24/42] fix bug in ExtractFeaturesFromMemory when predidct_fun_ is used (#3721) --- src/io/dataset_loader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index 654003bb5225..77a5ed53e02c 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -1205,7 +1205,7 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector* text_dat // set label dataset->metadata_.SetLabelAt(i, static_cast(tmp_label)); // free processed line: - text_data[i].clear(); + ref_text_data[i].clear(); // shrink_to_fit will be very slow in linux, and seems not free memory, disable for now // text_reader_->Lines()[i].shrink_to_fit(); // push data From ed4d0ae3167fb210738f817ab0060284a0c268f6 Mon Sep 17 00:00:00 2001 From: htgeis Date: Thu, 7 Jan 2021 21:51:13 +0800 Subject: 
[PATCH 25/42] support more filesystem as the storage for model file (#3730) Co-authored-by: jingwei.su --- src/boosting/gbdt_model_text.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/boosting/gbdt_model_text.cpp b/src/boosting/gbdt_model_text.cpp index 8b9a2b8b450b..0e296c1ec984 100644 --- a/src/boosting/gbdt_model_text.cpp +++ b/src/boosting/gbdt_model_text.cpp @@ -404,13 +404,13 @@ std::string GBDT::SaveModelToString(int start_iteration, int num_iteration, int bool GBDT::SaveModelToFile(int start_iteration, int num_iteration, int feature_importance_type, const char* filename) const { /*! \brief File to write models */ - std::ofstream output_file; - output_file.open(filename, std::ios::out | std::ios::binary); + auto writer = VirtualFileWriter::Make(filename); + if (!writer->Init()) { + Log::Fatal("Model file %s is not available for writes", filename); + } std::string str_to_write = SaveModelToString(start_iteration, num_iteration, feature_importance_type); - output_file.write(str_to_write.c_str(), str_to_write.size()); - output_file.close(); - - return static_cast(output_file); + auto size = writer->Write(str_to_write.c_str(), str_to_write.size()); + return size > 0; } bool GBDT::LoadModelFromString(const char* buffer, size_t len) { From eb9bbfb327815e843b808f5e2b4d73ee3201b4ac Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 7 Jan 2021 11:53:38 -0600 Subject: [PATCH 26/42] [ci] Move check-docs and lint jobs off Travis (#3726) * [ci] Move check-docs and lint jobs off Travis * Apply suggestions from code review Co-authored-by: Nikita Titov * try moving check-docs into a containerized job * add install flag and README badge * trying to find the error * ignore pipefail * remove -e * Apply suggestions from code review Co-authored-by: Nikita Titov * more changes * additional changes * Update .github/workflows/static_analysis.yml * Update README.md Co-authored-by: Nikita Titov * move constants Co-authored-by: Nikita Titov --- .ci/setup.sh | 2 +- .ci/test.sh | 2 +- .ci/test_r_package.sh | 18 ------ .github/workflows/r_package.yml | 7 +-- .github/workflows/static_analysis.yml | 85 +++++++++++++++++++++++++++ .travis.yml | 6 -- README.md | 3 +- 7 files changed, 90 insertions(+), 33 deletions(-) create mode 100644 .github/workflows/static_analysis.yml diff --git a/.ci/setup.sh b/.ci/setup.sh index 752158ff3952..79c2bdc85d54 100755 --- a/.ci/setup.sh +++ b/.ci/setup.sh @@ -48,7 +48,7 @@ else # Linux fi fi -if [[ "${TASK:0:9}" != "r-package" ]]; then +if [[ "${TASK}" != "r-package" ]]; then if [[ $SETUP_CONDA != "false" ]]; then sh conda.sh -b -p $CONDA fi diff --git a/.ci/test.sh b/.ci/test.sh index 63c6e4af6c05..0f1c99d7882b 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -8,7 +8,7 @@ elif [[ $OS_NAME == "linux" ]] && [[ $COMPILER == "clang" ]]; then export CC=clang fi -if [[ "${TASK:0:9}" == "r-package" ]]; then +if [[ "${TASK}" == "r-package" ]]; then bash ${BUILD_DIRECTORY}/.ci/test_r_package.sh || exit -1 exit 0 fi diff --git a/.ci/test_r_package.sh b/.ci/test_r_package.sh index f59d4efe1acc..4ed9a448edfc 100755 --- a/.ci/test_r_package.sh +++ b/.ci/test_r_package.sh @@ -105,24 +105,6 @@ if [[ $OS_NAME == "macos" ]]; then fi Rscript --vanilla -e "install.packages(${packages}, repos = '${CRAN_MIRROR}', lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'))" || exit -1 -if [[ $TASK == "r-package-check-docs" ]]; then - Rscript build_r.R || exit -1 - Rscript --vanilla -e "install.packages('roxygen2', repos = '${CRAN_MIRROR}', lib = 
'${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'))" || exit -1 - Rscript --vanilla -e "roxygen2::roxygenize('R-package/', load = 'installed')" || exit -1 - num_doc_files_changed=$( - git diff --name-only | grep --count -E "\.Rd|NAMESPACE" - ) - if [[ ${num_doc_files_changed} -gt 0 ]]; then - echo "Some R documentation files have changed. Please re-generate them and commit those changes." - echo "" - echo " Rscript build_r.R" - echo " Rscript -e \"roxygen2::roxygenize('R-package/', load = 'installed')\"" - echo "" - exit -1 - fi - exit 0 -fi - cd ${BUILD_DIRECTORY} PKG_TARBALL="lightgbm_*.tar.gz" diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index e0d5e92833ad..54b2798fa40a 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -1,4 +1,4 @@ -name: R GitHub Actions +name: R-package on: push: @@ -40,11 +40,6 @@ jobs: compiler: clang r_version: 4.0 build_type: cmake - - os: ubuntu-latest - task: r-package-check-docs - compiler: gcc - r_version: 4.0 - build_type: cmake - os: macOS-latest task: r-package compiler: gcc diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml new file mode 100644 index 000000000000..ed9790c8bb06 --- /dev/null +++ b/.github/workflows/static_analysis.yml @@ -0,0 +1,85 @@ +# contains non-functional tests, like checks on docs +# and code style +name: Static Analysis + +on: + push: + branches: + - master + pull_request: + branches: + - master + +env: + COMPILER: 'gcc' + CONDA_ENV: test-env + GITHUB_ACTIONS: 'true' + OS_NAME: 'linux' + PYTHON_VERSION: 3.8 + +jobs: + test: + name: ${{ matrix.task }} + runs-on: ubuntu-latest + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + include: + - task: lint + - task: check-docs + steps: + - name: Checkout repository + uses: actions/checkout@v2.3.4 + with: + fetch-depth: 5 + submodules: false + - name: Setup and run tests + shell: bash + run: | + export TASK="${{ matrix.task }}" + export BUILD_DIRECTORY="$GITHUB_WORKSPACE" + export CONDA=${HOME}/miniconda + export PATH=${CONDA}/bin:$HOME/.local/bin:${PATH} + $GITHUB_WORKSPACE/.ci/setup.sh || exit -1 + $GITHUB_WORKSPACE/.ci/test.sh || exit -1 + r-check-docs: + name: r-package-check-docs + timeout-minutes: 60 + runs-on: ubuntu-latest + container: rocker/verse + steps: + - name: Checkout repository + uses: actions/checkout@v2.3.4 + with: + fetch-depth: 5 + submodules: true + - name: Install packages + shell: bash + run: | + Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'roxygen2', 'testthat'), repos = 'https://cran.r-project.org')" + sh build-cran-package.sh || exit -1 + R CMD INSTALL --with-keep.source lightgbm_*.tar.gz || exit -1 + - name: Test documentation + shell: bash --noprofile --norc {0} + run: | + Rscript --vanilla -e "roxygen2::roxygenize('R-package/', load = 'installed')" || exit -1 + num_doc_files_changed=$( + git diff --name-only | grep --count -E "\.Rd|NAMESPACE" + ) + if [[ ${num_doc_files_changed} -gt 0 ]]; then + echo "Some R documentation files have changed. Please re-generate them and commit those changes." 
+ echo "" + echo " sh build-cran-package.sh" + echo " R CMD INSTALL --with-keep.source lightgbm_*.tar.gz" + echo " Rscript -e \"roxygen2::roxygenize('R-package/', load = 'installed')\"" + echo "" + exit -1 + fi + all-successful: + # https://github.community/t/is-it-possible-to-require-all-github-actions-tasks-to-pass-without-enumerating-them/117957/4?u=graingert + runs-on: ubuntu-latest + needs: [test, r-check-docs] + steps: + - name: Note that all tests succeeded + run: echo "🎉" diff --git a/.travis.yml b/.travis.yml index 930cc4d0f19e..82e5de10c653 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,8 +20,6 @@ env: - TASK=sdist - TASK=bdist - TASK=if-else - - TASK=lint - - TASK=check-docs - TASK=mpi METHOD=source - TASK=mpi METHOD=pip PYTHON_VERSION=3.7 - TASK=mpi METHOD=wheel PYTHON_VERSION=3.7 @@ -37,10 +35,6 @@ matrix: env: TASK=gpu METHOD=pip PYTHON_VERSION=3.6 - os: osx env: TASK=gpu METHOD=wheel PYTHON_VERSION=3.7 - - os: osx - env: TASK=lint - - os: osx - env: TASK=check-docs before_install: - test -n $CC && unset CC diff --git a/README.md b/README.md index 4637f4430169..293dd32e2215 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,8 @@ Light Gradient Boosting Machine =============================== -[![GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/R%20GitHub%20Actions/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions) +[![R-package GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/R-package/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions) +[![Static Analysis GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/Static%20Analysis/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions) [![Azure Pipelines Build Status](https://lightgbm-ci.visualstudio.com/lightgbm-ci/_apis/build/status/Microsoft.LightGBM?branchName=master)](https://lightgbm-ci.visualstudio.com/lightgbm-ci/_build/latest?definitionId=1) [![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/1ys5ot401m0fep6l/branch/master?svg=true)](https://ci.appveyor.com/project/guolinke/lightgbm/branch/master) [![Travis Build Status](https://travis-ci.org/microsoft/LightGBM.svg?branch=master)](https://travis-ci.org/microsoft/LightGBM) From c2dd54f13a58072090e8aaf7dd653f5c5ec1bc22 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Thu, 7 Jan 2021 23:34:51 +0300 Subject: [PATCH 27/42] [ci][docs] replace file extensions in docs during Sphinx build (#3738) * replace file extensions in docs during Sphinx build * Update .travis.yml * Update .travis.yml * Update conf.py * fix lint * fix lint --- .ci/test.sh | 2 -- docs/_static/js/script.js | 3 --- docs/conf.py | 19 +++++++++++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/.ci/test.sh b/.ci/test.sh index 0f1c99d7882b..58af34eaf54d 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -29,8 +29,6 @@ if [[ $TASK == "check-docs" ]]; then rstcheck --report warning --ignore-directives=autoclass,autofunction,doxygenfile `find . 
-type f -name "*.rst"` || exit -1 # build docs and check them for broken links make html || exit -1 - find ./_build/html/ -type f -name '*.html' -exec \ - sed -i'.bak' -e 's;\(\.\/[^.]*\.\)rst\([^[:space:]]*\);\1html\2;g' {} \; # emulate js function linkchecker --config=.linkcheckerrc ./_build/html/*.html || exit -1 # check the consistency of parameters' descriptions and other stuff cp $BUILD_DIRECTORY/docs/Parameters.rst $BUILD_DIRECTORY/docs/Parameters-backup.rst diff --git a/docs/_static/js/script.js b/docs/_static/js/script.js index 1db8e41f6ac2..7863eedbc52e 100644 --- a/docs/_static/js/script.js +++ b/docs/_static/js/script.js @@ -1,7 +1,4 @@ $(function() { - /* Replace '.rst' with '.html' in all internal links like './[Something].rst[#anchor]' */ - $('a[href^="./"][href*=".rst"]').attr('href', (i, val) => { return val.replace('.rst', '.html'); }); - /* Use wider container for the page content */ $('.wy-nav-content').each(function() { this.style.setProperty('max-width', 'none', 'important'); }); diff --git a/docs/conf.py b/docs/conf.py index e30566c6fc46..c801bbbd6c6f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,7 +23,10 @@ import sphinx from distutils.dir_util import copy_tree +from docutils.nodes import reference from docutils.parsers.rst import Directive +from docutils.transforms import Transform +from re import compile from sphinx.errors import VersionRequirementError from subprocess import PIPE, Popen from unittest.mock import Mock @@ -32,6 +35,8 @@ LIB_PATH = os.path.join(CURR_PATH, os.path.pardir, 'python-package') sys.path.insert(0, LIB_PATH) +INTERNAL_REF_REGEX = compile(r"(?P\.\/.+)(?P\.rst)(?P$|#)") + # -- mock out modules MOCK_MODULES = ['numpy', 'scipy', 'scipy.sparse', 'sklearn', 'matplotlib', 'pandas', 'graphviz'] @@ -39,6 +44,19 @@ sys.modules[mod_name] = Mock() +class InternalRefTransform(Transform): + """Replaces '.rst' with '.html' in all internal links like './[Something].rst[#anchor]'.""" + + default_priority = 210 + """Numerical priority of this transform, 0 through 999.""" + + def apply(self, **kwargs): + """Apply the transform to the document tree.""" + for section in self.document.traverse(reference): + if section.get("refuri") is not None: + section["refuri"] = INTERNAL_REF_REGEX.sub(r"\g.html\g", section["refuri"]) + + class IgnoredDirective(Directive): """Stub for unknown directives.""" @@ -305,5 +323,6 @@ def setup(app): app.connect("build-finished", lambda app, exception: copy_tree(os.path.join(CURR_PATH, os.path.pardir, "lightgbm_r", "docs"), os.path.join(app.outdir, "R"), verbose=0)) + app.add_transform(InternalRefTransform) add_js_file = getattr(app, 'add_js_file', False) or app.add_javascript add_js_file("js/script.js") From 60985495123b0cb99caf0035e953d5152433f85e Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 8 Jan 2021 21:53:23 -0600 Subject: [PATCH 28/42] [ci] run Azure linux tasks on self-hosted runner pool (#3737) --- .vsts-ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index a9467b87999e..eb7c6ab30d74 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -22,8 +22,7 @@ jobs: variables: COMPILER: gcc SETUP_CONDA: 'false' - pool: - vmImage: 'ubuntu-latest' + pool: sh-ubuntu container: ubuntu1404 strategy: matrix: From d6f6abf614ecbae056a8108345dd6617b4e3ab58 Mon Sep 17 00:00:00 2001 From: h-vetinari Date: Sat, 9 Jan 2021 15:41:49 +0100 Subject: [PATCH 29/42] move CheckParamConflict() after LogLevel processing (#3742) --- src/io/config.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/src/io/config.cpp b/src/io/config.cpp index 331e5f4088d3..78086634605b 100644 --- a/src/io/config.cpp +++ b/src/io/config.cpp @@ -234,9 +234,6 @@ void Config::Set(const std::unordered_map& params) { } valid = new_valid; - // check for conflicts - CheckParamConflict(); - if (verbosity == 1) { LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Info); } else if (verbosity == 0) { @@ -246,6 +243,9 @@ void Config::Set(const std::unordered_map& params) { } else { LightGBM::Log::ResetLogLevel(LightGBM::LogLevel::Fatal); } + + // check for conflicts + CheckParamConflict(); } bool CheckMultiClassObjective(const std::string& objective) { From c7c4e084cd63d24afda4a9fb218d90c13ad27f2f Mon Sep 17 00:00:00 2001 From: James Lamb Date: Sun, 10 Jan 2021 17:38:39 -0600 Subject: [PATCH 30/42] [ci] move Python Mac jobs from Travis to GitHub Actions (#3745) * [ci] move Python Mac jobs from Travis to GitHub Actions * alphabetize * fix workflow * fix name * fix os * new workflow * Apply suggestions from code review Co-authored-by: Nikita Titov * changes from code review * swap compilers * swap compilers back * Apply suggestions from code review Co-authored-by: Nikita Titov * Update .github/workflows/python_package.yml Co-authored-by: Nikita Titov --- .github/workflows/python_package.yml | 79 ++++++++++++++++++++++++++++ .travis.yml | 20 +------ README.md | 1 + 3 files changed, 82 insertions(+), 18 deletions(-) create mode 100644 .github/workflows/python_package.yml diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml new file mode 100644 index 000000000000..9c2103c23444 --- /dev/null +++ b/.github/workflows/python_package.yml @@ -0,0 +1,79 @@ +name: Python-package + +on: + push: + branches: + - master + pull_request: + branches: + - master + +env: + CONDA_ENV: test-env + GITHUB_ACTIONS: 'true' + +jobs: + test: + name: ${{ matrix.task }} ${{ matrix.method }} (${{ matrix.os }}, Python ${{ matrix.python_version }}) + runs-on: ${{ matrix.os }} + timeout-minutes: 60 + strategy: + fail-fast: false + matrix: + include: + - os: macOS-latest + task: regular + python_version: 3.6 + - os: macOS-latest + task: sdist + python_version: 3.8 + - os: macOS-latest + task: bdist + python_version: 3.8 + - os: macOS-latest + task: if-else + python_version: 3.8 + - os: macOS-latest + task: mpi + method: source + python_version: 3.8 + - os: macOS-latest + task: mpi + method: pip + python_version: 3.6 + - os: macOS-latest + task: mpi + method: wheel + python_version: 3.7 + steps: + - name: Checkout repository + uses: actions/checkout@v2.3.4 + with: + fetch-depth: 5 + submodules: true + - name: Setup and run tests + shell: bash + run: | + export TASK="${{ matrix.task }}" + export METHOD="${{ matrix.method }}" + export PYTHON_VERSION="${{ matrix.python_version }}" + if [[ "${{ matrix.os }}" == "macOS-latest" ]]; then + export COMPILER="gcc" + export OS_NAME="macos" + elif [[ "${{ matrix.os }}" == "ubuntu-latest" ]]; then + export COMPILER="clang" + export OS_NAME="linux" + fi + export BUILD_DIRECTORY="$GITHUB_WORKSPACE" + export LGB_VER=$(head -n 1 VERSION.txt) + export CONDA=${HOME}/miniconda + export PATH=${CONDA}/bin:${PATH} + $GITHUB_WORKSPACE/.ci/setup.sh || exit -1 + $GITHUB_WORKSPACE/.ci/test.sh || exit -1 + all-successful: + # https://github.community/t/is-it-possible-to-require-all-github-actions-tasks-to-pass-without-enumerating-them/117957/4?u=graingert + runs-on: ubuntu-latest + needs: [test] + steps: + - name: Note that all tests succeeded + run: echo "🎉" diff --git 
a/.travis.yml b/.travis.yml index 82e5de10c653..05188fdb4b0e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,13 +8,13 @@ git: os: - linux - - osx dist: focal -osx_image: xcode12.2 env: global: # default values + - COMPILER=clang - PYTHON_VERSION=3.8 + - OS_NAME=linux matrix: - TASK=regular PYTHON_VERSION=3.6 - TASK=sdist @@ -27,26 +27,10 @@ env: - TASK=gpu METHOD=pip PYTHON_VERSION=3.6 - TASK=gpu METHOD=wheel PYTHON_VERSION=3.7 -matrix: - exclude: - - os: osx - env: TASK=gpu METHOD=source - - os: osx - env: TASK=gpu METHOD=pip PYTHON_VERSION=3.6 - - os: osx - env: TASK=gpu METHOD=wheel PYTHON_VERSION=3.7 - before_install: - test -n $CC && unset CC - test -n $CXX && unset CXX - export BUILD_DIRECTORY="$TRAVIS_BUILD_DIR" - - if [[ $TRAVIS_OS_NAME == "osx" ]]; then - export OS_NAME="macos"; - export COMPILER="gcc"; - else - export OS_NAME="linux"; - export COMPILER="clang"; - fi - export CONDA="$HOME/miniconda" - export PATH="$CONDA/bin:$PATH" - export CONDA_ENV="test-env" diff --git a/README.md b/README.md index 293dd32e2215..de27a618896a 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ Light Gradient Boosting Machine =============================== +[![Python-package GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/Python-package/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions) [![R-package GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/R-package/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions) [![Static Analysis GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/Static%20Analysis/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions) [![Azure Pipelines Build Status](https://lightgbm-ci.visualstudio.com/lightgbm-ci/_apis/build/status/Microsoft.LightGBM?branchName=master)](https://lightgbm-ci.visualstudio.com/lightgbm-ci/_build/latest?definitionId=1) From a86a211bd6b448bf59299b6f4f76c7f19ade3e25 Mon Sep 17 00:00:00 2001 From: shiyu1994 Date: Mon, 11 Jan 2021 10:53:47 +0800 Subject: [PATCH 31/42] fix bug in corner case of hist bin mismatch (#3694) --- src/io/dataset.cpp | 10 +--------- src/io/train_share_states.cpp | 3 +++ 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp index 2726b28eff3f..af3a41d87ff0 100644 --- a/src/io/dataset.cpp +++ b/src/io/dataset.cpp @@ -556,18 +556,11 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures(const std::vector& of } } sum_dense_ratio /= ncol; - const int offset = (1.0f - sum_dense_ratio) >= - MultiValBin::multi_val_bin_sparse_threshold ? 
1 : 0; - int num_total_bin = offset; for (int gid = 0; gid < num_groups_; ++gid) { if (feature_groups_[gid]->is_multi_val_) { for (int fid = 0; fid < feature_groups_[gid]->num_feature_; ++fid) { const auto& bin_mapper = feature_groups_[gid]->bin_mappers_[fid]; most_freq_bins.push_back(bin_mapper->GetMostFreqBin()); - num_total_bin += bin_mapper->num_bin(); - if (most_freq_bins.back() == 0) { - num_total_bin -= offset; - } #pragma omp parallel for schedule(static, 1) for (int tid = 0; tid < num_threads; ++tid) { iters[tid].emplace_back( @@ -576,7 +569,6 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures(const std::vector& of } } else { most_freq_bins.push_back(0); - num_total_bin += feature_groups_[gid]->bin_offsets_.back() - offset; for (int tid = 0; tid < num_threads; ++tid) { iters[tid].emplace_back(feature_groups_[gid]->FeatureGroupIterator()); } @@ -586,7 +578,7 @@ MultiValBin* Dataset::GetMultiBinFromAllFeatures(const std::vector& of Log::Debug("Dataset::GetMultiBinFromAllFeatures: sparse rate %f", 1.0 - sum_dense_ratio); ret.reset(MultiValBin::CreateMultiValBin( - num_data_, num_total_bin, static_cast(most_freq_bins.size()), + num_data_, offsets.back(), static_cast(most_freq_bins.size()), 1.0 - sum_dense_ratio, offsets)); PushDataToMultiValBin(num_data_, most_freq_bins, offsets, &iters, ret.get()); ret->FinishLoad(); diff --git a/src/io/train_share_states.cpp b/src/io/train_share_states.cpp index 9fbab76b9bfd..478c520f1c68 100644 --- a/src/io/train_share_states.cpp +++ b/src/io/train_share_states.cpp @@ -176,6 +176,9 @@ void MultiValBinWrapper::CopyMultiValBinSubset( if (feature_groups[i]->is_multi_val_) { for (int j = 0; j < feature_groups[i]->num_feature_; ++j) { const auto& bin_mapper = feature_groups[i]->bin_mappers_[j]; + if (i == 0 && j == 0 && bin_mapper->GetMostFreqBin() > 0) { + num_total_bin = 1; + } int cur_num_bin = bin_mapper->num_bin(); if (bin_mapper->GetMostFreqBin() == 0) { cur_num_bin -= offset; From 1abc2e06738d5044bc4d6f5ee326f61f34d1d277 Mon Sep 17 00:00:00 2001 From: Belinda Trotta Date: Mon, 11 Jan 2021 23:34:32 +1100 Subject: [PATCH 32/42] Initialize any_nan_ property of LinearTreeLearner (#3709) --- src/treelearner/linear_tree_learner.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/treelearner/linear_tree_learner.cpp b/src/treelearner/linear_tree_learner.cpp index 75535d8756e1..5855812f741e 100644 --- a/src/treelearner/linear_tree_learner.cpp +++ b/src/treelearner/linear_tree_learner.cpp @@ -34,6 +34,7 @@ void LinearTreeLearner::InitLinear(const Dataset* train_data, const int max_leav } } } + any_nan_ = false; for (int feat = 0; feat < train_data->num_features(); ++feat) { if (contains_nan_[feat]) { any_nan_ = true; From 5784ffe75a7216b5b694d071ba121c7b7b8b0860 Mon Sep 17 00:00:00 2001 From: Chip Kerchner <49959681+ChipKerchner@users.noreply.github.com> Date: Mon, 11 Jan 2021 10:18:00 -0500 Subject: [PATCH 33/42] Ensure CUDA vector length is consistent with AlignedSize (#3748) --- include/LightGBM/cuda/vector_cudahost.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/LightGBM/cuda/vector_cudahost.h b/include/LightGBM/cuda/vector_cudahost.h index 03db3386d4c7..bd488d793d09 100644 --- a/include/LightGBM/cuda/vector_cudahost.h +++ b/include/LightGBM/cuda/vector_cudahost.h @@ -42,6 +42,7 @@ struct CHAllocator { T* allocate(std::size_t n) { T* ptr; if (n == 0) return NULL; + n = (n + kAlignedSize - 1) & -kAlignedSize; #ifdef USE_CUDA if (LGBM_config_::current_device == lgbm_device_cuda) { cudaError_t ret = cudaHostAlloc(&ptr, n*sizeof(T), 
cudaHostAllocPortable); From 78d31d9ae34c9c8ab2b0f8704c6962c8d3510062 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Mon, 11 Jan 2021 12:01:25 -0500 Subject: [PATCH 34/42] [docs][python] add conda-forge install instructions (#3544) * DOC: add conda-forge install instructions * DOC: add conda-forge instructions * DOC: fix hyperlink * DOC: point to installation guide * add detailed * Update python-package/README.rst Co-authored-by: James Lamb * Update python-package/README.rst Co-authored-by: James Lamb * rm characters * add pip install * add : * Update python-package/README.rst Co-authored-by: Nikita Titov * Update python-package/README.rst Co-authored-by: Nikita Titov * remove pip from header * channel Co-authored-by: James Lamb Co-authored-by: Nikita Titov --- docs/Python-Intro.rst | 2 +- python-package/README.rst | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/docs/Python-Intro.rst b/docs/Python-Intro.rst index f514e40a6e10..bb379d5ed196 100644 --- a/docs/Python-Intro.rst +++ b/docs/Python-Intro.rst @@ -14,7 +14,7 @@ This document gives a basic walkthrough of LightGBM Python-package. Install ------- -You can install LightGBM via pip +The preferred way to install LightGBM is via pip: :: diff --git a/python-package/README.rst b/python-package/README.rst index 6715bb9f460a..265f9d47053f 100644 --- a/python-package/README.rst +++ b/python-package/README.rst @@ -13,13 +13,12 @@ Preparation `setuptools `_ is needed. -Install from `PyPI `_ Using ``pip`` -'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' +Install from `PyPI `_ +'''''''''''''''''''''''''''''''''''''''''''''''''''''''' .. code:: sh pip install lightgbm - You may need to install `wheel `_ via ``pip install wheel`` first. @@ -37,7 +36,6 @@ For **macOS** (we provide wheels for 3 newest macOS versions) users: - For version smaller than 2.1.2, **gcc-7** with **OpenMP** is required. - Build from Sources ****************** @@ -144,6 +142,17 @@ By default, installation in environment with 32-bit Python is prohibited. Howeve It is **strongly not recommended** to use this version of LightGBM! +Install from `conda-forge channel `_ +''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' + +If you use ``conda`` to manage Python dependencies, you can install LightGBM using ``conda install``. + +**Note**: The `lightgbm conda-forge feedstock `_ is not maintained by LightGBM maintainers. + +.. 
code:: sh + + conda install -c conda-forge lightgbm + Install from GitHub ''''''''''''''''''' From 318f7faeb8779a6f96948f9362b05b9de28486b6 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 13 Jan 2021 09:55:58 -0600 Subject: [PATCH 35/42] [ci] remove Travis (fixes #3519) (#3672) * [ci] move CI jobs from Travis to Azure DevOps (fixes #3519) * comment out other CIs to avoid wasting cycles * try without docker * add container back * stop using --user in pip install * run check-docs and lint without container * job names * move more jobs to Azure-hosted Linux pool * fix PATH for check-docs * uncomment other CI * uncomment windows * remove uses of maxParallel * try moving macos-latest jobs to GitHub Actions * fix config * fix missing conda env * set Python version * remove commented-out code * add more to GitHub Actions * try to fix GPU * remove static_analysis to prevent conflicts with #3726 * change workflow name * try using ubuntu:latest docker * fix conda * trying to find where permissions first break * add workaround for sudo * please azure please * image syntax * more sudo * noninteractive * LC_ALL * more sudo * more stuff * CONDA dir * paths * get path * missing CONDA * fix path stuff * more tests * fix graphviz * stuff * more graphviz * install xorg-libxau * graphviz works, run more jobs * stuff * enable more tests * uncomment GitHub Actions * uncomment all other CIs * Apply suggestions from code review Co-authored-by: Nikita Titov * add travis.yml to Rbuildignore * add Rbuildignore rule for fmt * add libomp for clang builds * changes from code review Co-authored-by: Nikita Titov --- .ci/setup.sh | 41 ++++++++++++++++++++++ .ci/test.sh | 18 +++++++--- .travis.yml | 50 -------------------------- .vsts-ci.yml | 77 +++++++++++++++++++++++++++++++++++++++-- R-package/.Rbuildignore | 2 +- README.md | 1 - 6 files changed, 129 insertions(+), 60 deletions(-) delete mode 100644 .travis.yml diff --git a/.ci/setup.sh b/.ci/setup.sh index 79c2bdc85d54..c89bb6a73d8d 100755 --- a/.ci/setup.sh +++ b/.ci/setup.sh @@ -19,6 +19,47 @@ if [[ $OS_NAME == "macos" ]]; then fi wget -q -O conda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh else # Linux + if [[ $IN_UBUNTU_LATEST_CONTAINER == "true" ]]; then + + # fixes error "unable to initialize frontend: Dialog" + # https://github.com/moby/moby/issues/27988#issuecomment-462809153 + echo 'debconf debconf/frontend select Noninteractive' | sudo debconf-set-selections + + sudo apt-get update + sudo apt-get install -y --no-install-recommends \ + software-properties-common + + sudo add-apt-repository -y ppa:git-core/ppa + sudo apt-get update + + sudo apt-get install -y --no-install-recommends \ + apt-utils \ + build-essential \ + ca-certificates \ + curl \ + git \ + iputils-ping \ + jq \ + libcurl4 \ + libicu66 \ + libssl1.1 \ + libunwind8 \ + locales \ + netcat \ + unzip \ + wget \ + zip + + export LANG="en_US.UTF-8" + export LC_ALL="${LANG}" + sudo locale-gen ${LANG} + sudo update-locale + + sudo apt-get install -y --no-install-recommends \ + cmake \ + clang \ + libomp-dev + fi if [[ $TASK == "mpi" ]]; then sudo apt-get update sudo apt-get install --no-install-recommends -y libopenmpi-dev openmpi-bin diff --git a/.ci/test.sh b/.ci/test.sh index 58af34eaf54d..f9dd6b0c3582 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -68,7 +68,15 @@ if [[ $TASK == "if-else" ]]; then exit 0 fi -conda install -q -y -n $CONDA_ENV dask dask-ml distributed joblib matplotlib numpy pandas psutil pytest python-graphviz scikit-learn scipy +conda install -q -y 
-n $CONDA_ENV dask dask-ml distributed joblib matplotlib numpy pandas psutil pytest scikit-learn scipy + +# graphviz must come from conda-forge to avoid this on some linux distros: +# https://github.com/conda-forge/graphviz-feedstock/issues/18 +conda install -q -y \ + -n $CONDA_ENV \ + -c conda-forge \ + python-graphviz \ + xorg-libxau if [[ $OS_NAME == "macos" ]] && [[ $COMPILER == "clang" ]]; then # fix "OMP: Error #15: Initializing libiomp5.dylib, but found libomp.dylib already initialized." (OpenMP library conflict due to conda's MKL) @@ -78,7 +86,7 @@ fi if [[ $TASK == "sdist" ]]; then cd $BUILD_DIRECTORY/python-package && python setup.py sdist || exit -1 pip install --user $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz -v || exit -1 - if [[ $AZURE == "true" ]]; then + if [[ $PRODUCES_ARTIFACTS == "true" ]]; then cp $BUILD_DIRECTORY/python-package/dist/lightgbm-$LGB_VER.tar.gz $BUILD_ARTIFACTSTAGINGDIRECTORY mkdir $BUILD_DIRECTORY/build && cd $BUILD_DIRECTORY/build if [[ $OS_NAME == "macos" ]]; then @@ -100,12 +108,12 @@ elif [[ $TASK == "bdist" ]]; then if [[ $OS_NAME == "macos" ]]; then cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=macosx --python-tag py3 || exit -1 mv dist/lightgbm-$LGB_VER-py3-none-macosx.whl dist/lightgbm-$LGB_VER-py3-none-macosx_10_13_x86_64.macosx_10_14_x86_64.macosx_10_15_x86_64.whl - if [[ $AZURE == "true" ]]; then + if [[ $PRODUCES_ARTIFACTS == "true" ]]; then cp dist/lightgbm-$LGB_VER-py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY fi else cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=manylinux1_x86_64 --python-tag py3 || exit -1 - if [[ $AZURE == "true" ]]; then + if [[ $PRODUCES_ARTIFACTS == "true" ]]; then cp dist/lightgbm-$LGB_VER-py3-none-manylinux1_x86_64.whl $BUILD_ARTIFACTSTAGINGDIRECTORY fi fi @@ -172,7 +180,7 @@ cd $BUILD_DIRECTORY/python-package && python setup.py install --precompile --use pytest $BUILD_DIRECTORY/tests || exit -1 if [[ $TASK == "regular" ]]; then - if [[ $AZURE == "true" ]]; then + if [[ $PRODUCES_ARTIFACTS == "true" ]]; then if [[ $OS_NAME == "macos" ]]; then cp $BUILD_DIRECTORY/lib_lightgbm.so $BUILD_ARTIFACTSTAGINGDIRECTORY/lib_lightgbm.dylib else diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 05188fdb4b0e..000000000000 --- a/.travis.yml +++ /dev/null @@ -1,50 +0,0 @@ -if: branch = master - -language: cpp - -git: - submodules: true - depth: 5 - -os: - - linux -dist: focal - -env: - global: # default values - - COMPILER=clang - - PYTHON_VERSION=3.8 - - OS_NAME=linux - matrix: - - TASK=regular PYTHON_VERSION=3.6 - - TASK=sdist - - TASK=bdist - - TASK=if-else - - TASK=mpi METHOD=source - - TASK=mpi METHOD=pip PYTHON_VERSION=3.7 - - TASK=mpi METHOD=wheel PYTHON_VERSION=3.7 - - TASK=gpu METHOD=source - - TASK=gpu METHOD=pip PYTHON_VERSION=3.6 - - TASK=gpu METHOD=wheel PYTHON_VERSION=3.7 - -before_install: - - test -n $CC && unset CC - - test -n $CXX && unset CXX - - export BUILD_DIRECTORY="$TRAVIS_BUILD_DIR" - - export CONDA="$HOME/miniconda" - - export PATH="$CONDA/bin:$PATH" - - export CONDA_ENV="test-env" - - export LGB_VER=$(head -n 1 VERSION.txt) - - export AMDAPPSDK_PATH=$HOME/AMDAPPSDK - - export LD_LIBRARY_PATH="$AMDAPPSDK_PATH/lib/x86_64:$LD_LIBRARY_PATH" - - export LD_LIBRARY_PATH="/usr/local/clang/lib:$LD_LIBRARY_PATH" # fix error "libomp.so: cannot open shared object file: No such file or directory" on Linux with Clang - - export OPENCL_VENDOR_PATH=$AMDAPPSDK_PATH/etc/OpenCL/vendors - -install: - - bash 
.ci/setup.sh - -script: - - bash .ci/test.sh - -notifications: - email: false diff --git a/.vsts-ci.yml b/.vsts-ci.yml index eb7c6ab30d74..1ef2d6c3de42 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -15,6 +15,9 @@ resources: containers: - container: ubuntu1404 image: lightgbm/vsts-agent:ubuntu-14.04 + - container: ubuntu-latest + image: 'ubuntu:latest' + options: "--name ci-container -v /usr/bin/docker:/tmp/docker:ro" jobs: ########################################### - job: Linux @@ -22,6 +25,8 @@ jobs: variables: COMPILER: gcc SETUP_CONDA: 'false' + OS_NAME: 'linux' + PRODUCES_ARTIFACTS: 'true' pool: sh-ubuntu container: ubuntu1404 strategy: @@ -41,11 +46,9 @@ jobs: gpu_source: TASK: gpu METHOD: source - PYTHON_VERSION: 3.6 steps: - script: | echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" - echo "##vso[task.setvariable variable=OS_NAME]linux" echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" echo "##vso[task.prependpath]$CONDA/bin" AMDAPPSDK_PATH=$BUILD_SOURCESDIRECTORY/AMDAPPSDK @@ -65,10 +68,78 @@ jobs: artifactName: PackageAssets artifactType: container ########################################### +- job: Linux_latest +########################################### + variables: + COMPILER: clang + DEBIAN_FRONTEND: 'noninteractive' + IN_UBUNTU_LATEST_CONTAINER: 'true' + OS_NAME: 'linux' + SETUP_CONDA: 'true' + pool: sh-ubuntu + container: ubuntu-latest + strategy: + matrix: + regular: + TASK: regular + PYTHON_VERSION: 3.6 + sdist: + TASK: sdist + bdist: + TASK: bdist + inference: + TASK: if-else + mpi_source: + TASK: mpi + METHOD: source + mpi_pip: + TASK: mpi + METHOD: pip + PYTHON_VERSION: 3.7 + mpi_wheel: + TASK: mpi + METHOD: wheel + PYTHON_VERSION: 3.7 + gpu_source: + TASK: gpu + METHOD: source + gpu_pip: + TASK: gpu + METHOD: pip + PYTHON_VERSION: 3.6 + gpu_wheel: + TASK: gpu + METHOD: wheel + PYTHON_VERSION: 3.7 + steps: + - script: | + echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" + echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" + CONDA=$HOME/miniconda + echo "##vso[task.setvariable variable=CONDA]$CONDA" + echo "##vso[task.prependpath]$CONDA/bin" + AMDAPPSDK_PATH=$BUILD_SOURCESDIRECTORY/AMDAPPSDK + echo "##vso[task.setvariable variable=AMDAPPSDK_PATH]$AMDAPPSDK_PATH" + LD_LIBRARY_PATH=$AMDAPPSDK_PATH/lib/x86_64:$LD_LIBRARY_PATH + echo "##vso[task.setvariable variable=LD_LIBRARY_PATH]$LD_LIBRARY_PATH" + echo "##vso[task.setvariable variable=OPENCL_VENDOR_PATH]$AMDAPPSDK_PATH/etc/OpenCL/vendors" + displayName: 'Set variables' + # https://github.com/microsoft/azure-pipelines-agent/issues/2043#issuecomment-687983301 + - script: | + /tmp/docker exec -t -u 0 ci-container \ + sh -c "apt-get update && apt-get -o Dpkg::Options::="--force-confold" -y install sudo" + displayName: 'Install sudo' + - bash: $(Build.SourcesDirectory)/.ci/setup.sh + displayName: Setup + - bash: $(Build.SourcesDirectory)/.ci/test.sh + displayName: Test +########################################### - job: MacOS ########################################### variables: COMPILER: clang + OS_NAME: 'macos' + PRODUCES_ARTIFACTS: 'true' pool: vmImage: 'macOS-10.14' strategy: @@ -83,7 +154,6 @@ jobs: steps: - script: | echo "##vso[task.setvariable variable=BUILD_DIRECTORY]$BUILD_SOURCESDIRECTORY" - echo "##vso[task.setvariable variable=OS_NAME]macos" echo "##vso[task.setvariable variable=LGB_VER]$(head -n 1 VERSION.txt)" CONDA=$AGENT_HOMEDIRECTORY/miniconda echo "##vso[task.setvariable variable=CONDA]$CONDA" @@ 
-139,6 +209,7 @@ jobs: ########################################### dependsOn: - Linux + - Linux_latest - MacOS - Windows condition: and(succeeded(), not(startsWith(variables['Build.SourceBranch'], 'refs/pull/'))) diff --git a/R-package/.Rbuildignore b/R-package/.Rbuildignore index 2aeecfe18cc8..55e51dffccc5 100644 --- a/R-package/.Rbuildignore +++ b/R-package/.Rbuildignore @@ -35,6 +35,7 @@ src/external_libs/fast_double_parser/tests src/external_libs/fast_double_parser/.*\.yaml src/external_libs/fast_double_parser/.*\.yml src/external_libs/fmt/.*\.md +src/external_libs/fmt/.travis.yml src/external_libs/fmt/doc src/external_libs/fmt/support/Android\.mk src/external_libs/fmt/support/.*\.gradle @@ -46,4 +47,3 @@ src/external_libs/fmt/support/Vagrantfile src/external_libs/fmt/support/.*\.xml src/external_libs/fmt/support/.*\.yml src/external_libs/fmt/test -\.travis\.yml diff --git a/README.md b/README.md index de27a618896a..3839ee5af824 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ Light Gradient Boosting Machine [![Static Analysis GitHub Actions Build Status](https://github.com/microsoft/LightGBM/workflows/Static%20Analysis/badge.svg?branch=master)](https://github.com/microsoft/LightGBM/actions) [![Azure Pipelines Build Status](https://lightgbm-ci.visualstudio.com/lightgbm-ci/_apis/build/status/Microsoft.LightGBM?branchName=master)](https://lightgbm-ci.visualstudio.com/lightgbm-ci/_build/latest?definitionId=1) [![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/1ys5ot401m0fep6l/branch/master?svg=true)](https://ci.appveyor.com/project/guolinke/lightgbm/branch/master) -[![Travis Build Status](https://travis-ci.org/microsoft/LightGBM.svg?branch=master)](https://travis-ci.org/microsoft/LightGBM) [![Documentation Status](https://readthedocs.org/projects/lightgbm/badge/?version=latest)](https://lightgbm.readthedocs.io/) [![License](https://img.shields.io/github/license/microsoft/lightgbm.svg)](https://github.com/microsoft/LightGBM/blob/master/LICENSE) [![Python Versions](https://img.shields.io/pypi/pyversions/lightgbm.svg?logo=python&logoColor=white)](https://pypi.org/project/lightgbm) From 577b2f9db2966916c72cba86a39ad4f627a70c8b Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 13 Jan 2021 21:44:44 +0300 Subject: [PATCH 36/42] [ci] indicate support of Big Sur and drop High Sierra (#3749) --- .ci/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.ci/test.sh b/.ci/test.sh index f9dd6b0c3582..447d6b13bc8c 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -107,7 +107,7 @@ if [[ $TASK == "sdist" ]]; then elif [[ $TASK == "bdist" ]]; then if [[ $OS_NAME == "macos" ]]; then cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=macosx --python-tag py3 || exit -1 - mv dist/lightgbm-$LGB_VER-py3-none-macosx.whl dist/lightgbm-$LGB_VER-py3-none-macosx_10_13_x86_64.macosx_10_14_x86_64.macosx_10_15_x86_64.whl + mv dist/lightgbm-$LGB_VER-py3-none-macosx.whl dist/lightgbm-$LGB_VER-py3-none-macosx_10_14_x86_64.macosx_10_15_x86_64.macosx_11_0_x86_64.whl if [[ $PRODUCES_ARTIFACTS == "true" ]]; then cp dist/lightgbm-$LGB_VER-py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY fi From f997a0692ca0f26740d2bdef2695c3e881d4e918 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Wed, 13 Jan 2021 21:53:19 +0300 Subject: [PATCH 37/42] [ci] improve experience with optional GitHub workflows (#3740) * improve experience with optional GitHub workflows * Update README.md * Update r_artifacts.yml * Update optional_checks.yml * continue * Update 
triggering_comments.yml * Update README.md * Update r_artifacts.yml * Update r_artifacts.yml * Update r_artifacts.yml * Update r_valgrind.yml * Update r_artifacts.yml * Update r_valgrind.yml * Update r_valgrind.yml * Update r_valgrind.yml * add docstrings to fix lint * better formatting for multi-line commands --- .ci/append_comment.sh | 50 ++++++++++++++ .ci/get_workflow_status.py | 82 +++++++++++++++++++++++ .ci/rerun_workflow.sh | 47 +++++++++++++ .ci/set_commit_status.sh | 53 +++++++++++++++ .ci/trigger_dispatch_run.sh | 51 ++++++++++++++ .github/workflows/optional_checks.yml | 25 +++++++ .github/workflows/r_artifacts.yml | 28 ++++++-- .github/workflows/r_valgrind.yml | 39 ++++++++++- .github/workflows/triggering_comments.yml | 34 ++++++++++ R-package/README.md | 6 +- 10 files changed, 404 insertions(+), 11 deletions(-) create mode 100755 .ci/append_comment.sh create mode 100644 .ci/get_workflow_status.py create mode 100755 .ci/rerun_workflow.sh create mode 100755 .ci/set_commit_status.sh create mode 100755 .ci/trigger_dispatch_run.sh create mode 100644 .github/workflows/optional_checks.yml create mode 100644 .github/workflows/triggering_comments.yml diff --git a/.ci/append_comment.sh b/.ci/append_comment.sh new file mode 100755 index 000000000000..14e0111249cf --- /dev/null +++ b/.ci/append_comment.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# +# [description] +# Update comment appending a given body to the specified original comment. +# +# [usage] +# append_comment.sh +# +# COMMENT_ID: ID of comment that should be modified. +# +# BODY: Text that will be appended to the original comment body. + +set -e + +if [ -z "$GITHUB_ACTIONS" ]; then + echo "Must be run inside GitHub Actions CI" + exit -1 +fi + +if [ $# -ne 2 ]; then + echo "Usage: $0 " + exit -1 +fi + +comment_id=$1 +body=$2 + +old_comment_body=$( + curl -sL \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Authorization: token $SECRETS_WORKFLOW" \ + "${GITHUB_API_URL}/repos/microsoft/LightGBM/issues/comments/$comment_id" | \ + jq '.body' +) +body=${body/failure/failure ❌} +body=${body/error/failure ❌} +body=${body/cancelled/failure ❌} +body=${body/timed_out/failure ❌} +body=${body/success/success ✔️} +data=$( + jq -n \ + --argjson body "${old_comment_body%?}\r\n\r\n$body\"" \ + '{"body":$body}' +) +curl -sL \ + -X PATCH \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Authorization: token $SECRETS_WORKFLOW" \ + -d "$data" \ + "${GITHUB_API_URL}/repos/microsoft/LightGBM/issues/comments/$comment_id" diff --git a/.ci/get_workflow_status.py b/.ci/get_workflow_status.py new file mode 100644 index 000000000000..82a57a9537b9 --- /dev/null +++ b/.ci/get_workflow_status.py @@ -0,0 +1,82 @@ +# coding: utf-8 +"""Get the most recent status of workflow for the current PR.""" +import json +from os import environ +from sys import argv, exit +from time import sleep +try: + from urllib import request +except ImportError: + import urllib2 as request + + +def get_runs(workflow_name): + """Get all triggering workflow comments in the current PR. + + Parameters + ---------- + workflow_name : string + Name of the workflow. + + Returns + ------- + pr_runs : list + List of comment objects sorted by the time of creation in decreasing order. 
+ """ + pr_runs = [] + if environ.get("GITHUB_EVENT_NAME", "") == "pull_request": + pr_number = int(environ.get("GITHUB_REF").split('/')[-2]) + req = request.Request(url="{}/repos/microsoft/LightGBM/issues/{}/comments".format(environ.get("GITHUB_API_URL"), + pr_number), + headers={"Accept": "application/vnd.github.v3+json"}) + url = request.urlopen(req) + data = json.loads(url.read().decode('utf-8')) + url.close() + pr_runs = [i for i in data + if i['author_association'].lower() in {'owner', 'member', 'collaborator'} + and i['body'].startswith('/gha run') + and 'Workflow **{}** has been triggered!'.format(workflow_name) in i['body']] + return pr_runs[::-1] + + +def get_status(runs): + """Get the most recent status of workflow for the current PR. + + Parameters + ---------- + runs : list + List of comment objects sorted by the time of creation in decreasing order. + + Returns + ------- + status : string + The most recent status of workflow. + Can be 'success', 'failure' or 'in-progress'. + """ + status = 'success' + for run in runs: + body = run['body'] + if "Status: " in body: + if "Status: skipped" in body: + continue + if "Status: failure" in body: + status = 'failure' + break + if "Status: success" in body: + status = 'success' + break + else: + status = 'in-progress' + break + return status + + +if __name__ == "__main__": + workflow_name = argv[1] + while True: + status = get_status(get_runs(workflow_name)) + if status != 'in-progress': + break + sleep(60) + if status == 'failure': + exit(1) diff --git a/.ci/rerun_workflow.sh b/.ci/rerun_workflow.sh new file mode 100755 index 000000000000..eb098adeeaeb --- /dev/null +++ b/.ci/rerun_workflow.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# +# [description] +# Rerun specified workflow for given pull request. +# +# [usage] +# rerun_workflow.sh +# +# WORKFLOW_ID: Identifier (config name of ID) of a workflow to be rerun. +# +# PR_NUMBER: Number of pull request for which workflow should be rerun. +# +# PR_BRANCH: Name of pull request's branch. + +set -e + +if [ -z "$GITHUB_ACTIONS" ]; then + echo "Must be run inside GitHub Actions CI" + exit -1 +fi + +if [ $# -ne 3 ]; then + echo "Usage: $0 " + exit -1 +fi + +workflow_id=$1 +pr_number=$2 +pr_branch=$3 + +runs=$( + curl -sL \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Authorization: token $SECRETS_WORKFLOW" \ + "${GITHUB_API_URL}/repos/microsoft/LightGBM/actions/workflows/${workflow_id}/runs?event=pull_request&branch=${pr_branch}" | \ + jq '.workflow_runs' +) +runs=$(echo $runs | jq --arg pr_number "$pr_number" --arg pr_branch "$pr_branch" 'map(select(.event == "pull_request" and ((.pull_requests | length) != 0 and (.pull_requests[0].number | tostring) == $pr_number or .head_branch == $pr_branch)))') +runs=$(echo $runs | jq 'sort_by(.run_number) | reverse') + +if [[ $(echo $runs | jq 'length') -gt 0 ]]; then + curl -sL \ + -X POST \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Authorization: token $SECRETS_WORKFLOW" \ + "${GITHUB_API_URL}/repos/microsoft/LightGBM/actions/runs/$(echo $runs | jq '.[0].id')/rerun" +fi diff --git a/.ci/set_commit_status.sh b/.ci/set_commit_status.sh new file mode 100755 index 000000000000..b10dd3ec27c3 --- /dev/null +++ b/.ci/set_commit_status.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# +# [description] +# Set a status with a given name to the specified commit. +# +# [usage] +# set_commit_status.sh +# +# NAME: Name of status. +# Status with existing name overwrites a previous one. +# +# STATUS: Status to be set. 
+# Can be "error", "failure", "pending" or "success". +# +# SHA: SHA of a commit to set a status on. + +set -e + +if [ -z "$GITHUB_ACTIONS" ]; then + echo "Must be run inside GitHub Actions CI" + exit -1 +fi + +if [ $# -ne 3 ]; then + echo "Usage: $0 " + exit -1 +fi + +name=$1 + +status=$2 +status=${status/error/failure} +status=${status/cancelled/failure} +status=${status/timed_out/failure} +status=${status/in_progress/pending} +status=${status/queued/pending} + +sha=$3 + +data=$( + jq -n \ + --arg state $status \ + --arg url "${GITHUB_SERVER_URL}/microsoft/LightGBM/actions/runs/${GITHUB_RUN_ID}" \ + --arg name "$name" \ + '{"state":$state,"target_url":$url,"context":$name}' +) + +curl -sL \ + -X POST \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Authorization: token $SECRETS_WORKFLOW" \ + -d "$data" \ + "${GITHUB_API_URL}/repos/microsoft/LightGBM/statuses/$sha" diff --git a/.ci/trigger_dispatch_run.sh b/.ci/trigger_dispatch_run.sh new file mode 100755 index 000000000000..040d6e9a581d --- /dev/null +++ b/.ci/trigger_dispatch_run.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# +# [description] +# Trigger manual workflow run by a dispatch event. +# +# [usage] +# trigger_dispatch_run.sh +# +# PR_URL: URL of pull request from which dispatch is triggering. +# +# COMMENT_ID: ID of comment that is triggering a dispatch. +# +# DISPATCH_NAME: Name of a dispatch to be triggered. + +set -e + +if [ -z "$GITHUB_ACTIONS" ]; then + echo "Must be run inside GitHub Actions CI" + exit -1 +fi + +if [ $# -ne 3 ]; then + echo "Usage: $0 " + exit -1 +fi + +pr_url=$1 +comment_id=$2 +dispatch_name=$3 + +pr=$( + curl -sL \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Authorization: token $SECRETS_WORKFLOW" \ + "$pr_url" +) +data=$( + jq -n \ + --arg event_type "$dispatch_name" \ + --arg pr_number "$(echo $pr | jq '.number')" \ + --arg pr_sha "$(echo $pr | jq '.head.sha')" \ + --arg pr_branch "$(echo $pr | jq '.head.ref')" \ + --arg comment_number "$comment_id" \ + '{"event_type":$event_type,"client_payload":{"pr_number":$pr_number,"pr_sha":$pr_sha,"pr_branch":$pr_branch,"comment_number":$comment_number}}' +) +curl -sL \ + -X POST \ + -H "Accept: application/vnd.github.v3+json" \ + -H "Authorization: token $SECRETS_WORKFLOW" \ + -d "$data" \ + "${GITHUB_API_URL}/repos/microsoft/LightGBM/dispatches" diff --git a/.github/workflows/optional_checks.yml b/.github/workflows/optional_checks.yml new file mode 100644 index 000000000000..7cca8ec56524 --- /dev/null +++ b/.github/workflows/optional_checks.yml @@ -0,0 +1,25 @@ +name: Optional checks + +on: + pull_request: + branches: + - master + +jobs: + all-successful: + timeout-minutes: 120 + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2.3.4 + with: + fetch-depth: 5 + submodules: false + - name: Check that all tests succeeded + run: | + workflows=("R valgrind tests") + for i in "${workflows[@]}"; do + python "$GITHUB_WORKSPACE/.ci/get_workflow_status.py" "$i" \ + || { echo "The last reported status from workflow \"$i\" is failure. 
Commit fixes and rerun the workflow."; \ + exit -1; } + done diff --git a/.github/workflows/r_artifacts.yml b/.github/workflows/r_artifacts.yml index 62f111833298..d69a1f4d2656 100644 --- a/.github/workflows/r_artifacts.yml +++ b/.github/workflows/r_artifacts.yml @@ -1,27 +1,39 @@ name: R artifact builds on: - pull_request_review_comment: - types: [created] + repository_dispatch: + types: [gha_run_build_r_artifacts] jobs: cran-package: name: cran-package - if: github.event.comment.body == '/gha build r-artifacts' && contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) timeout-minutes: 60 runs-on: ubuntu-latest container: rocker/r-base + env: + SECRETS_WORKFLOW: ${{ secrets.WORKFLOW }} steps: - - name: Install Git before checkout + - name: Install essential software before checkout shell: bash run: | apt-get update - apt-get install --no-install-recommends -y git + apt-get install --no-install-recommends -y \ + curl \ + git \ + jq - name: Checkout repository uses: actions/checkout@v2.3.4 with: fetch-depth: 5 submodules: true + repository: microsoft/LightGBM + ref: "refs/pull/${{ github.event.client_payload.pr_number }}/merge" + - name: Send init status + if: ${{ always() }} + run: | + $GITHUB_WORKSPACE/.ci/append_comment.sh \ + "${{ github.event.client_payload.comment_number }}" \ + "Workflow **${{ github.workflow }}** has been triggered! 🚀\r\n${GITHUB_SERVER_URL}/microsoft/LightGBM/actions/runs/${GITHUB_RUN_ID}" - name: Build package shell: bash id: build_package @@ -37,3 +49,9 @@ jobs: with: name: ${{ steps.build_package.outputs.artifact_name }} path: ${{ steps.build_package.outputs.artifact_path }} + - name: Send final status + if: ${{ always() }} + run: | + $GITHUB_WORKSPACE/.ci/append_comment.sh \ + "${{ github.event.client_payload.comment_number }}" \ + "Status: ${{ job.status }}." diff --git a/.github/workflows/r_valgrind.yml b/.github/workflows/r_valgrind.yml index 1ddc1a7afc1e..e9af3ae6fdc5 100644 --- a/.github/workflows/r_valgrind.yml +++ b/.github/workflows/r_valgrind.yml @@ -1,22 +1,40 @@ name: R valgrind tests on: - pull_request_review_comment: - types: [created] + repository_dispatch: + types: [gha_run_r_valgrind] jobs: test-r-valgrind: name: r-package (ubuntu-latest, R-devel, valgrind) - if: github.event.comment.body == '/gha run r-valgrind' && contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) timeout-minutes: 120 runs-on: ubuntu-latest container: wch1/r-debug + env: + SECRETS_WORKFLOW: ${{ secrets.WORKFLOW }} steps: + - name: Install essential software before checkout + shell: bash + run: | + apt-get update + apt-get install --no-install-recommends -y \ + curl \ + git \ + jq - name: Checkout repository uses: actions/checkout@v2.3.4 with: fetch-depth: 5 submodules: true + repository: microsoft/LightGBM + ref: "refs/pull/${{ github.event.client_payload.pr_number }}/merge" + - name: Send init status + if: ${{ always() }} + run: | + $GITHUB_WORKSPACE/.ci/set_commit_status.sh "${{ github.workflow }}" "pending" "${{ github.event.client_payload.pr_sha }}" + $GITHUB_WORKSPACE/.ci/append_comment.sh \ + "${{ github.event.client_payload.comment_number }}" \ + "Workflow **${{ github.workflow }}** has been triggered! 
🚀\r\n${GITHUB_SERVER_URL}/microsoft/LightGBM/actions/runs/${GITHUB_RUN_ID}" - name: Install packages shell: bash run: | @@ -26,3 +44,18 @@ jobs: - name: Run tests with valgrind shell: bash run: ./.ci/test_r_package_valgrind.sh + - name: Send final status + if: ${{ always() }} + run: | + $GITHUB_WORKSPACE/.ci/set_commit_status.sh "${{ github.workflow }}" "${{ job.status }}" "${{ github.event.client_payload.pr_sha }}" + $GITHUB_WORKSPACE/.ci/append_comment.sh \ + "${{ github.event.client_payload.comment_number }}" \ + "Status: ${{ job.status }}." + - name: Rerun workflow-indicator + if: ${{ always() }} + run: | + bash $GITHUB_WORKSPACE/.ci/rerun_workflow.sh \ + "optional_checks.yml" \ + "${{ github.event.client_payload.pr_number }}" \ + "${{ github.event.client_payload.pr_branch }}" \ + || true diff --git a/.github/workflows/triggering_comments.yml b/.github/workflows/triggering_comments.yml new file mode 100644 index 000000000000..f20b397449e8 --- /dev/null +++ b/.github/workflows/triggering_comments.yml @@ -0,0 +1,34 @@ +name: Triggering comments + +on: + issue_comment: + types: [created] + +jobs: + triggering-tests: + if: github.event.issue.pull_request && contains('OWNER,MEMBER,COLLABORATOR', github.event.comment.author_association) + runs-on: ubuntu-latest + env: + SECRETS_WORKFLOW: ${{ secrets.WORKFLOW }} + steps: + - name: Checkout repository + uses: actions/checkout@v2.3.4 + with: + fetch-depth: 5 + submodules: false + + - name: Trigger R valgrind tests + if: github.event.comment.body == '/gha run r-valgrind' + run: | + $GITHUB_WORKSPACE/.ci/trigger_dispatch_run.sh \ + "${{ github.event.issue.pull_request.url }}" \ + "${{ github.event.comment.id }}" \ + "gha_run_r_valgrind" + + - name: Trigger R artifact builds + if: github.event.comment.body == '/gha run build-r-artifacts' + run: | + $GITHUB_WORKSPACE/.ci/trigger_dispatch_run.sh \ + "${{ github.event.issue.pull_request.url }}" \ + "${{ github.event.comment.id }}" \ + "gha_run_build_r_artifacts" diff --git a/R-package/README.md b/R-package/README.md index 7edd14717955..a1664ec1c951 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -245,9 +245,9 @@ sh build-cran-package.sh This will create a file `lightgbm_${VERSION}.tar.gz`, where `VERSION` is the version of `LightGBM`. -Alternatively, GitHub Actions can generate this file for you. On a pull request, go to the "Files changed" tab and create a comment with this phrase: +Alternatively, GitHub Actions can generate this file for you. On a pull request, create a comment with this phrase: -> /gha build r-artifacts +> /gha run build-r-artifacts Go to https://github.com/microsoft/LightGBM/actions, and find the most recent run of the "R artifact builds" workflow. If it ran successfully, you'll find a download link for the package (in `.zip` format) in that run's "Artifacts" section. 
@@ -389,7 +389,7 @@ RDvalgrind \ | cat ``` -These tests can also be triggered on any pull request by leaving a review on the "Files changed" tab in a pull request: +These tests can also be triggered on any pull request by leaving a comment in a pull request: > /gha run r-valgrind From a15a37046f53514d36091854b9711f7b4c9e452a Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Fri, 15 Jan 2021 16:20:32 +0300 Subject: [PATCH 38/42] Update CUDA treelearner according to changes introduced for linear trees (#3750) * Update cuda_tree_learner.cpp * Update cuda_tree_learner.h * Update cuda.yml --- .github/workflows/cuda.yml | 1 + src/treelearner/cuda_tree_learner.cpp | 4 ++-- src/treelearner/cuda_tree_learner.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 71dbdda19ea9..db3c763b8951 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -47,6 +47,7 @@ jobs: export ROOT_DOCKER_FOLDER=/LightGBM cat > docker.env < Date: Fri, 15 Jan 2021 18:14:14 +0300 Subject: [PATCH 39/42] [ci] Slightly optimize optional workflows checks (#3762) * Update optional_checks.yml * Update get_workflow_status.py --- .ci/get_workflow_status.py | 21 +++++++++++++-------- .github/workflows/optional_checks.yml | 8 +++++--- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/.ci/get_workflow_status.py b/.ci/get_workflow_status.py index 82a57a9537b9..e4f338ac1554 100644 --- a/.ci/get_workflow_status.py +++ b/.ci/get_workflow_status.py @@ -1,5 +1,11 @@ # coding: utf-8 -"""Get the most recent status of workflow for the current PR.""" +"""Get the most recent status of workflow for the current PR. + +[usage] + python get_workflow_status.py TRIGGER_PHRASE + +TRIGGER_PHRASE: Code phrase that triggers workflow. +""" import json from os import environ from sys import argv, exit @@ -10,13 +16,13 @@ import urllib2 as request -def get_runs(workflow_name): +def get_runs(trigger_phrase): """Get all triggering workflow comments in the current PR. Parameters ---------- - workflow_name : string - Name of the workflow. + trigger_phrase : string + Code phrase that triggers workflow. Returns ------- @@ -34,8 +40,7 @@ def get_runs(workflow_name): url.close() pr_runs = [i for i in data if i['author_association'].lower() in {'owner', 'member', 'collaborator'} - and i['body'].startswith('/gha run') - and 'Workflow **{}** has been triggered!'.format(workflow_name) in i['body']] + and i['body'].startswith('/gha run {}'.format(trigger_phrase))] return pr_runs[::-1] @@ -72,9 +77,9 @@ def get_status(runs): if __name__ == "__main__": - workflow_name = argv[1] + trigger_phrase = argv[1] while True: - status = get_status(get_runs(workflow_name)) + status = get_status(get_runs(trigger_phrase)) if status != 'in-progress': break sleep(60) diff --git a/.github/workflows/optional_checks.yml b/.github/workflows/optional_checks.yml index 7cca8ec56524..2f6bd789b3b9 100644 --- a/.github/workflows/optional_checks.yml +++ b/.github/workflows/optional_checks.yml @@ -17,9 +17,11 @@ jobs: submodules: false - name: Check that all tests succeeded run: | - workflows=("R valgrind tests") + workflows=("R valgrind tests;r-valgrind") for i in "${workflows[@]}"; do - python "$GITHUB_WORKSPACE/.ci/get_workflow_status.py" "$i" \ - || { echo "The last reported status from workflow \"$i\" is failure. 
Commit fixes and rerun the workflow."; \ + workflow_name=${i%;*} + trigger_phrase=${i#*;} + python "$GITHUB_WORKSPACE/.ci/get_workflow_status.py" "$trigger_phrase" \ + || { echo "The last reported status from workflow \"$workflow_name\" is failure. Commit fixes and rerun the workflow."; \ exit -1; } done From 9bacf03c1695e1143b8f2f6d17574b472b6d727b Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 15 Jan 2021 10:17:19 -0500 Subject: [PATCH 40/42] [python][tests] Migrates test_basic.py to use pytest (#3764) * TST Migrates test_basic.py to use pytest * STY Linting * CI Force CI to run --- tests/python_package_test/test_basic.py | 674 ++++++++++++------------ 1 file changed, 337 insertions(+), 337 deletions(-) diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 4855e8e844b2..9249c81fec3c 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -1,10 +1,10 @@ # coding: utf-8 import os import tempfile -import unittest import lightgbm as lgb import numpy as np +import pytest from scipy import sparse from sklearn.datasets import dump_svmlight_file, load_svmlight_file @@ -13,346 +13,346 @@ from .utils import load_breast_cancer -class TestBasic(unittest.TestCase): - - def test(self): - X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), - test_size=0.1, random_state=2) - train_data = lgb.Dataset(X_train, label=y_train) - valid_data = train_data.create_valid(X_test, label=y_test) - - params = { - "objective": "binary", - "metric": "auc", - "min_data": 10, - "num_leaves": 15, - "verbose": -1, - "num_threads": 1, - "max_bin": 255, - "gpu_use_dp": True - } - bst = lgb.Booster(params, train_data) - bst.add_valid(valid_data, "valid_1") - - for i in range(20): - bst.update() - if i % 10 == 0: - print(bst.eval_train(), bst.eval_valid()) - - self.assertEqual(bst.current_iteration(), 20) - self.assertEqual(bst.num_trees(), 20) - self.assertEqual(bst.num_model_per_iteration(), 1) - self.assertAlmostEqual(bst.lower_bound(), -2.9040190126976606) - self.assertAlmostEqual(bst.upper_bound(), 3.3182142872462883) - - bst.save_model("model.txt") - pred_from_matr = bst.predict(X_test) - with tempfile.NamedTemporaryFile() as f: - tname = f.name - with open(tname, "w+b") as f: - dump_svmlight_file(X_test, y_test, f) - pred_from_file = bst.predict(tname) - os.remove(tname) - np.testing.assert_allclose(pred_from_matr, pred_from_file) - - # check saved model persistence - bst = lgb.Booster(params, model_file="model.txt") - os.remove("model.txt") - pred_from_model_file = bst.predict(X_test) - # we need to check the consistency of model file here, so test for exact equal - np.testing.assert_array_equal(pred_from_matr, pred_from_model_file) - - # check early stopping is working. 
Make it stop very early, so the scores should be very close to zero - pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 1.5} - pred_early_stopping = bst.predict(X_test, **pred_parameter) - # scores likely to be different, but prediction should still be the same - np.testing.assert_array_equal(np.sign(pred_from_matr), np.sign(pred_early_stopping)) - - # test that shape is checked during prediction - bad_X_test = X_test[:, 1:] - bad_shape_error_msg = "The number of features in data*" - np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg, - bst.predict, bad_X_test) - np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg, - bst.predict, sparse.csr_matrix(bad_X_test)) - np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg, - bst.predict, sparse.csc_matrix(bad_X_test)) - with open(tname, "w+b") as f: - dump_svmlight_file(bad_X_test, y_test, f) - np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg, - bst.predict, tname) - with open(tname, "w+b") as f: - dump_svmlight_file(X_test, y_test, f, zero_based=False) - np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg, - bst.predict, tname) - os.remove(tname) - - def test_chunked_dataset(self): - X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2) - - chunk_size = X_train.shape[0] // 10 + 1 - X_train = [X_train[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_train.shape[0] // chunk_size + 1)] - X_test = [X_test[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_test.shape[0] // chunk_size + 1)] - - train_data = lgb.Dataset(X_train, label=y_train, params={"bin_construct_sample_cnt": 100}) - valid_data = train_data.create_valid(X_test, label=y_test, params={"bin_construct_sample_cnt": 100}) - train_data.construct() - valid_data.construct() - - def test_chunked_dataset_linear(self): - X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, - random_state=2) - chunk_size = X_train.shape[0] // 10 + 1 - X_train = [X_train[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_train.shape[0] // chunk_size + 1)] - X_test = [X_test[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_test.shape[0] // chunk_size + 1)] - params = {"bin_construct_sample_cnt": 100, 'linear_tree': True} - train_data = lgb.Dataset(X_train, label=y_train, params=params) - valid_data = train_data.create_valid(X_test, label=y_test, params=params) - train_data.construct() - valid_data.construct() - - def test_save_and_load_linear(self): - X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, - random_state=2) - X_train = np.concatenate([np.ones((X_train.shape[0], 1)), X_train], 1) - X_train[:X_train.shape[0] // 2, 0] = 0 - y_train[:X_train.shape[0] // 2] = 1 - params = {'linear_tree': True} - train_data_1 = lgb.Dataset(X_train, label=y_train, params=params) - est_1 = lgb.train(params, train_data_1, num_boost_round=10, categorical_feature=[0]) - pred_1 = est_1.predict(X_train) - train_data_1.save_binary('temp_dataset.bin') - train_data_2 = lgb.Dataset('temp_dataset.bin') - est_2 = lgb.train(params, train_data_2, num_boost_round=10) - pred_2 = est_2.predict(X_train) - np.testing.assert_allclose(pred_1, pred_2) - est_2.save_model('model.txt') - est_3 = lgb.Booster(model_file='model.txt') - pred_3 = est_3.predict(X_train) - 
np.testing.assert_allclose(pred_2, pred_3) - - def test_subset_group(self): - X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../examples/lambdarank/rank.train')) - q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../../examples/lambdarank/rank.train.query')) - lgb_train = lgb.Dataset(X_train, y_train, group=q_train) - self.assertEqual(len(lgb_train.get_group()), 201) - subset = lgb_train.subset(list(range(10))).construct() - subset_group = subset.get_group() - self.assertEqual(len(subset_group), 2) - self.assertEqual(subset_group[0], 1) - self.assertEqual(subset_group[1], 9) - - def test_add_features_throws_if_num_data_unequal(self): - X1 = np.random.random((100, 1)) - X2 = np.random.random((10, 1)) +def test_basic(tmp_path): + X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), + test_size=0.1, random_state=2) + train_data = lgb.Dataset(X_train, label=y_train) + valid_data = train_data.create_valid(X_test, label=y_test) + + params = { + "objective": "binary", + "metric": "auc", + "min_data": 10, + "num_leaves": 15, + "verbose": -1, + "num_threads": 1, + "max_bin": 255, + "gpu_use_dp": True + } + bst = lgb.Booster(params, train_data) + bst.add_valid(valid_data, "valid_1") + + for i in range(20): + bst.update() + if i % 10 == 0: + print(bst.eval_train(), bst.eval_valid()) + + assert bst.current_iteration() == 20 + assert bst.num_trees() == 20 + assert bst.num_model_per_iteration() == 1 + assert bst.lower_bound() == pytest.approx(-2.9040190126976606) + assert bst.upper_bound() == pytest.approx(3.3182142872462883) + + tname = str(tmp_path / "svm_light.dat") + model_file = str(tmp_path / "model.txt") + + bst.save_model(model_file) + pred_from_matr = bst.predict(X_test) + with open(tname, "w+b") as f: + dump_svmlight_file(X_test, y_test, f) + pred_from_file = bst.predict(tname) + np.testing.assert_allclose(pred_from_matr, pred_from_file) + + # check saved model persistence + bst = lgb.Booster(params, model_file=model_file) + pred_from_model_file = bst.predict(X_test) + # we need to check the consistency of model file here, so test for exact equal + np.testing.assert_array_equal(pred_from_matr, pred_from_model_file) + + # check early stopping is working. 
Make it stop very early, so the scores should be very close to zero + pred_parameter = {"pred_early_stop": True, "pred_early_stop_freq": 5, "pred_early_stop_margin": 1.5} + pred_early_stopping = bst.predict(X_test, **pred_parameter) + # scores likely to be different, but prediction should still be the same + np.testing.assert_array_equal(np.sign(pred_from_matr), np.sign(pred_early_stopping)) + + # test that shape is checked during prediction + bad_X_test = X_test[:, 1:] + bad_shape_error_msg = "The number of features in data*" + np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg, + bst.predict, bad_X_test) + np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg, + bst.predict, sparse.csr_matrix(bad_X_test)) + np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg, + bst.predict, sparse.csc_matrix(bad_X_test)) + with open(tname, "w+b") as f: + dump_svmlight_file(bad_X_test, y_test, f) + np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg, + bst.predict, tname) + with open(tname, "w+b") as f: + dump_svmlight_file(X_test, y_test, f, zero_based=False) + np.testing.assert_raises_regex(lgb.basic.LightGBMError, bad_shape_error_msg, + bst.predict, tname) + + +def test_chunked_dataset(): + X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, + random_state=2) + + chunk_size = X_train.shape[0] // 10 + 1 + X_train = [X_train[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_train.shape[0] // chunk_size + 1)] + X_test = [X_test[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_test.shape[0] // chunk_size + 1)] + + train_data = lgb.Dataset(X_train, label=y_train, params={"bin_construct_sample_cnt": 100}) + valid_data = train_data.create_valid(X_test, label=y_test, params={"bin_construct_sample_cnt": 100}) + train_data.construct() + valid_data.construct() + + +def test_chunked_dataset_linear(): + X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, + random_state=2) + chunk_size = X_train.shape[0] // 10 + 1 + X_train = [X_train[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_train.shape[0] // chunk_size + 1)] + X_test = [X_test[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_test.shape[0] // chunk_size + 1)] + params = {"bin_construct_sample_cnt": 100, 'linear_tree': True} + train_data = lgb.Dataset(X_train, label=y_train, params=params) + valid_data = train_data.create_valid(X_test, label=y_test, params=params) + train_data.construct() + valid_data.construct() + + +def test_save_and_load_linear(tmp_path): + X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, + random_state=2) + X_train = np.concatenate([np.ones((X_train.shape[0], 1)), X_train], 1) + X_train[:X_train.shape[0] // 2, 0] = 0 + y_train[:X_train.shape[0] // 2] = 1 + params = {'linear_tree': True} + train_data_1 = lgb.Dataset(X_train, label=y_train, params=params) + est_1 = lgb.train(params, train_data_1, num_boost_round=10, categorical_feature=[0]) + pred_1 = est_1.predict(X_train) + + tmp_dataset = str(tmp_path / 'temp_dataset.bin') + train_data_1.save_binary(tmp_dataset) + train_data_2 = lgb.Dataset(tmp_dataset) + est_2 = lgb.train(params, train_data_2, num_boost_round=10) + pred_2 = est_2.predict(X_train) + np.testing.assert_allclose(pred_1, pred_2) + + model_file = str(tmp_path / 'model.txt') + est_2.save_model(model_file) + est_3 = 
lgb.Booster(model_file=model_file) + pred_3 = est_3.predict(X_train) + np.testing.assert_allclose(pred_2, pred_3) + + +def test_subset_group(): + X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../../examples/lambdarank/rank.train')) + q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../../examples/lambdarank/rank.train.query')) + lgb_train = lgb.Dataset(X_train, y_train, group=q_train) + assert len(lgb_train.get_group()) == 201 + subset = lgb_train.subset(list(range(10))).construct() + subset_group = subset.get_group() + assert len(subset_group) == 2 + assert subset_group[0] == 1 + assert subset_group[1] == 9 + + +def test_add_features_throws_if_num_data_unequal(): + X1 = np.random.random((100, 1)) + X2 = np.random.random((10, 1)) + d1 = lgb.Dataset(X1).construct() + d2 = lgb.Dataset(X2).construct() + with pytest.raises(lgb.basic.LightGBMError): + d1.add_features_from(d2) + + +def test_add_features_throws_if_datasets_unconstructed(): + X1 = np.random.random((100, 1)) + X2 = np.random.random((100, 1)) + with pytest.raises(ValueError): + d1 = lgb.Dataset(X1) + d2 = lgb.Dataset(X2) + d1.add_features_from(d2) + with pytest.raises(ValueError): d1 = lgb.Dataset(X1).construct() + d2 = lgb.Dataset(X2) + d1.add_features_from(d2) + with pytest.raises(ValueError): + d1 = lgb.Dataset(X1) d2 = lgb.Dataset(X2).construct() - with self.assertRaises(lgb.basic.LightGBMError): - d1.add_features_from(d2) + d1.add_features_from(d2) - def test_add_features_throws_if_datasets_unconstructed(self): - X1 = np.random.random((100, 1)) - X2 = np.random.random((100, 1)) - with self.assertRaises(ValueError): - d1 = lgb.Dataset(X1) - d2 = lgb.Dataset(X2) - d1.add_features_from(d2) - with self.assertRaises(ValueError): - d1 = lgb.Dataset(X1).construct() - d2 = lgb.Dataset(X2) - d1.add_features_from(d2) - with self.assertRaises(ValueError): - d1 = lgb.Dataset(X1) - d2 = lgb.Dataset(X2).construct() - d1.add_features_from(d2) - def test_add_features_equal_data_on_alternating_used_unused(self): - self.maxDiff = None - X = np.random.random((100, 5)) - X[:, [1, 3]] = 0 - names = ['col_%d' % i for i in range(5)] - for j in range(1, 5): - d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct() - d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct() - d1.add_features_from(d2) - with tempfile.NamedTemporaryFile() as f: - d1name = f.name - d1._dump_text(d1name) - d = lgb.Dataset(X, feature_name=names).construct() - with tempfile.NamedTemporaryFile() as f: - dname = f.name - d._dump_text(dname) - with open(d1name, 'rt') as d1f: - d1txt = d1f.read() - with open(dname, 'rt') as df: - dtxt = df.read() - os.remove(dname) - os.remove(d1name) - self.assertEqual(dtxt, d1txt) - - def test_add_features_same_booster_behaviour(self): - self.maxDiff = None - X = np.random.random((100, 5)) - X[:, [1, 3]] = 0 - names = ['col_%d' % i for i in range(5)] - for j in range(1, 5): - d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct() - d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct() - d1.add_features_from(d2) - d = lgb.Dataset(X, feature_name=names).construct() - y = np.random.random(100) - d1.set_label(y) - d.set_label(y) - b1 = lgb.Booster(train_set=d1) - b = lgb.Booster(train_set=d) - for k in range(10): - b.update() - b1.update() - with tempfile.NamedTemporaryFile() as df: - dname = df.name - with tempfile.NamedTemporaryFile() as d1f: - d1name = d1f.name - b1.save_model(d1name) - b.save_model(dname) - with open(dname, 'rt') 
as df: - dtxt = df.read() - with open(d1name, 'rt') as d1f: - d1txt = d1f.read() - self.assertEqual(dtxt, d1txt) - - @unittest.skipIf(not lgb.compat.PANDAS_INSTALLED, 'pandas is not installed') - def test_add_features_from_different_sources(self): - import pandas as pd - n_row = 100 - n_col = 5 - X = np.random.random((n_row, n_col)) - xxs = [X, sparse.csr_matrix(X), pd.DataFrame(X)] - names = ['col_%d' % i for i in range(n_col)] - for x_1 in xxs: - # test that method works even with free_raw_data=True - d1 = lgb.Dataset(x_1, feature_name=names, free_raw_data=True).construct() - d2 = lgb.Dataset(x_1, feature_name=names, free_raw_data=True).construct() - d1.add_features_from(d2) - self.assertIsNone(d1.data) +def test_add_features_equal_data_on_alternating_used_unused(tmp_path): + X = np.random.random((100, 5)) + X[:, [1, 3]] = 0 + names = ['col_%d' % i for i in range(5)] + for j in range(1, 5): + d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct() + d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct() + d1.add_features_from(d2) + d1name = str(tmp_path / "d1.txt") + d1._dump_text(d1name) + d = lgb.Dataset(X, feature_name=names).construct() + dname = str(tmp_path / "d.txt") + d._dump_text(dname) + with open(d1name, 'rt') as d1f: + d1txt = d1f.read() + with open(dname, 'rt') as df: + dtxt = df.read() + assert dtxt == d1txt - # test that method works but sets raw data to None in case of immergeable data types - d1 = lgb.Dataset(x_1, feature_name=names, free_raw_data=False).construct() - d2 = lgb.Dataset([X[:n_row // 2, :], X[n_row // 2:, :]], - feature_name=names, free_raw_data=False).construct() - d1.add_features_from(d2) - self.assertIsNone(d1.data) - - # test that method works for different data types - d1 = lgb.Dataset(x_1, feature_name=names, free_raw_data=False).construct() - res_feature_names = [name for name in names] - for idx, x_2 in enumerate(xxs, 2): - original_type = type(d1.get_data()) - d2 = lgb.Dataset(x_2, feature_name=names, free_raw_data=False).construct() - d1.add_features_from(d2) - self.assertIsInstance(d1.get_data(), original_type) - self.assertTupleEqual(d1.get_data().shape, (n_row, n_col * idx)) - res_feature_names += ['D{}_{}'.format(idx, name) for name in names] - self.assertListEqual(d1.feature_name, res_feature_names) - - def test_cegb_affects_behavior(self): - X = np.random.random((100, 5)) - X[:, [1, 3]] = 0 + +def test_add_features_same_booster_behaviour(tmp_path): + X = np.random.random((100, 5)) + X[:, [1, 3]] = 0 + names = ['col_%d' % i for i in range(5)] + for j in range(1, 5): + d1 = lgb.Dataset(X[:, :j], feature_name=names[:j]).construct() + d2 = lgb.Dataset(X[:, j:], feature_name=names[j:]).construct() + d1.add_features_from(d2) + d = lgb.Dataset(X, feature_name=names).construct() y = np.random.random(100) - names = ['col_%d' % i for i in range(5)] - ds = lgb.Dataset(X, feature_name=names).construct() - ds.set_label(y) - base = lgb.Booster(train_set=ds) + d1.set_label(y) + d.set_label(y) + b1 = lgb.Booster(train_set=d1) + b = lgb.Booster(train_set=d) for k in range(10): - base.update() - with tempfile.NamedTemporaryFile() as f: - basename = f.name - base.save_model(basename) - with open(basename, 'rt') as f: - basetxt = f.read() - # Set extremely harsh penalties, so CEGB will block most splits. 
- cases = [{'cegb_penalty_feature_coupled': [50, 100, 10, 25, 30]}, - {'cegb_penalty_feature_lazy': [1, 2, 3, 4, 5]}, - {'cegb_penalty_split': 1}] - for case in cases: - booster = lgb.Booster(train_set=ds, params=case) - for k in range(10): - booster.update() - with tempfile.NamedTemporaryFile() as f: - casename = f.name - booster.save_model(casename) - with open(casename, 'rt') as f: - casetxt = f.read() - self.assertNotEqual(basetxt, casetxt) - - def test_cegb_scaling_equalities(self): - X = np.random.random((100, 5)) - X[:, [1, 3]] = 0 - y = np.random.random(100) - names = ['col_%d' % i for i in range(5)] - ds = lgb.Dataset(X, feature_name=names).construct() - ds.set_label(y) - # Compare pairs of penalties, to ensure scaling works as intended - pairs = [({'cegb_penalty_feature_coupled': [1, 2, 1, 2, 1]}, - {'cegb_penalty_feature_coupled': [0.5, 1, 0.5, 1, 0.5], 'cegb_tradeoff': 2}), - ({'cegb_penalty_feature_lazy': [0.01, 0.02, 0.03, 0.04, 0.05]}, - {'cegb_penalty_feature_lazy': [0.005, 0.01, 0.015, 0.02, 0.025], 'cegb_tradeoff': 2}), - ({'cegb_penalty_split': 1}, - {'cegb_penalty_split': 2, 'cegb_tradeoff': 0.5})] - for (p1, p2) in pairs: - booster1 = lgb.Booster(train_set=ds, params=p1) - booster2 = lgb.Booster(train_set=ds, params=p2) - for k in range(10): - booster1.update() - booster2.update() - with tempfile.NamedTemporaryFile() as f: - p1name = f.name - # Reset booster1's parameters to p2, so the parameter section of the file matches. - booster1.reset_parameter(p2) - booster1.save_model(p1name) - with open(p1name, 'rt') as f: - p1txt = f.read() - with tempfile.NamedTemporaryFile() as f: - p2name = f.name - booster2.save_model(p2name) - with open(p2name, 'rt') as f: - p2txt = f.read() - self.maxDiff = None - self.assertEqual(p1txt, p2txt) - - def test_consistent_state_for_dataset_fields(self): - - def check_asserts(data): - np.testing.assert_allclose(data.label, data.get_label()) - np.testing.assert_allclose(data.label, data.get_field('label')) - self.assertFalse(np.isnan(data.label[0])) - self.assertFalse(np.isinf(data.label[1])) - np.testing.assert_allclose(data.weight, data.get_weight()) - np.testing.assert_allclose(data.weight, data.get_field('weight')) - self.assertFalse(np.isnan(data.weight[0])) - self.assertFalse(np.isinf(data.weight[1])) - np.testing.assert_allclose(data.init_score, data.get_init_score()) - np.testing.assert_allclose(data.init_score, data.get_field('init_score')) - self.assertFalse(np.isnan(data.init_score[0])) - self.assertFalse(np.isinf(data.init_score[1])) - self.assertTrue(np.all(np.isclose([data.label[0], data.weight[0], data.init_score[0]], - data.label[0]))) - self.assertAlmostEqual(data.label[1], data.weight[1]) - self.assertListEqual(data.feature_name, data.get_feature_name()) - - X, y = load_breast_cancer(return_X_y=True) - sequence = np.ones(y.shape[0]) - sequence[0] = np.nan - sequence[1] = np.inf - feature_names = ['f{0}'.format(i) for i in range(X.shape[1])] - lgb_data = lgb.Dataset(X, sequence, - weight=sequence, init_score=sequence, - feature_name=feature_names).construct() - check_asserts(lgb_data) - lgb_data = lgb.Dataset(X, y).construct() - lgb_data.set_label(sequence) - lgb_data.set_weight(sequence) - lgb_data.set_init_score(sequence) - lgb_data.set_feature_name(feature_names) - check_asserts(lgb_data) + b.update() + b1.update() + dname = str(tmp_path / "d.txt") + d1name = str(tmp_path / "d1.txt") + b1.save_model(d1name) + b.save_model(dname) + with open(dname, 'rt') as df: + dtxt = df.read() + with open(d1name, 'rt') as d1f: + d1txt = 
d1f.read() + assert dtxt == d1txt + + +def test_add_features_from_different_sources(): + pd = pytest.importorskip("pandas") + n_row = 100 + n_col = 5 + X = np.random.random((n_row, n_col)) + xxs = [X, sparse.csr_matrix(X), pd.DataFrame(X)] + names = ['col_%d' % i for i in range(n_col)] + for x_1 in xxs: + # test that method works even with free_raw_data=True + d1 = lgb.Dataset(x_1, feature_name=names, free_raw_data=True).construct() + d2 = lgb.Dataset(x_1, feature_name=names, free_raw_data=True).construct() + d1.add_features_from(d2) + assert d1.data is None + + # test that method works but sets raw data to None in case of immergeable data types + d1 = lgb.Dataset(x_1, feature_name=names, free_raw_data=False).construct() + d2 = lgb.Dataset([X[:n_row // 2, :], X[n_row // 2:, :]], + feature_name=names, free_raw_data=False).construct() + d1.add_features_from(d2) + assert d1.data is None + + # test that method works for different data types + d1 = lgb.Dataset(x_1, feature_name=names, free_raw_data=False).construct() + res_feature_names = [name for name in names] + for idx, x_2 in enumerate(xxs, 2): + original_type = type(d1.get_data()) + d2 = lgb.Dataset(x_2, feature_name=names, free_raw_data=False).construct() + d1.add_features_from(d2) + assert isinstance(d1.get_data(), original_type) + assert d1.get_data().shape == (n_row, n_col * idx) + res_feature_names += ['D{}_{}'.format(idx, name) for name in names] + assert d1.feature_name == res_feature_names + + +def test_cegb_affects_behavior(tmp_path): + X = np.random.random((100, 5)) + X[:, [1, 3]] = 0 + y = np.random.random(100) + names = ['col_%d' % i for i in range(5)] + ds = lgb.Dataset(X, feature_name=names).construct() + ds.set_label(y) + base = lgb.Booster(train_set=ds) + for k in range(10): + base.update() + with tempfile.NamedTemporaryFile() as f: + basename = f.name + base.save_model(basename) + with open(basename, 'rt') as f: + basetxt = f.read() + # Set extremely harsh penalties, so CEGB will block most splits. + cases = [{'cegb_penalty_feature_coupled': [50, 100, 10, 25, 30]}, + {'cegb_penalty_feature_lazy': [1, 2, 3, 4, 5]}, + {'cegb_penalty_split': 1}] + for case in cases: + booster = lgb.Booster(train_set=ds, params=case) + for k in range(10): + booster.update() + casename = str(tmp_path / "casename.txt") + booster.save_model(casename) + with open(casename, 'rt') as f: + casetxt = f.read() + assert basetxt != casetxt + + +def test_cegb_scaling_equalities(tmp_path): + X = np.random.random((100, 5)) + X[:, [1, 3]] = 0 + y = np.random.random(100) + names = ['col_%d' % i for i in range(5)] + ds = lgb.Dataset(X, feature_name=names).construct() + ds.set_label(y) + # Compare pairs of penalties, to ensure scaling works as intended + pairs = [({'cegb_penalty_feature_coupled': [1, 2, 1, 2, 1]}, + {'cegb_penalty_feature_coupled': [0.5, 1, 0.5, 1, 0.5], 'cegb_tradeoff': 2}), + ({'cegb_penalty_feature_lazy': [0.01, 0.02, 0.03, 0.04, 0.05]}, + {'cegb_penalty_feature_lazy': [0.005, 0.01, 0.015, 0.02, 0.025], 'cegb_tradeoff': 2}), + ({'cegb_penalty_split': 1}, + {'cegb_penalty_split': 2, 'cegb_tradeoff': 0.5})] + for (p1, p2) in pairs: + booster1 = lgb.Booster(train_set=ds, params=p1) + booster2 = lgb.Booster(train_set=ds, params=p2) + for k in range(10): + booster1.update() + booster2.update() + p1name = str(tmp_path / "p1.txt") + # Reset booster1's parameters to p2, so the parameter section of the file matches. 
+ booster1.reset_parameter(p2) + booster1.save_model(p1name) + with open(p1name, 'rt') as f: + p1txt = f.read() + p2name = str(tmp_path / "p2.txt") + booster2.save_model(p2name) + with open(p2name, 'rt') as f: + p2txt = f.read() + assert p1txt == p2txt + + +def test_consistent_state_for_dataset_fields(): + + def check_asserts(data): + np.testing.assert_allclose(data.label, data.get_label()) + np.testing.assert_allclose(data.label, data.get_field('label')) + assert not np.isnan(data.label[0]) + assert not np.isinf(data.label[1]) + np.testing.assert_allclose(data.weight, data.get_weight()) + np.testing.assert_allclose(data.weight, data.get_field('weight')) + assert not np.isnan(data.weight[0]) + assert not np.isinf(data.weight[1]) + np.testing.assert_allclose(data.init_score, data.get_init_score()) + np.testing.assert_allclose(data.init_score, data.get_field('init_score')) + assert not np.isnan(data.init_score[0]) + assert not np.isinf(data.init_score[1]) + assert np.all(np.isclose([data.label[0], data.weight[0], data.init_score[0]], + data.label[0])) + assert data.label[1] == pytest.approx(data.weight[1]) + assert data.feature_name == data.get_feature_name() + + X, y = load_breast_cancer(return_X_y=True) + sequence = np.ones(y.shape[0]) + sequence[0] = np.nan + sequence[1] = np.inf + feature_names = ['f{0}'.format(i) for i in range(X.shape[1])] + lgb_data = lgb.Dataset(X, sequence, + weight=sequence, init_score=sequence, + feature_name=feature_names).construct() + check_asserts(lgb_data) + lgb_data = lgb.Dataset(X, y).construct() + lgb_data.set_label(sequence) + lgb_data.set_weight(sequence) + lgb_data.set_init_score(sequence) + lgb_data.set_feature_name(feature_names) + check_asserts(lgb_data) From f6d2dce4ee4aa1df11892b4eeeff1a67c33095d4 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 15 Jan 2021 10:59:45 -0600 Subject: [PATCH 41/42] [dask] [python-package] Search for available ports when setting up network (fixes #3753) (#3766) * starting work * fixed port-binding issue on localhost * minor cleanup * updates * getting closer * definitely working for LocalCluster * it works, it works * docs * add tests * removing testing-only files * linting * Apply suggestions from code review Co-authored-by: Nikita Titov * remove duplicated code * remove unnecessary listen() Co-authored-by: Nikita Titov --- python-package/lightgbm/dask.py | 117 ++++++++++++++++++++----- tests/python_package_test/test_dask.py | 57 ++++++++---- 2 files changed, 137 insertions(+), 37 deletions(-) diff --git a/python-package/lightgbm/dask.py b/python-package/lightgbm/dask.py index 66ab83591cdb..fb8b06077e70 100644 --- a/python-package/lightgbm/dask.py +++ b/python-package/lightgbm/dask.py @@ -5,7 +5,9 @@ It is based on dask-xgboost package. """ import logging +import socket from collections import defaultdict +from typing import Dict, Iterable from urllib.parse import urlparse import numpy as np @@ -13,7 +15,7 @@ from dask import array as da from dask import dataframe as dd from dask import delayed -from dask.distributed import default_client, get_worker, wait +from dask.distributed import Client, default_client, get_worker, wait from .basic import _LIB, _safe_call from .sklearn import LGBMClassifier, LGBMRegressor @@ -23,33 +25,84 @@ logger = logging.getLogger(__name__) -def _parse_host_port(address): - parsed = urlparse(address) - return parsed.hostname, parsed.port +def _find_open_port(worker_ip: str, local_listen_port: int, ports_to_skip: Iterable[int]) -> int: + """Find an open port. 
+ This function tries to find a free port on the machine it's run on. It is intended to + be run once on each Dask worker, sequentially. -def _build_network_params(worker_addresses, local_worker_ip, local_listen_port, time_out): - """Build network parameters suitable for LightGBM C backend. + Parameters + ---------- + worker_ip : str + IP address for the Dask worker. + local_listen_port : int + First port to try when searching for open ports. + ports_to_skip: Iterable[int] + An iterable of integers referring to ports that should be skipped. Since multiple Dask + workers can run on the same physical machine, this method may be called multiple times + on the same machine. ``ports_to_skip`` is used to ensure that LightGBM doesn't try to use + the same port for two worker processes running on the same machine. + + Returns + ------- + result : int + A free port on the machine referenced by ``worker_ip``. + """ + max_tries = 1000 + out_port = None + found_port = False + for i in range(max_tries): + out_port = local_listen_port + i + if out_port in ports_to_skip: + continue + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind((worker_ip, out_port)) + found_port = True + break + # if unavailable, you'll get OSError: Address already in use + except OSError: + continue + if not found_port: + msg = "LightGBM tried %s:%d-%d and could not create a connection. Try setting local_listen_port to a different value." + raise RuntimeError(msg % (worker_ip, local_listen_port, out_port)) + return out_port + + +def _find_ports_for_workers(client: Client, worker_addresses: Iterable[str], local_listen_port: int) -> Dict[str, int]: + """Find an open port on each worker. + + LightGBM distributed training uses TCP sockets by default, and this method is used to + identify open ports on each worker so LightGBM can reliable create those sockets. Parameters ---------- - worker_addresses : iterable of str - collection of worker addresses in `://:port` format - local_worker_ip : str + client : dask.distributed.Client + Dask client. + worker_addresses : Iterable[str] + An iterable of addresses for workers in the cluster. These are strings of the form ``://:port`` local_listen_port : int - time_out : int + First port to try when searching for open ports. Returns ------- - params: dict + result : Dict[str, int] + Dictionary where keys are worker addresses and values are an open port for LightGBM to use. """ - addr_port_map = {addr: (local_listen_port + i) for i, addr in enumerate(worker_addresses)} - params = { - 'machines': ','.join('%s:%d' % (_parse_host_port(addr)[0], port) for addr, port in addr_port_map.items()), - 'local_listen_port': addr_port_map[local_worker_ip], - 'time_out': time_out, - 'num_machines': len(addr_port_map) - } - return params + lightgbm_ports = set() + worker_ip_to_port = {} + for worker_address in worker_addresses: + port = client.submit( + func=_find_open_port, + workers=[worker_address], + worker_ip=urlparse(worker_address).hostname, + local_listen_port=local_listen_port, + ports_to_skip=lightgbm_ports + ).result() + lightgbm_ports.add(port) + worker_ip_to_port[worker_address] = port + + return worker_ip_to_port def _concat(seq): @@ -63,9 +116,20 @@ def _concat(seq): raise TypeError('Data must be one of: numpy arrays, pandas dataframes, sparse matrices (from scipy). Got %s.' 
% str(type(seq[0]))) -def _train_part(params, model_factory, list_of_parts, worker_addresses, return_model, local_listen_port=12400, +def _train_part(params, model_factory, list_of_parts, worker_address_to_port, return_model, time_out=120, **kwargs): - network_params = _build_network_params(worker_addresses, get_worker().address, local_listen_port, time_out) + local_worker_address = get_worker().address + machine_list = ','.join([ + '%s:%d' % (urlparse(worker_address).hostname, port) + for worker_address, port + in worker_address_to_port.items() + ]) + network_params = { + 'machines': machine_list, + 'local_listen_port': worker_address_to_port[local_worker_address], + 'time_out': time_out, + 'num_machines': len(worker_address_to_port) + } params.update(network_params) # Concatenate many parts into one @@ -138,13 +202,22 @@ def _train(client, data, label, params, model_factory, weight=None, **kwargs): '(%s), using "data" as default', params.get("tree_learner", None)) params['tree_learner'] = 'data' + # find an open port on each worker. note that multiple workers can run + # on the same machine, so this needs to ensure that each one gets its + # own port + local_listen_port = params.get('local_listen_port', 12400) + worker_address_to_port = _find_ports_for_workers( + client=client, + worker_addresses=worker_map.keys(), + local_listen_port=local_listen_port + ) + # Tell each worker to train on the parts that it has locally futures_classifiers = [client.submit(_train_part, model_factory=model_factory, params={**params, 'num_threads': worker_ncores[worker]}, list_of_parts=list_of_parts, - worker_addresses=list(worker_map.keys()), - local_listen_port=params.get('local_listen_port', 12400), + worker_address_to_port=worker_address_to_port, time_out=params.get('time_out', 120), return_model=(worker == master_worker), **kwargs) diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py index d512cfcbda63..e8498314e1fc 100644 --- a/tests/python_package_test/test_dask.py +++ b/tests/python_package_test/test_dask.py @@ -1,5 +1,6 @@ # coding: utf-8 import os +import socket import sys import pytest @@ -89,6 +90,26 @@ def test_classifier(output, centers, client, listen_port): assert_eq(y, p2) +def test_training_does_not_fail_on_port_conflicts(client): + _, _, _, dX, dy, dw = _create_data('classification', output='array') + + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('127.0.0.1', 12400)) + + dask_classifier = dlgbm.DaskLGBMClassifier( + time_out=5, + local_listen_port=12400 + ) + for i in range(5): + dask_classifier.fit( + X=dX, + y=dy, + sample_weight=dw, + client=client + ) + assert dask_classifier.booster_ + + @pytest.mark.parametrize('output', data_output) @pytest.mark.parametrize('centers', data_centers) def test_classifier_proba(output, centers, client, listen_port): @@ -183,21 +204,27 @@ def test_regressor_local_predict(client, listen_port): assert_eq(s1, s2) -def test_build_network_params(): - workers_ips = [ - 'tcp://192.168.0.1:34545', - 'tcp://192.168.0.2:34346', - 'tcp://192.168.0.3:34347' - ] - - params = dlgbm._build_network_params(workers_ips, 'tcp://192.168.0.2:34346', 12400, 120) - exp_params = { - 'machines': '192.168.0.1:12400,192.168.0.2:12401,192.168.0.3:12402', - 'local_listen_port': 12401, - 'num_machines': len(workers_ips), - 'time_out': 120 - } - assert exp_params == params +def test_find_open_port_works(): + worker_ip = '127.0.0.1' + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind((worker_ip, 
12400)) + new_port = dlgbm._find_open_port( + worker_ip=worker_ip, + local_listen_port=12400, + ports_to_skip=set() + ) + assert new_port == 12401 + + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s_1: + s_1.bind((worker_ip, 12400)) + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s_2: + s_2.bind((worker_ip, 12401)) + new_port = dlgbm._find_open_port( + worker_ip=worker_ip, + local_listen_port=12400, + ports_to_skip=set() + ) + assert new_port == 12402 @gen_cluster(client=True, timeout=None) From f2695daba2506d047831e39aeb521e94adec5e8d Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Fri, 15 Jan 2021 20:16:12 +0300 Subject: [PATCH 42/42] completely remove tempfile from test_basic (#3767) --- tests/python_package_test/test_basic.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py index 9249c81fec3c..19e15e5d6869 100644 --- a/tests/python_package_test/test_basic.py +++ b/tests/python_package_test/test_basic.py @@ -1,6 +1,5 @@ # coding: utf-8 import os -import tempfile import lightgbm as lgb import numpy as np @@ -268,8 +267,7 @@ def test_cegb_affects_behavior(tmp_path): base = lgb.Booster(train_set=ds) for k in range(10): base.update() - with tempfile.NamedTemporaryFile() as f: - basename = f.name + basename = str(tmp_path / "basename.txt") base.save_model(basename) with open(basename, 'rt') as f: basetxt = f.read()
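
Reviewer note (illustrative, not part of any patch above): the port-search behaviour introduced in PATCH 41 boils down to probing consecutive ports with a plain TCP bind. The sketch below is a self-contained stand-in for that idea; find_open_port and the demo at the end are hypothetical names used only in this note, while the real helper shipped by the patch is _find_open_port in python-package/lightgbm/dask.py, which _find_ports_for_workers dispatches to each Dask worker via client.submit.

    import socket

    def find_open_port(ip, first_port, ports_to_skip, max_tries=1000):
        """Return the first bindable port >= first_port on ip, skipping ports_to_skip."""
        for offset in range(max_tries):
            candidate = first_port + offset
            if candidate in ports_to_skip:
                continue  # another worker process on this machine already claimed this port
            try:
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                    s.bind((ip, candidate))  # bind succeeds only if the port is free
                return candidate
            except OSError:  # "Address already in use"
                continue
        raise RuntimeError("no free port found in [%d, %d]" % (first_port, first_port + max_tries - 1))

    # Mirrors test_find_open_port_works() above: occupy 12400, then probe starting at 12400.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as busy:
        busy.bind(("127.0.0.1", 12400))
        print(find_open_port("127.0.0.1", 12400, ports_to_skip=set()))

Run locally, the demo is expected to print 12401 (assuming 12401 itself is free on the machine), which is the same behaviour the new test asserts for dlgbm._find_open_port.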