[python-package] remove default arguments in internal functions #5834

Merged · 2 commits · Apr 19, 2023
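The idea behind the change: internal helpers such as _list_to_1d_numpy carried default argument values even though every caller lives inside the library, so a call site that forgot an argument silently got the default instead of failing. Removing the defaults makes each call site state its intent and turns omissions into immediate errors. A minimal sketch of the pattern (the _coerce_1d* helpers here are hypothetical illustrations, not LightGBM code):

import numpy as np

# Before: a forgotten dtype at an internal call site silently became float32.
def _coerce_1d(data, dtype=np.float32, name='list'):
    return np.asarray(data, dtype=dtype).ravel()

# After: no defaults, so every caller must spell out dtype and name,
# and a missing argument fails loudly instead of guessing.
def _coerce_1d_explicit(data, dtype, name):
    return np.asarray(data, dtype=dtype).ravel()

_coerce_1d([1, 2, 3])                                         # quietly float32
_coerce_1d_explicit([1, 2, 3], dtype=np.int32, name='group')  # intent visible
# _coerce_1d_explicit([1, 2, 3])
#   -> TypeError: _coerce_1d_explicit() missing 2 required positional
#      arguments: 'dtype' and 'name'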
python-package/lightgbm/basic.py (56 changes: 28 additions & 28 deletions)
@@ -281,8 +281,8 @@ def _is_1d_collection(data: Any) -> bool:

 def _list_to_1d_numpy(
     data: Any,
-    dtype: "np.typing.DTypeLike" = np.float32,
-    name: str = 'list'
+    dtype: "np.typing.DTypeLike",
+    name: str
 ) -> np.ndarray:
     """Convert data to numpy 1-D array."""
     if _is_numpy_1d_array(data):
@@ -322,8 +322,8 @@ def _is_2d_collection(data: Any) -> bool:

 def _data_to_2d_numpy(
     data: Any,
-    dtype: "np.typing.DTypeLike" = np.float32,
-    name: str = 'list'
+    dtype: "np.typing.DTypeLike",
+    name: str
 ) -> np.ndarray:
     """Convert data to numpy 2-D array."""
     if _is_numpy_2d_array(data):
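With the defaults gone, dtype and name are required at every internal call to these converters, as the call-site hunks below show. A runnable sketch of the new call shape, using stand-ins that assume only the documented behavior (convert to a 1-D / 2-D numpy array of the requested dtype):

import numpy as np

def _list_to_1d_numpy(data, dtype, name):  # stand-in, not the real helper
    return np.asarray(data, dtype=dtype).ravel()

def _data_to_2d_numpy(data, dtype, name):  # stand-in, not the real helper
    return np.asarray(data, dtype=dtype)

# Every updated call site in this PR follows this shape:
group = _list_to_1d_numpy([2, 3, 5], dtype=np.int32, name='group')
init_score = _data_to_2d_numpy([[0.1, 0.9], [0.8, 0.2]], dtype=np.float64, name='init_score')
assert group.dtype == np.int32 and init_score.shape == (2, 2)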
@@ -1774,15 +1774,15 @@ def _set_init_score_by_predictor(
     def _lazy_init(
         self,
         data: Optional[_LGBM_TrainDataType],
-        label: Optional[_LGBM_LabelType] = None,
-        reference: Optional["Dataset"] = None,
-        weight: Optional[_LGBM_WeightType] = None,
-        group: Optional[_LGBM_GroupType] = None,
-        init_score: Optional[_LGBM_InitScoreType] = None,
-        predictor: Optional[_InnerPredictor] = None,
-        feature_name: _LGBM_FeatureNameConfiguration = 'auto',
-        categorical_feature: _LGBM_CategoricalFeatureConfiguration = 'auto',
-        params: Optional[Dict[str, Any]] = None
+        label: Optional[_LGBM_LabelType],
+        reference: Optional["Dataset"],
+        weight: Optional[_LGBM_WeightType],
+        group: Optional[_LGBM_GroupType],
+        init_score: Optional[_LGBM_InitScoreType],
+        predictor: Optional[_InnerPredictor],
+        feature_name: _LGBM_FeatureNameConfiguration,
+        categorical_feature: _LGBM_CategoricalFeatureConfiguration,
+        params: Optional[Dict[str, Any]]
     ) -> "Dataset":
         if data is None:
             self.handle = None
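A design note: the PR keeps these as ordinary positional-or-keyword parameters and simply updates every caller. Python's keyword-only marker would be another way to force explicitness; a hypothetical sketch, for contrast only (not what this PR does):

from typing import Any, Dict, Optional

# A bare * makes everything after it keyword-only, so arguments can never
# be passed positionally out of order (hypothetical alternative):
def _lazy_init_sketch(
    data: Optional[Any],
    *,
    label: Optional[Any],
    reference: Optional[Any],
    params: Optional[Dict[str, Any]],
) -> None:
    return None

_lazy_init_sketch([1, 2], label=None, reference=None, params=None)  # ok
# _lazy_init_sketch([1, 2], None, None, None)
#   -> TypeError: takes 1 positional argument but 4 were given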
@@ -2166,13 +2166,13 @@ def construct(self) -> "Dataset":
                 self._update_params(reference_params)
             if self.used_indices is None:
                 # create valid
-                self._lazy_init(self.data, label=self.label, reference=self.reference,
+                self._lazy_init(data=self.data, label=self.label, reference=self.reference,
                                 weight=self.weight, group=self.group,
                                 init_score=self.init_score, predictor=self._predictor,
-                                feature_name=self.feature_name, params=self.params)
+                                feature_name=self.feature_name, categorical_feature='auto', params=self.params)
             else:
                 # construct subset
-                used_indices = _list_to_1d_numpy(self.used_indices, np.int32, name='used_indices')
+                used_indices = _list_to_1d_numpy(self.used_indices, dtype=np.int32, name='used_indices')
                 assert used_indices.flags.c_contiguous
                 if self.reference.group is not None:
                     group_info = np.array(self.reference.group).astype(np.int32, copy=False)
@@ -2201,7 +2201,7 @@ def construct(self) -> "Dataset":
                 )
         else:
             # create train
-            self._lazy_init(self.data, label=self.label,
+            self._lazy_init(data=self.data, label=self.label, reference=None,
                             weight=self.weight, group=self.group,
                             init_score=self.init_score, predictor=self._predictor,
                             feature_name=self.feature_name, categorical_feature=self.categorical_feature, params=self.params)
@@ -2372,9 +2372,9 @@ def set_field(
         if field_name == 'init_score':
             dtype = np.float64
             if _is_1d_collection(data):
-                data = _list_to_1d_numpy(data, dtype, name=field_name)
+                data = _list_to_1d_numpy(data, dtype=dtype, name=field_name)
             elif _is_2d_collection(data):
-                data = _data_to_2d_numpy(data, dtype, name=field_name)
+                data = _data_to_2d_numpy(data, dtype=dtype, name=field_name)
                 data = data.ravel(order='F')
             else:
                 raise TypeError(
@@ -2383,7 +2383,7 @@
                 )
         else:
             dtype = np.int32 if field_name == 'group' else np.float32
-            data = _list_to_1d_numpy(data, dtype, name=field_name)
+            data = _list_to_1d_numpy(data, dtype=dtype, name=field_name)

         ptr_data: Union[_ctypes_float_ptr, _ctypes_int_ptr]
         if data.dtype == np.float32 or data.dtype == np.float64:
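The set_field edits are pure call-site hygiene: dtype moves from positional to keyword so its role is visible at a glance. The dtype routing that the hunks above encode is easy to restate as a standalone check (a paraphrase of the logic shown, not additional API):

import numpy as np

def _field_dtype(field_name):
    # init_score is stored as float64, group as int32, everything else
    # as float32, exactly as the branches above choose.
    if field_name == 'init_score':
        return np.float64
    return np.int32 if field_name == 'group' else np.float32

assert _field_dtype('init_score') is np.float64
assert _field_dtype('group') is np.int32
assert _field_dtype('weight') is np.float32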
@@ -2597,7 +2597,7 @@ def set_label(self, label: Optional[_LGBM_LabelType]) -> "Dataset":
                 label = label.to_numpy(dtype=np.float32, na_value=np.nan)
             label_array = np.ravel(label)
         else:
-            label_array = _list_to_1d_numpy(label, name='label')
+            label_array = _list_to_1d_numpy(label, dtype=np.float32, name='label')
         self.set_field('label', label_array)
         self.label = self.get_field('label')  # original values can be modified at cpp side
         return self
@@ -2622,7 +2622,7 @@ def set_weight(
             weight = None
         self.weight = weight
         if self.handle is not None and weight is not None:
-            weight = _list_to_1d_numpy(weight, name='weight')
+            weight = _list_to_1d_numpy(weight, dtype=np.float32, name='weight')
             self.set_field('weight', weight)
             self.weight = self.get_field('weight')  # original values can be modified at cpp side
         return self
@@ -2671,7 +2671,7 @@ def set_group(
         """
         self.group = group
         if self.handle is not None and group is not None:
-            group = _list_to_1d_numpy(group, np.int32, name='group')
+            group = _list_to_1d_numpy(group, dtype=np.int32, name='group')
             self.set_field('group', group)
         return self
@@ -3588,8 +3588,8 @@ def __boost(
         if self.__num_class > 1:
             grad = grad.ravel(order='F')
             hess = hess.ravel(order='F')
-        grad = _list_to_1d_numpy(grad, name='gradient')
-        hess = _list_to_1d_numpy(hess, name='hessian')
+        grad = _list_to_1d_numpy(grad, dtype=np.float32, name='gradient')
+        hess = _list_to_1d_numpy(hess, dtype=np.float32, name='hessian')
         assert grad.flags.c_contiguous
         assert hess.flags.c_contiguous
         if len(grad) != len(hess):
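In __boost, grad and hess come from user-supplied objective functions, so spelling out dtype=np.float32 documents the contract that the asserts right after make explicit: contiguous float32 buffers, with multiclass outputs flattened column-major first. A runnable sketch of that normalization (shapes are illustrative):

import numpy as np

num_data, num_class = 4, 3
rng = np.random.default_rng(0)
grad = rng.normal(size=(num_data, num_class))  # as a custom objective might return

grad = grad.ravel(order='F')               # multiclass: flatten column-major
grad = np.asarray(grad, dtype=np.float32)  # coerce, mirroring _list_to_1d_numpy
assert grad.flags.c_contiguous
assert grad.dtype == np.float32 and grad.size == num_data * num_class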
@@ -4098,7 +4098,7 @@ def predict(
             Prediction result.
             Can be sparse or a list of sparse objects (each element represents predictions for one class) for feature contributions (when ``pred_contrib=True``).
         """
-        predictor = self._to_predictor(deepcopy(kwargs))
+        predictor = self._to_predictor(pred_parameter=deepcopy(kwargs))
         if num_iteration is None:
             if start_iteration <= 0:
                 num_iteration = self.best_iteration
@@ -4188,7 +4188,7 @@ def refit(
             raise LightGBMError('Cannot refit due to null objective function.')
         if dataset_params is None:
             dataset_params = {}
-        predictor = self._to_predictor(deepcopy(kwargs))
+        predictor = self._to_predictor(pred_parameter=deepcopy(kwargs))
         leaf_preds = predictor.predict(
             data=data,
             start_iteration=-1,
@@ -4292,7 +4292,7 @@ def set_leaf_output(

     def _to_predictor(
         self,
-        pred_parameter: Optional[Dict[str, Any]] = None
+        pred_parameter: Dict[str, Any]
     ) -> _InnerPredictor:
         """Convert to predictor."""
         predictor = _InnerPredictor(booster_handle=self.handle, pred_parameter=pred_parameter)
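Both in-tree callers (predict and refit above) now read self._to_predictor(pred_parameter=deepcopy(kwargs)). The deepcopy means whatever downstream code does with the dict cannot leak back into the caller's kwargs. A standalone illustration of that property (the mutation below is hypothetical):

from copy import deepcopy

def _to_predictor_sketch(pred_parameter):
    pred_parameter['num_iteration'] = 10  # hypothetical downstream mutation
    return pred_parameter

kwargs = {'pred_contrib': True}
_to_predictor_sketch(pred_parameter=deepcopy(kwargs))
assert kwargs == {'pred_contrib': True}   # the caller's dict is untouched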
python-package/lightgbm/dask.py (2 changes: 1 addition & 1 deletion)
@@ -189,7 +189,7 @@ def _train_part(
     local_listen_port: int,
     num_machines: int,
     return_model: bool,
-    time_out: int = 120,
+    time_out: int,
     **kwargs: Any
 ) -> Optional[LGBMModel]:
     network_params = {
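_train_part is private to lightgbm.dask and is only scheduled by its own training loop, so no external caller could have been relying on time_out defaulting to 120; requiring it just makes the internal dispatch explicit. A reduced stand-in showing the shape of the function after the change (the body is paraphrased from the visible context, and the dict keys beyond the parameter names are assumptions):

from typing import Any, Optional

def _train_part_sketch(
    machines: str,
    local_listen_port: int,
    num_machines: int,
    return_model: bool,
    time_out: int,              # now required: the dead default is gone
    **kwargs: Any,
) -> Optional[dict]:
    network_params = {
        'machines': machines,
        'local_listen_port': local_listen_port,
        'num_machines': num_machines,
        'time_out': time_out,   # assumed key, mirroring the parameter name
    }
    return network_params if return_model else None

_train_part_sketch('127.0.0.1:12400', 12400, 2, True, time_out=120)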
python-package/lightgbm/engine.py (4 changes: 2 additions & 2 deletions)
@@ -173,7 +173,7 @@ def train(
     if isinstance(init_model, (str, Path)):
         predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
     elif isinstance(init_model, Booster):
-        predictor = init_model._to_predictor(dict(init_model.params, **params))
+        predictor = init_model._to_predictor(pred_parameter=dict(init_model.params, **params))
     init_iteration = predictor.num_total_iteration if predictor is not None else 0
     # check dataset
     if not isinstance(train_set, Dataset):
@@ -678,7 +678,7 @@ def cv(
     if isinstance(init_model, (str, Path)):
         predictor = _InnerPredictor(model_file=init_model, pred_parameter=params)
     elif isinstance(init_model, Booster):
-        predictor = init_model._to_predictor(dict(init_model.params, **params))
+        predictor = init_model._to_predictor(pred_parameter=dict(init_model.params, **params))
     else:
         predictor = None
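In both train() and cv(), the merged dict is now passed by keyword. The merge idiom dict(init_model.params, **params) is plain Python: it copies the first dict and lets the second override on key collisions. A quick standalone check:

base = {'objective': 'binary', 'learning_rate': 0.1}  # e.g. init_model.params
override = {'learning_rate': 0.05}                    # e.g. user-passed params
merged = dict(base, **override)
assert merged == {'objective': 'binary', 'learning_rate': 0.05}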
tests/python_package_test/test_basic.py (8 changes: 4 additions & 4 deletions)
@@ -632,17 +632,17 @@ def test_list_to_1d_numpy(collection, dtype):
         y = pd_Series(y)
     if isinstance(y, np.ndarray) and len(y.shape) == 2:
         with pytest.warns(UserWarning, match='column-vector'):
-            lgb.basic._list_to_1d_numpy(y)
+            lgb.basic._list_to_1d_numpy(y, dtype=np.float32, name="list")
         return
     elif isinstance(y, list) and isinstance(y[0], list):
         with pytest.raises(TypeError):
-            lgb.basic._list_to_1d_numpy(y)
+            lgb.basic._list_to_1d_numpy(y, dtype=np.float32, name="list")
         return
     elif isinstance(y, pd_Series) and y.dtype == object:
         with pytest.raises(ValueError):
-            lgb.basic._list_to_1d_numpy(y)
+            lgb.basic._list_to_1d_numpy(y, dtype=np.float32, name="list")
         return
-    result = lgb.basic._list_to_1d_numpy(y, dtype=dtype)
+    result = lgb.basic._list_to_1d_numpy(y, dtype=dtype, name="list")
     assert result.size == 10
     assert result.dtype == dtype
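The test updates matter for the test's own correctness: on the list-of-lists path, pytest.raises(TypeError) would also have been satisfied by a missing-argument TypeError once the defaults were removed, so passing dtype and name explicitly keeps the assertion pointed at the intended failure. A condensed, self-contained rendition of those checks against a stand-in with the new signature:

import numpy as np
import pytest

def _list_to_1d_numpy(data, dtype, name):  # stand-in with the new signature
    if isinstance(data, list) and data and isinstance(data[0], list):
        raise TypeError(f"Wrong type({type(data).__name__}) for {name}.")
    return np.asarray(data, dtype=dtype).ravel()

with pytest.raises(TypeError):
    _list_to_1d_numpy([[1], [2]], dtype=np.float32, name="list")

result = _list_to_1d_numpy(list(range(10)), dtype=np.float32, name="list")
assert result.size == 10
assert result.dtype == np.float32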