diff --git a/.appveyor.yml b/.appveyor.yml index 250cd0e336c4..696aedccca11 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -28,7 +28,6 @@ install: - set CONDA_ENV="test-env" - ps: >- switch ($env:PYTHON_VERSION) { - "2.7" {$env:MINICONDA = "C:\Miniconda-x64"} "3.6" {$env:MINICONDA = "C:\Miniconda36-x64"} "3.7" {$env:MINICONDA = "C:\Miniconda37-x64"} default {$env:MINICONDA = "C:\Miniconda37-x64"} diff --git a/.ci/test.sh b/.ci/test.sh index 96f917775044..4cd181790ad7 100755 --- a/.ci/test.sh +++ b/.ci/test.sh @@ -100,15 +100,15 @@ if [[ $TASK == "sdist" ]]; then exit 0 elif [[ $TASK == "bdist" ]]; then if [[ $OS_NAME == "macos" ]]; then - cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=macosx --universal || exit -1 - mv dist/lightgbm-$LGB_VER-py2.py3-none-macosx.whl dist/lightgbm-$LGB_VER-py2.py3-none-macosx_10_13_x86_64.macosx_10_14_x86_64.macosx_10_15_x86_64.whl + cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=macosx --python-tag py3 || exit -1 + mv dist/lightgbm-$LGB_VER-py3-none-macosx.whl dist/lightgbm-$LGB_VER-py3-none-macosx_10_13_x86_64.macosx_10_14_x86_64.macosx_10_15_x86_64.whl if [[ $AZURE == "true" ]]; then - cp dist/lightgbm-$LGB_VER-py2.py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY + cp dist/lightgbm-$LGB_VER-py3-none-macosx*.whl $BUILD_ARTIFACTSTAGINGDIRECTORY fi else - cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=manylinux1_x86_64 --universal || exit -1 + cd $BUILD_DIRECTORY/python-package && python setup.py bdist_wheel --plat-name=manylinux1_x86_64 --python-tag py3 || exit -1 if [[ $AZURE == "true" ]]; then - cp dist/lightgbm-$LGB_VER-py2.py3-none-manylinux1_x86_64.whl $BUILD_ARTIFACTSTAGINGDIRECTORY + cp dist/lightgbm-$LGB_VER-py3-none-manylinux1_x86_64.whl $BUILD_ARTIFACTSTAGINGDIRECTORY fi fi pip install --user $BUILD_DIRECTORY/python-package/dist/*.whl || exit -1 diff --git a/.ci/test_windows.ps1 b/.ci/test_windows.ps1 index 267c553bfb30..950b2463955d 100644 --- a/.ci/test_windows.ps1 +++ b/.ci/test_windows.ps1 @@ -49,7 +49,7 @@ elseif ($env:TASK -eq "sdist") { } elseif ($env:TASK -eq "bdist") { cd $env:BUILD_SOURCESDIRECTORY/python-package - python setup.py bdist_wheel --plat-name=win-amd64 --universal ; Check-Output $? + python setup.py bdist_wheel --plat-name=win-amd64 --python-tag py3 ; Check-Output $? cd dist; pip install @(Get-ChildItem *.whl) ; Check-Output $? cp @(Get-ChildItem *.whl) $env:BUILD_ARTIFACTSTAGINGDIRECTORY } elseif (($env:APPVEYOR -eq "true") -and ($env:TASK -eq "python")) { diff --git a/.travis.yml b/.travis.yml index 50d2eeabf6e9..930cc4d0f19e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,7 +17,7 @@ env: - PYTHON_VERSION=3.8 matrix: - TASK=regular PYTHON_VERSION=3.6 - - TASK=sdist PYTHON_VERSION=2.7 + - TASK=sdist - TASK=bdist - TASK=if-else - TASK=lint diff --git a/.vsts-ci.yml b/.vsts-ci.yml index a5c0af13f522..9341134b6bcd 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -117,7 +117,6 @@ jobs: PYTHON_VERSION: 3.6 sdist: TASK: sdist - PYTHON_VERSION: 2.7 bdist: TASK: bdist steps: diff --git a/docker/gpu/README.md b/docker/gpu/README.md index 4baa6302bc93..13ff2bc686c9 100644 --- a/docker/gpu/README.md +++ b/docker/gpu/README.md @@ -13,13 +13,12 @@ # Dockerfile for LightGBM GPU Version with Python `dockerfile.gpu` - A docker file with LightGBM utilizing nvidia-docker. The file is based on the `nvidia/cuda:8.0-cudnn5-devel` image. -LightGBM can be utilized in GPU and CPU modes and via Python (2.7 & 3.6). +LightGBM can be utilized in GPU and CPU modes and via Python. ## Contents - LightGBM (cpu + gpu) -- Python 2.7 (conda) + scikit-learn, notebooks, pandas, matplotlib -- Python 3.6 (conda) + scikit-learn, notebooks, pandas, matplotlib +- Python 3.8 (conda) + scikit-learn, notebooks, pandas, matplotlib Running the container starts a Jupyter Notebook at `localhost:8888`. diff --git a/docker/gpu/dockerfile.gpu b/docker/gpu/dockerfile.gpu index 08c243a57bce..1b930b7b99c6 100644 --- a/docker/gpu/dockerfile.gpu +++ b/docker/gpu/dockerfile.gpu @@ -75,8 +75,7 @@ RUN echo "export PATH=$CONDA_DIR/bin:"'$PATH' > /etc/profile.d/conda.sh && \ rm ~/miniconda.sh RUN conda config --set always_yes yes --set changeps1 no && \ - conda create -y -q -n py2 python=2.7 mkl numpy scipy scikit-learn jupyter notebook ipython pandas matplotlib && \ - conda create -y -q -n py3 python=3.6 mkl numpy scipy scikit-learn jupyter notebook ipython pandas matplotlib + conda create -y -q -n py3 python=3.8 mkl numpy scipy scikit-learn jupyter notebook ipython pandas matplotlib ################################################################################################################# # LightGBM @@ -90,7 +89,6 @@ RUN cd /usr/local/src && mkdir lightgbm && cd lightgbm && \ ENV PATH /usr/local/src/lightgbm/LightGBM:${PATH} -RUN /bin/bash -c "source activate py2 && cd /usr/local/src/lightgbm/LightGBM/python-package && python setup.py install --precompile && source deactivate" RUN /bin/bash -c "source activate py3 && cd /usr/local/src/lightgbm/LightGBM/python-package && python setup.py install --precompile && source deactivate" ################################################################################################################# diff --git a/docs/conf.py b/docs/conf.py index 103e3f51fa16..c0008be029b6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,17 +26,13 @@ from docutils.parsers.rst import Directive from sphinx.errors import VersionRequirementError from subprocess import PIPE, Popen +from unittest.mock import Mock CURR_PATH = os.path.abspath(os.path.dirname(__file__)) LIB_PATH = os.path.join(CURR_PATH, os.path.pardir, 'python-package') sys.path.insert(0, LIB_PATH) # -- mock out modules -try: - from unittest.mock import Mock # Python 3.x -except ImportError: - from mock import Mock # Python 2.x - MOCK_MODULES = ['numpy', 'scipy', 'scipy.sparse', 'sklearn', 'matplotlib', 'pandas', 'graphviz'] for mod_name in MOCK_MODULES: @@ -208,9 +204,7 @@ def generate_doxygen_xml(app): "WARN_AS_ERROR=YES", ] doxygen_input = '\n'.join(doxygen_args) - is_py3 = sys.version[0] == "3" - if is_py3: - doxygen_input = bytes(doxygen_input, "utf-8") + doxygen_input = bytes(doxygen_input, "utf-8") if not os.path.exists(os.path.join(CURR_PATH, 'doxyoutput')): os.makedirs(os.path.join(CURR_PATH, 'doxyoutput')) try: @@ -221,8 +215,7 @@ def generate_doxygen_xml(app): process = Popen(["doxygen", "-"], stdin=PIPE, stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate(doxygen_input) - output = '\n'.join([i.decode('utf-8') if is_py3 else i - for i in (stdout, stderr) if i is not None]) + output = '\n'.join([i.decode('utf-8') for i in (stdout, stderr) if i is not None]) if process.returncode != 0: raise RuntimeError(output) else: diff --git a/docs/requirements_base.txt b/docs/requirements_base.txt index 9c3dfc2a5b90..d9f0bfb8c916 100644 --- a/docs/requirements_base.txt +++ b/docs/requirements_base.txt @@ -1,3 +1,2 @@ sphinx sphinx_rtd_theme >= 0.3 -mock; python_version < '3' diff --git a/examples/python-guide/advanced_example.py b/examples/python-guide/advanced_example.py index c38c6e469ff3..382497e6ff89 100644 --- a/examples/python-guide/advanced_example.py +++ b/examples/python-guide/advanced_example.py @@ -1,14 +1,11 @@ # coding: utf-8 import json +import pickle import lightgbm as lgb import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error -try: - import cPickle as pickle -except BaseException: - import pickle print('Loading data...') # load or create your dataset diff --git a/python-package/lightgbm/__init__.py b/python-package/lightgbm/__init__.py index 786693f82412..f8d3bce3078f 100644 --- a/python-package/lightgbm/__init__.py +++ b/python-package/lightgbm/__init__.py @@ -3,16 +3,12 @@ Contributors: https://github.com/microsoft/LightGBM/graphs/contributors. """ -from __future__ import absolute_import - from .basic import Booster, Dataset from .callback import (early_stopping, print_evaluation, record_evaluation, reset_parameter) from .engine import cv, train, CVBooster import os -import sys -import warnings try: from .sklearn import LGBMModel, LGBMRegressor, LGBMClassifier, LGBMRanker @@ -36,8 +32,3 @@ 'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker', 'print_evaluation', 'record_evaluation', 'reset_parameter', 'early_stopping', 'plot_importance', 'plot_split_value_histogram', 'plot_metric', 'plot_tree', 'create_tree_digraph'] - -# REMOVEME: remove warning after 3.1.0 version release -if sys.version_info[0] == 2: - warnings.warn("LightGBM 3.1 version is the last version that supports Python 2.\n" - "Next release will drop the support.", UserWarning) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index 60f6b70f0881..7bea3a5fec44 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -1,9 +1,8 @@ # coding: utf-8 """Wrapper for C API of LightGBM.""" -from __future__ import absolute_import, print_function - import copy import ctypes +import json import os import warnings from tempfile import NamedTemporaryFile @@ -12,18 +11,13 @@ import numpy as np import scipy.sparse -from .compat import (PANDAS_INSTALLED, DataFrame, Series, is_dtype_sparse, - DataTable, - decode_string, string_type, - integer_types, numeric_types, - json, json_default_with_numpy, - range_, zip_) +from .compat import PANDAS_INSTALLED, DataFrame, Series, is_dtype_sparse, DataTable from .libpath import find_lib_path def _log_callback(msg): """Redirect logs from native library into Python console.""" - print("{0:s}".format(decode_string(msg)), end='') + print("{0:s}".format(msg.decode('utf-8')), end='') def _load_lib(): @@ -36,13 +30,16 @@ def _load_lib(): callback = ctypes.CFUNCTYPE(None, ctypes.c_char_p) lib.callback = callback(_log_callback) if lib.LGBM_RegisterLogCallback(lib.callback) != 0: - raise LightGBMError(decode_string(lib.LGBM_GetLastError())) + raise LightGBMError(lib.LGBM_GetLastError().decode('utf-8')) return lib _LIB = _load_lib() +NUMERIC_TYPES = (int, float, bool) + + def _safe_call(ret): """Check the return value from C API call. @@ -52,7 +49,7 @@ def _safe_call(ret): The return value from C API calls. """ if ret != 0: - raise LightGBMError(decode_string(_LIB.LGBM_GetLastError())) + raise LightGBMError(_LIB.LGBM_GetLastError().decode('utf-8')) def is_numeric(obj): @@ -136,6 +133,16 @@ def c_array(ctype, values): return (ctype * len(values))(*values) +def json_default_with_numpy(obj): + """Convert numpy classes to JSON serializable objects.""" + if isinstance(obj, (np.integer, np.floating, np.bool_)): + return obj.item() + elif isinstance(obj, np.ndarray): + return obj.tolist() + else: + return obj + + def param_dict_to_str(data): """Convert Python dictionary to string, which is passed to C API.""" if data is None or not data: @@ -149,7 +156,7 @@ def to_string(x): else: return str(x) pairs.append(str(key) + '=' + ','.join(map(to_string, val))) - elif isinstance(val, string_type) or isinstance(val, numeric_types) or is_numeric(val): + elif isinstance(val, (str, NUMERIC_TYPES)) or is_numeric(val): pairs.append(str(key) + '=' + str(val)) elif val is not None: raise TypeError('Unknown type of parameter:%s, got:%s' @@ -157,7 +164,7 @@ def to_string(x): return ' '.join(pairs) -class _TempFile(object): +class _TempFile: def __enter__(self): with NamedTemporaryFile(prefix="lightgbm_tmp_", delete=True) as f: self.name = f.name @@ -183,7 +190,14 @@ class LightGBMError(Exception): pass -class _ConfigAliases(object): +# DeprecationWarning is not shown by default, so let's create our own with higher level +class LGBMDeprecationWarning(UserWarning): + """Custom deprecation warning.""" + + pass + + +class _ConfigAliases: aliases = {"bin_construct_sample_cnt": {"bin_construct_sample_cnt", "subsample_for_bin"}, "boosting": {"boosting", @@ -375,7 +389,7 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica else: if len(cat_cols) != len(pandas_categorical): raise ValueError('train and valid dataset categorical_feature do not match.') - for col, category in zip_(cat_cols, pandas_categorical): + for col, category in zip(cat_cols, pandas_categorical): if list(data[col].cat.categories) != list(category): data[col] = data[col].cat.set_categories(category) if len(cat_cols): # cat_cols is list @@ -440,9 +454,9 @@ def _load_pandas_categorical(file_name=None, model_str=None): if len(lines) >= 2: break offset *= 2 - last_line = decode_string(lines[-1]).strip() + last_line = lines[-1].decode('utf-8').strip() if not last_line.startswith(pandas_key): - last_line = decode_string(lines[-2]).strip() + last_line = lines[-2].decode('utf-8').strip() elif model_str is not None: idx = model_str.rfind('\n', 0, offset) last_line = model_str[idx:].strip() @@ -452,7 +466,7 @@ def _load_pandas_categorical(file_name=None, model_str=None): return None -class _InnerPredictor(object): +class _InnerPredictor: """_InnerPredictor of LightGBM. Not exposed to user. @@ -563,7 +577,7 @@ def predict(self, data, start_iteration=0, num_iteration=-1, predict_type = C_API_PREDICT_CONTRIB int_data_has_header = 1 if data_has_header else 0 - if isinstance(data, string_type): + if isinstance(data, str): with _TempFile() as f: _safe_call(_LIB.LGBM_BoosterPredictForFile( self.handle, @@ -668,8 +682,8 @@ def inner_predict(mat, start_iteration, num_iteration, predict_type, preds=None) n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff([0] + list(sections) + [nrow])] n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum() preds = np.zeros(sum(n_preds), dtype=np.float64) - for chunk, (start_idx_pred, end_idx_pred) in zip_(np.array_split(mat, sections), - zip_(n_preds_sections, n_preds_sections[1:])): + for chunk, (start_idx_pred, end_idx_pred) in zip(np.array_split(mat, sections), + zip(n_preds_sections, n_preds_sections[1:])): # avoid memory consumption by arrays concatenation operations inner_predict(chunk, start_iteration, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred]) return preds, nrow @@ -807,8 +821,8 @@ def inner_predict_sparse(csr, start_iteration, num_iteration, predict_type): n_preds = [self.__get_num_preds(start_iteration, num_iteration, i, predict_type) for i in np.diff(sections)] n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum() preds = np.zeros(sum(n_preds), dtype=np.float64) - for (start_idx, end_idx), (start_idx_pred, end_idx_pred) in zip_(zip_(sections, sections[1:]), - zip_(n_preds_sections, n_preds_sections[1:])): + for (start_idx, end_idx), (start_idx_pred, end_idx_pred) in zip(zip(sections, sections[1:]), + zip(n_preds_sections, n_preds_sections[1:])): # avoid memory consumption by arrays concatenation operations inner_predict(csr[start_idx:end_idx], start_iteration, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred]) return preds, nrow @@ -906,7 +920,7 @@ def current_iteration(self): return out_cur_iter.value -class Dataset(object): +class Dataset: """Dataset in LightGBM.""" def __init__(self, data, label=None, reference=None, @@ -1018,7 +1032,7 @@ def _free_handle(self): def _set_init_score_by_predictor(self, predictor, data, used_indices=None): data_has_header = False - if isinstance(data, string_type): + if isinstance(data, str): # check data has header or not data_has_header = any(self.params.get(alias, False) for alias in _ConfigAliases.get("header")) num_data = self.num_data() @@ -1029,18 +1043,18 @@ def _set_init_score_by_predictor(self, predictor, data, used_indices=None): is_reshape=False) if used_indices is not None: assert not self.need_slice - if isinstance(data, string_type): + if isinstance(data, str): sub_init_score = np.zeros(num_data * predictor.num_class, dtype=np.float32) assert num_data == len(used_indices) - for i in range_(len(used_indices)): - for j in range_(predictor.num_class): + for i in range(len(used_indices)): + for j in range(predictor.num_class): sub_init_score[i * predictor.num_class + j] = init_score[used_indices[i] * predictor.num_class + j] init_score = sub_init_score if predictor.num_class > 1: # need to regroup init_score new_init_score = np.zeros(init_score.size, dtype=np.float32) - for i in range_(num_data): - for j in range_(predictor.num_class): + for i in range(num_data): + for j in range(predictor.num_class): new_init_score[j * num_data + i] = init_score[i * predictor.num_class + j] init_score = new_init_score elif self.init_score is not None: @@ -1085,9 +1099,9 @@ def _lazy_init(self, data, label=None, reference=None, if feature_name is not None: feature_dict = {name: i for i, name in enumerate(feature_name)} for name in categorical_feature: - if isinstance(name, string_type) and name in feature_dict: + if isinstance(name, str) and name in feature_dict: categorical_indices.add(feature_dict[name]) - elif isinstance(name, integer_types): + elif isinstance(name, int): categorical_indices.add(name) else: raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature" @@ -1108,7 +1122,7 @@ def _lazy_init(self, data, label=None, reference=None, elif reference is not None: raise TypeError('Reference dataset should be None or dataset instance') # start construct data - if isinstance(data, string_type): + if isinstance(data, str): self.handle = ctypes.c_void_p() _safe_call(_LIB.LGBM_DatasetCreateFromFile( c_str(data), @@ -1297,7 +1311,7 @@ def construct(self): assert used_indices.flags.c_contiguous if self.reference.group is not None: group_info = np.array(self.reference.group).astype(np.int32, copy=False) - _, self.group = np.unique(np.repeat(range_(len(group_info)), repeats=group_info)[self.used_indices], + _, self.group = np.unique(np.repeat(range(len(group_info)), repeats=group_info)[self.used_indices], return_counts=True) self.handle = ctypes.c_void_p() params_str = param_dict_to_str(self.params) @@ -1433,7 +1447,7 @@ def update(): update() self._free_handle() else: - raise LightGBMError(decode_string(_LIB.LGBM_GetLastError())) + raise LightGBMError(_LIB.LGBM_GetLastError().decode('utf-8')) return self def _reverse_update_params(self): @@ -1727,7 +1741,7 @@ def get_feature_name(self): tmp_out_len = ctypes.c_int(0) reserved_string_buffer_size = 255 required_string_buffer_size = ctypes.c_size_t(0) - string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for i in range_(num_feature)] + string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for i in range(num_feature)] ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_DatasetGetFeatureNames( self.handle, @@ -1743,7 +1757,7 @@ def get_feature_name(self): "Allocated feature name buffer size ({}) was inferior to the needed size ({})." .format(reserved_string_buffer_size, required_string_buffer_size.value) ) - return [string_buffers[i].value.decode('utf-8') for i in range_(num_feature)] + return [string_buffers[i].value.decode('utf-8') for i in range(num_feature)] def get_label(self): """Get the label of the Dataset. @@ -1997,7 +2011,7 @@ def _dump_text(self, filename): return self -class Booster(object): +class Booster: """Booster in LightGBM.""" def __init__(self, params=None, train_set=None, model_file=None, model_str=None, silent=False): @@ -2037,7 +2051,7 @@ def __init__(self, params=None, train_set=None, model_file=None, model_str=None, for alias in _ConfigAliases.get("machines"): if alias in params: machines = params[alias] - if isinstance(machines, string_type): + if isinstance(machines, str): num_machines = len(machines.split(',')) elif isinstance(machines, (list, set)): num_machines = len(machines) @@ -2458,7 +2472,7 @@ def update(self, train_set=None, fobj=None): _safe_call(_LIB.LGBM_BoosterUpdateOneIter( self.handle, ctypes.byref(is_finished))) - self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)] + self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)] return is_finished.value == 1 else: if not self.__set_objective_to_none: @@ -2501,7 +2515,7 @@ def __boost(self, grad, hess): grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), ctypes.byref(is_finished))) - self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)] + self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)] return is_finished.value == 1 def rollback_one_iter(self): @@ -2514,7 +2528,7 @@ def rollback_one_iter(self): """ _safe_call(_LIB.LGBM_BoosterRollbackOneIter( self.handle)) - self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)] + self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)] return self def current_iteration(self): @@ -2627,7 +2641,7 @@ def eval(self, data, name, feval=None): if data is self.train_set: data_idx = 0 else: - for i in range_(len(self.valid_sets)): + for i in range(len(self.valid_sets)): if data is self.valid_sets[i]: data_idx = i + 1 break @@ -2700,7 +2714,7 @@ def eval_valid(self, feval=None): result : list List with evaluation results. """ - return [item for i in range_(1, self.__num_dataset) + return [item for i in range(1, self.__num_dataset) for item in self.__inner_eval(self.name_valid_sets[i - 1], i, feval)] def save_model(self, filename, num_iteration=None, start_iteration=0, importance_type='split'): @@ -3060,7 +3074,7 @@ def feature_name(self): tmp_out_len = ctypes.c_int(0) reserved_string_buffer_size = 255 required_string_buffer_size = ctypes.c_size_t(0) - string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for i in range_(num_feature)] + string_buffers = [ctypes.create_string_buffer(reserved_string_buffer_size) for i in range(num_feature)] ptr_string_buffers = (ctypes.c_char_p * num_feature)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_BoosterGetFeatureNames( self.handle, @@ -3076,7 +3090,7 @@ def feature_name(self): "Allocated feature name buffer size ({}) was inferior to the needed size ({})." .format(reserved_string_buffer_size, required_string_buffer_size.value) ) - return [string_buffers[i].value.decode('utf-8') for i in range_(num_feature)] + return [string_buffers[i].value.decode('utf-8') for i in range(num_feature)] def feature_importance(self, importance_type='split', iteration=None): """Get feature importances. @@ -3147,12 +3161,12 @@ def get_split_value_histogram(self, feature, bins=None, xgboost_style=False): def add(root): """Recursively add thresholds.""" if 'split_index' in root: # non-leaf - if feature_names is not None and isinstance(feature, string_type): + if feature_names is not None and isinstance(feature, str): split_feature = feature_names[root['split_feature']] else: split_feature = root['split_feature'] if split_feature == feature: - if isinstance(root['threshold'], string_type): + if isinstance(root['threshold'], str): raise LightGBMError('Cannot compute split value histogram for the categorical feature') else: values.append(root['threshold']) @@ -3166,7 +3180,7 @@ def add(root): for tree_info in tree_infos: add(tree_info['tree_structure']) - if bins is None or isinstance(bins, integer_types) and xgboost_style: + if bins is None or isinstance(bins, int) and xgboost_style: n_unique = len(np.unique(values)) bins = max(min(n_unique, bins) if bins is not None else n_unique, 1) hist, bin_edges = np.histogram(values, bins=bins) @@ -3196,7 +3210,7 @@ def __inner_eval(self, data_name, data_idx, feval=None): result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) if tmp_out_len.value != self.__num_inner_eval: raise ValueError("Wrong length of eval results") - for i in range_(self.__num_inner_eval): + for i in range(self.__num_inner_eval): ret.append((data_name, self.__name_inner_eval[i], result[i], self.__higher_better_inner_eval[i])) if callable(feval): @@ -3258,7 +3272,7 @@ def __get_eval_info(self): reserved_string_buffer_size = 255 required_string_buffer_size = ctypes.c_size_t(0) string_buffers = [ - ctypes.create_string_buffer(reserved_string_buffer_size) for i in range_(self.__num_inner_eval) + ctypes.create_string_buffer(reserved_string_buffer_size) for i in range(self.__num_inner_eval) ] ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) _safe_call(_LIB.LGBM_BoosterGetEvalNames( @@ -3276,7 +3290,7 @@ def __get_eval_info(self): .format(reserved_string_buffer_size, required_string_buffer_size.value) ) self.__name_inner_eval = \ - [string_buffers[i].value.decode('utf-8') for i in range_(self.__num_inner_eval)] + [string_buffers[i].value.decode('utf-8') for i in range(self.__num_inner_eval)] self.__higher_better_inner_eval = \ [name.startswith(('auc', 'ndcg@', 'map@')) for name in self.__name_inner_eval] @@ -3312,7 +3326,7 @@ def set_attr(self, **kwargs): """ for key, value in kwargs.items(): if value is not None: - if not isinstance(value, string_type): + if not isinstance(value, str): raise ValueError("Only string values are accepted") self.__attr[key] = value else: diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py index 5d12539177f8..9140127c846b 100644 --- a/python-package/lightgbm/callback.py +++ b/python-package/lightgbm/callback.py @@ -1,13 +1,10 @@ # coding: utf-8 """Callbacks library.""" -from __future__ import absolute_import - import collections import warnings from operator import gt, lt from .basic import _ConfigAliases -from .compat import range_ class EarlyStopException(Exception): @@ -23,7 +20,7 @@ def __init__(self, best_iteration, best_score): best_score : float The score of the best iteration. """ - super(EarlyStopException, self).__init__() + super().__init__() self.best_iteration = best_iteration self.best_score = best_score @@ -219,7 +216,7 @@ def _callback(env): _init(env) if not enabled[0]: return - for i in range_(len(env.evaluation_result_list)): + for i in range(len(env.evaluation_result_list)): score = env.evaluation_result_list[i][2] if best_score_list[i] is None or cmp_op[i](score, best_score[i]): best_score[i] = score diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py index 5d951a56800a..fa12ae2c975a 100644 --- a/python-package/lightgbm/compat.py +++ b/python-package/lightgbm/compat.py @@ -1,62 +1,5 @@ # coding: utf-8 """Compatibility library.""" -from __future__ import absolute_import - -import inspect -import sys - -import numpy as np - -is_py3 = (sys.version_info[0] == 3) - -"""Compatibility between Python2 and Python3""" -if is_py3: - zip_ = zip - string_type = str - numeric_types = (int, float, bool) - integer_types = (int, ) - range_ = range - - def argc_(func): - """Count the number of arguments of a function.""" - return len(inspect.signature(func).parameters) - - def decode_string(bytestring): - """Decode C bytestring to ordinary string.""" - return bytestring.decode('utf-8') -else: - from itertools import izip as zip_ - string_type = basestring - numeric_types = (int, long, float, bool) - integer_types = (int, long) - range_ = xrange - - def argc_(func): - """Count the number of arguments of a function.""" - return len(inspect.getargspec(func).args) - - def decode_string(bytestring): - """Decode C bytestring to ordinary string.""" - return bytestring - -"""json""" -try: - import simplejson as json -except (ImportError, SyntaxError): - # simplejson does not support Python 3.2, it throws a SyntaxError - # because of u'...' Unicode literals. - import json - - -def json_default_with_numpy(obj): - """Convert numpy classes to JSON serializable objects.""" - if isinstance(obj, (np.integer, np.floating, np.bool_)): - return obj.item() - elif isinstance(obj, np.ndarray): - return obj.tolist() - else: - return obj - """pandas""" try: @@ -66,12 +9,12 @@ def json_default_with_numpy(obj): except ImportError: PANDAS_INSTALLED = False - class Series(object): + class Series: """Dummy class for pandas.Series.""" pass - class DataFrame(object): + class DataFrame: """Dummy class for pandas.DataFrame.""" pass @@ -103,7 +46,7 @@ class DataFrame(object): except ImportError: DATATABLE_INSTALLED = False - class DataTable(object): + class DataTable: """Dummy class for DataTable.""" pass @@ -162,10 +105,3 @@ def _check_sample_weight(sample_weight, X, dtype=None): _LGBMAssertAllFinite = None _LGBMCheckClassificationTargets = None _LGBMComputeSampleWeight = None - - -# DeprecationWarning is not shown by default, so let's create our own with higher level -class LGBMDeprecationWarning(UserWarning): - """Custom deprecation warning.""" - - pass diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 601b52cf1030..ee41a1b903b8 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -1,7 +1,5 @@ # coding: utf-8 """Library with training routines of LightGBM.""" -from __future__ import absolute_import - import collections import copy import warnings @@ -11,8 +9,7 @@ from . import callback from .basic import Booster, Dataset, LightGBMError, _ConfigAliases, _InnerPredictor -from .compat import (SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold, - string_type, integer_types, range_, zip_) +from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold def train(params, train_set, num_boost_round=100, @@ -159,7 +156,7 @@ def train(params, train_set, num_boost_round=100, if num_boost_round <= 0: raise ValueError("num_boost_round should be greater than zero.") - if isinstance(init_model, string_type): + if isinstance(init_model, str): predictor = _InnerPredictor(model_file=init_model, pred_parameter=params) elif isinstance(init_model, Booster): predictor = init_model._to_predictor(dict(init_model.params, **params)) @@ -182,7 +179,7 @@ def train(params, train_set, num_boost_round=100, if valid_sets is not None: if isinstance(valid_sets, Dataset): valid_sets = [valid_sets] - if isinstance(valid_names, string_type): + if isinstance(valid_names, str): valid_names = [valid_names] for i, valid_data in enumerate(valid_sets): # reduce cost for prediction training data @@ -209,7 +206,7 @@ def train(params, train_set, num_boost_round=100, # Most of legacy advanced options becomes callbacks if verbose_eval is True: callbacks.add(callback.print_evaluation()) - elif isinstance(verbose_eval, integer_types): + elif isinstance(verbose_eval, int): callbacks.add(callback.print_evaluation(verbose_eval)) if early_stopping_rounds is not None and early_stopping_rounds > 0: @@ -231,7 +228,7 @@ def train(params, train_set, num_boost_round=100, booster = Booster(params=params, train_set=train_set) if is_valid_contain_train: booster.set_train_data_name(train_data_name) - for valid_set, name_valid_set in zip_(reduced_valid_sets, name_valid_sets): + for valid_set, name_valid_set in zip(reduced_valid_sets, name_valid_sets): booster.add_valid(valid_set, name_valid_set) finally: train_set._reverse_update_params() @@ -240,7 +237,7 @@ def train(params, train_set, num_boost_round=100, booster.best_iteration = 0 # start training - for i in range_(init_iteration, init_iteration + num_boost_round): + for i in range(init_iteration, init_iteration + num_boost_round): for cb in callbacks_before_iter: cb(callback.CallbackEnv(model=booster, params=params, @@ -277,7 +274,7 @@ def train(params, train_set, num_boost_round=100, return booster -class CVBooster(object): +class CVBooster: """CVBooster in LightGBM. Auxiliary data structure to hold and redirect all boosters of ``cv`` function. @@ -328,7 +325,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi group_info = full_data.get_group() if group_info is not None: group_info = np.array(group_info, dtype=np.int32, copy=False) - flatted_group = np.repeat(range_(len(group_info)), repeats=group_info) + flatted_group = np.repeat(range(len(group_info)), repeats=group_info) else: flatted_group = np.zeros(num_data, dtype=np.int32) folds = folds.split(X=np.zeros(num_data), y=full_data.get_label(), groups=flatted_group) @@ -340,7 +337,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi raise LightGBMError('Scikit-learn is required for ranking cv.') # ranking task, split according to groups group_info = np.array(full_data.get_group(), dtype=np.int32, copy=False) - flatted_group = np.repeat(range_(len(group_info)), repeats=group_info) + flatted_group = np.repeat(range(len(group_info)), repeats=group_info) group_kfold = _LGBMGroupKFold(n_splits=nfold) folds = group_kfold.split(X=np.zeros(num_data), groups=flatted_group) elif stratified: @@ -354,9 +351,9 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi else: randidx = np.arange(num_data) kstep = int(num_data / nfold) - test_id = [randidx[i: i + kstep] for i in range_(0, num_data, kstep)] - train_id = [np.concatenate([test_id[i] for i in range_(nfold) if k != i]) for k in range_(nfold)] - folds = zip_(train_id, test_id) + test_id = [randidx[i: i + kstep] for i in range(0, num_data, kstep)] + train_id = [np.concatenate([test_id[i] for i in range(nfold) if k != i]) for k in range(nfold)] + folds = zip(train_id, test_id) ret = CVBooster() for train_idx, test_idx in folds: @@ -539,7 +536,7 @@ def cv(params, train_set, num_boost_round=100, if num_boost_round <= 0: raise ValueError("num_boost_round should be greater than zero.") - if isinstance(init_model, string_type): + if isinstance(init_model, str): predictor = _InnerPredictor(model_file=init_model, pred_parameter=params) elif isinstance(init_model, Booster): predictor = init_model._to_predictor(dict(init_model.params, **params)) @@ -573,7 +570,7 @@ def cv(params, train_set, num_boost_round=100, callbacks.add(callback.early_stopping(early_stopping_rounds, first_metric_only, verbose=False)) if verbose_eval is True: callbacks.add(callback.print_evaluation(show_stdv=show_stdv)) - elif isinstance(verbose_eval, integer_types): + elif isinstance(verbose_eval, int): callbacks.add(callback.print_evaluation(verbose_eval, show_stdv=show_stdv)) callbacks_before_iter = {cb for cb in callbacks if getattr(cb, 'before_iteration', False)} @@ -581,7 +578,7 @@ def cv(params, train_set, num_boost_round=100, callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order')) callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order')) - for i in range_(num_boost_round): + for i in range(num_boost_round): for cb in callbacks_before_iter: cb(callback.CallbackEnv(model=cvfolds, params=params, diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index ae57a449cb74..03dbc1e86818 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -1,7 +1,5 @@ # coding: utf-8 """Plotting library.""" -from __future__ import absolute_import, division - import warnings from copy import deepcopy from io import BytesIO @@ -9,8 +7,7 @@ import numpy as np from .basic import Booster -from .compat import (MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED, - range_, zip_, string_type) +from .compat import MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED from .sklearn import LGBMModel @@ -22,7 +19,7 @@ def _check_not_tuple_of_2_elements(obj, obj_name='obj'): def _float2str(value, precision=None): return ("{0:.{1}f}".format(value, precision) - if precision is not None and not isinstance(value, string_type) + if precision is not None and not isinstance(value, str) else str(value)) @@ -97,12 +94,12 @@ def plot_importance(booster, ax=None, height=0.2, if not len(importance): raise ValueError("Booster's feature_importance is empty.") - tuples = sorted(zip_(feature_name, importance), key=lambda x: x[1]) + tuples = sorted(zip(feature_name, importance), key=lambda x: x[1]) if ignore_zero: tuples = [x for x in tuples if x[1] > 0] if max_num_features is not None and max_num_features > 0: tuples = tuples[-max_num_features:] - labels, values = zip_(*tuples) + labels, values = zip(*tuples) if ax is None: if figsize is not None: @@ -112,7 +109,7 @@ def plot_importance(booster, ax=None, height=0.2, ylocs = np.arange(len(values)) ax.barh(ylocs, values, align='center', height=height, **kwargs) - for x, y in zip_(values, ylocs): + for x, y in zip(values, ylocs): ax.text(x + 1, y, _float2str(x, precision) if importance_type == 'gain' else x, va='center') @@ -238,7 +235,7 @@ def plot_split_value_histogram(booster, feature, bins=None, ax=None, width_coef= if title is not None: title = title.replace('@feature@', str(feature)) - title = title.replace('@index/name@', ('name' if isinstance(feature, string_type) else 'index')) + title = title.replace('@index/name@', ('name' if isinstance(feature, str) else 'index')) ax.set_title(title) if xlabel is not None: ax.set_xlabel(xlabel) @@ -337,7 +334,7 @@ def plot_metric(booster, metric=None, dataset_names=None, raise KeyError('No given metric in eval results.') results = metrics_for_one[metric] num_iteration, max_result, min_result = len(results), max(results), min(results) - x_ = range_(num_iteration) + x_ = range(num_iteration) ax.plot(x_, results, label=name) for name in dataset_names: diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index ed3ccea6c238..45c3d04e77f6 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -1,10 +1,10 @@ # coding: utf-8 """Scikit-learn wrapper interface for LightGBM.""" -from __future__ import absolute_import - import copy import warnings +from inspect import signature + import numpy as np from .basic import Dataset, LightGBMError, _ConfigAliases @@ -12,11 +12,11 @@ LGBMNotFittedError, _LGBMLabelEncoder, _LGBMModelBase, _LGBMRegressorBase, _LGBMCheckXY, _LGBMCheckArray, _LGBMCheckSampleWeight, _LGBMAssertAllFinite, _LGBMCheckClassificationTargets, _LGBMComputeSampleWeight, - argc_, range_, zip_, string_type, DataFrame, DataTable) + DataFrame, DataTable) from .engine import train -class _ObjectiveFunctionWrapper(object): +class _ObjectiveFunctionWrapper: """Proxy class for objective function.""" def __init__(self, func): @@ -69,7 +69,7 @@ def __call__(self, preds, dataset): The value of the second order derivative (Hessian) for each sample point. """ labels = dataset.get_label() - argc = argc_(self.func) + argc = len(signature(self.func).parameters) if argc == 2: grad, hess = self.func(labels, preds) elif argc == 3: @@ -88,15 +88,15 @@ def __call__(self, preds, dataset): num_class = len(grad) // num_data if num_class * num_data != len(grad): raise ValueError("Length of grad and hess should equal to num_class * num_data") - for k in range_(num_class): - for i in range_(num_data): + for k in range(num_class): + for i in range(num_data): idx = k * num_data + i grad[idx] *= weight[i] hess[idx] *= weight[i] return grad, hess -class _EvalFunctionWrapper(object): +class _EvalFunctionWrapper: """Proxy class for evaluation function.""" def __init__(self, func): @@ -158,7 +158,7 @@ def __call__(self, preds, dataset): Is eval result higher better, e.g. AUC is ``is_higher_better``. """ labels = dataset.get_label() - argc = argc_(self.func) + argc = len(signature(self.func).parameters) if argc == 2: return self.func(labels, preds) elif argc == 3: @@ -340,7 +340,7 @@ def get_params(self, deep=True): params : dict Parameter names mapped to their values. """ - params = super(LGBMModel, self).get_params(deep=deep) + params = super().get_params(deep=deep) params.update(self._other_params) return params @@ -518,10 +518,10 @@ def fit(self, X, y, # Separate built-in from callable evaluation metrics eval_metrics_callable = [_EvalFunctionWrapper(f) for f in eval_metric_list if callable(f)] - eval_metrics_builtin = [m for m in eval_metric_list if isinstance(m, string_type)] + eval_metrics_builtin = [m for m in eval_metric_list if isinstance(m, str)] # register default metric for consistency with callable eval_metric case - original_metric = self._objective if isinstance(self._objective, string_type) else None + original_metric = self._objective if isinstance(self._objective, str) else None if original_metric is None: # try to deduce from class instance if isinstance(self, LGBMRegressor): @@ -537,7 +537,7 @@ def fit(self, X, y, original_metric = params.pop(metric_alias) # concatenate metric from params (or default if not provided in params) and eval_metric - original_metric = [original_metric] if isinstance(original_metric, (string_type, type(None))) else original_metric + original_metric = [original_metric] if isinstance(original_metric, (str, type(None))) else original_metric params['metric'] = [e for e in eval_metrics_builtin if e not in original_metric] + original_metric params['metric'] = [metric for metric in params['metric'] if metric is not None] @@ -767,16 +767,11 @@ def fit(self, X, y, verbose=True, feature_name='auto', categorical_feature='auto', callbacks=None, init_model=None): """Docstring is inherited from the LGBMModel.""" - super(LGBMRegressor, self).fit(X, y, sample_weight=sample_weight, - init_score=init_score, eval_set=eval_set, - eval_names=eval_names, - eval_sample_weight=eval_sample_weight, - eval_init_score=eval_init_score, - eval_metric=eval_metric, - early_stopping_rounds=early_stopping_rounds, - verbose=verbose, feature_name=feature_name, - categorical_feature=categorical_feature, - callbacks=callbacks, init_model=init_model) + super().fit(X, y, sample_weight=sample_weight, init_score=init_score, + eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, + eval_init_score=eval_init_score, eval_metric=eval_metric, + early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name, + categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model) return self _base_doc = LGBMModel.fit.__doc__ @@ -803,7 +798,7 @@ def fit(self, X, y, _LGBMCheckClassificationTargets(y) self._le = _LGBMLabelEncoder().fit(y) _y = self._le.transform(y) - self._class_map = dict(zip_(self._le.classes_, self._le.transform(self._le.classes_))) + self._class_map = dict(zip(self._le.classes_, self._le.transform(self._le.classes_))) if isinstance(self.class_weight, dict): self._class_weight = {self._class_map[k]: v for k, v in self.class_weight.items()} @@ -817,7 +812,7 @@ def fit(self, X, y, self._objective = "multiclass" if not callable(eval_metric): - if isinstance(eval_metric, (string_type, type(None))): + if isinstance(eval_metric, (str, type(None))): eval_metric = [eval_metric] if self._n_classes > 2: for index, metric in enumerate(eval_metric): @@ -844,17 +839,12 @@ def fit(self, X, y, else: valid_sets[i] = (valid_x, self._le.transform(valid_y)) - super(LGBMClassifier, self).fit(X, _y, sample_weight=sample_weight, - init_score=init_score, eval_set=valid_sets, - eval_names=eval_names, - eval_sample_weight=eval_sample_weight, - eval_class_weight=eval_class_weight, - eval_init_score=eval_init_score, - eval_metric=eval_metric, - early_stopping_rounds=early_stopping_rounds, - verbose=verbose, feature_name=feature_name, - categorical_feature=categorical_feature, - callbacks=callbacks, init_model=init_model) + super().fit(X, _y, sample_weight=sample_weight, init_score=init_score, eval_set=valid_sets, + eval_names=eval_names, eval_sample_weight=eval_sample_weight, + eval_class_weight=eval_class_weight, eval_init_score=eval_init_score, + eval_metric=eval_metric, early_stopping_rounds=early_stopping_rounds, + verbose=verbose, feature_name=feature_name, categorical_feature=categorical_feature, + callbacks=callbacks, init_model=init_model) return self _base_doc = LGBMModel.fit.__doc__ @@ -919,8 +909,7 @@ def predict_proba(self, X, raw_score=False, start_iteration=0, num_iteration=Non X_SHAP_values : array-like of shape = [n_samples, (n_features + 1) * n_classes] or list with n_classes length of such objects If ``pred_contrib=True``, the feature contributions for each sample. """ - result = super(LGBMClassifier, self).predict(X, raw_score, start_iteration, num_iteration, - pred_leaf, pred_contrib, **kwargs) + result = super().predict(X, raw_score, start_iteration, num_iteration, pred_leaf, pred_contrib, **kwargs) if callable(self._objective) and not (raw_score or pred_leaf or pred_contrib): warnings.warn("Cannot compute class probabilities or labels " "due to the usage of customized objective function.\n" @@ -967,23 +956,18 @@ def fit(self, X, y, elif len(eval_group) != len(eval_set): raise ValueError("Length of eval_group should be equal to eval_set") elif (isinstance(eval_group, dict) - and any(i not in eval_group or eval_group[i] is None for i in range_(len(eval_group))) + and any(i not in eval_group or eval_group[i] is None for i in range(len(eval_group))) or isinstance(eval_group, list) and any(group is None for group in eval_group)): raise ValueError("Should set group for all eval datasets for ranking task; " "if you use dict, the index should start from 0") self._eval_at = eval_at - super(LGBMRanker, self).fit(X, y, sample_weight=sample_weight, - init_score=init_score, group=group, - eval_set=eval_set, eval_names=eval_names, - eval_sample_weight=eval_sample_weight, - eval_init_score=eval_init_score, eval_group=eval_group, - eval_metric=eval_metric, - early_stopping_rounds=early_stopping_rounds, - verbose=verbose, feature_name=feature_name, - categorical_feature=categorical_feature, - callbacks=callbacks, init_model=init_model) + super().fit(X, y, sample_weight=sample_weight, init_score=init_score, group=group, + eval_set=eval_set, eval_names=eval_names, eval_sample_weight=eval_sample_weight, + eval_init_score=eval_init_score, eval_group=eval_group, eval_metric=eval_metric, + early_stopping_rounds=early_stopping_rounds, verbose=verbose, feature_name=feature_name, + categorical_feature=categorical_feature, callbacks=callbacks, init_model=init_model) return self _base_doc = LGBMModel.fit.__doc__ diff --git a/python-package/setup.py b/python-package/setup.py index bc05628fe3e5..140c679bddd3 100644 --- a/python-package/setup.py +++ b/python-package/setup.py @@ -1,8 +1,5 @@ # coding: utf-8 """Setup lightgbm package.""" -from __future__ import absolute_import - -import io import logging import os import struct @@ -329,8 +326,8 @@ def run(self): copy_file(os.path.join(CURRENT_DIR, os.path.pardir, 'VERSION.txt'), os.path.join(CURRENT_DIR, 'lightgbm', 'VERSION.txt'), verbose=0) - version = io.open(os.path.join(CURRENT_DIR, 'lightgbm', 'VERSION.txt'), encoding='utf-8').read().strip() - readme = io.open(os.path.join(CURRENT_DIR, 'README.rst'), encoding='utf-8').read() + version = open(os.path.join(CURRENT_DIR, 'lightgbm', 'VERSION.txt'), encoding='utf-8').read().strip() + readme = open(os.path.join(CURRENT_DIR, 'README.rst'), encoding='utf-8').read() sys.path.insert(0, CURRENT_DIR) @@ -368,8 +365,6 @@ def run(self): 'Operating System :: Microsoft :: Windows', 'Operating System :: POSIX', 'Operating System :: Unix', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', diff --git a/tests/python_package_test/test_consistency.py b/tests/python_package_test/test_consistency.py index 63a5834cf619..782dac4368e3 100644 --- a/tests/python_package_test/test_consistency.py +++ b/tests/python_package_test/test_consistency.py @@ -7,7 +7,7 @@ from sklearn.datasets import load_svmlight_file -class FileLoader(object): +class FileLoader: def __init__(self, directory, prefix, config_file='train.conf'): directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), directory) diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py index de8689fd3ea5..84f5a8cc1071 100644 --- a/tests/python_package_test/test_engine.py +++ b/tests/python_package_test/test_engine.py @@ -3,6 +3,7 @@ import itertools import math import os +import pickle import psutil import random import unittest @@ -14,11 +15,6 @@ from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error, roc_auc_score, average_precision_score from sklearn.model_selection import train_test_split, TimeSeriesSplit, GroupKFold -try: - import cPickle as pickle -except ImportError: - import pickle - from .utils import load_boston, load_breast_cancer, load_digits, load_iris diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py index f0b160d60dfb..758f34d6e76f 100644 --- a/tests/python_package_test/utils.py +++ b/tests/python_package_test/utils.py @@ -1,23 +1,7 @@ # coding: utf-8 -import sklearn.datasets +from functools import lru_cache -try: - from functools import lru_cache -except ImportError: - import warnings - warnings.warn("Could not import functools.lru_cache", RuntimeWarning) - - def lru_cache(maxsize=None): - cache = {} - - def _lru_wrapper(user_function): - def wrapper(*args, **kwargs): - arg_key = (args, tuple(kwargs.items())) - if arg_key not in cache: - cache[arg_key] = user_function(*args, **kwargs) - return cache[arg_key] - return wrapper - return _lru_wrapper +import sklearn.datasets @lru_cache(maxsize=None)