From 50561fe1b51d69019d23f9fe77f96dece9f798ac Mon Sep 17 00:00:00 2001
From: Jeroen van Zundert <mail@jeroenvanzundert.nl>
Date: Wed, 20 Jul 2022 22:43:20 +0200
Subject: [PATCH] Add pylint dependency, fix a couple of files (partially) to
 show impact

---
 py-polars/build.requirements.txt          |  1 +
 py-polars/polars/__init__.py              |  4 +-
 py-polars/polars/convert.py               | 16 +++---
 py-polars/polars/datatypes.py             | 24 ++++----
 py-polars/polars/datatypes_constructor.py |  6 +-
 py-polars/polars/io.py                    | 13 ++---
 py-polars/polars/testing.py               | 42 +++++++-------
 py-polars/polars/utils.py                 | 69 +++++++++++------------
 py-polars/pyproject.toml                  | 11 ++++
 9 files changed, 97 insertions(+), 89 deletions(-)

diff --git a/py-polars/build.requirements.txt b/py-polars/build.requirements.txt
index f08444a2cd65..ea78e8274f50 100644
--- a/py-polars/build.requirements.txt
+++ b/py-polars/build.requirements.txt
@@ -20,6 +20,7 @@ isort~=5.10.1
 mypy==0.961
 ghp-import==2.1.0
 flake8==4.0.1
+pylint==2.14.5
 sphinx==4.2.0
 pydata-sphinx-theme==0.6.3
 sphinx-panels==0.6.0
diff --git a/py-polars/polars/__init__.py b/py-polars/polars/__init__.py
index d3faab2b6352..d770d8f1b064 100644
--- a/py-polars/polars/__init__.py
+++ b/py-polars/polars/__init__.py
@@ -1,3 +1,4 @@
+import os
 import warnings
 
 try:
@@ -10,7 +11,7 @@ def version() -> str:
     # this is only useful for documentation
     warnings.warn("polars binary missing!")
 
-import polars.testing as testing
+from polars import testing
 from polars.cfg import Config, toggle_string_cache  # We do not export in __all__
 from polars.convert import (
     from_arrow,
@@ -238,6 +239,5 @@ def version() -> str:
 
 __version__ = version()
 
-import os
 
 os.environ["POLARS_ALLOW_EXTENSION"] = "true"
diff --git a/py-polars/polars/convert.py b/py-polars/polars/convert.py
index 2c7e0638f578..d7c2c0ad40fa 100644
--- a/py-polars/polars/convert.py
+++ b/py-polars/polars/convert.py
@@ -165,8 +165,8 @@ def from_records(
             DeprecationWarning,
         )
         return DataFrame._from_numpy(data, columns=columns, orient=orient)
-    else:
-        return DataFrame._from_records(data, columns=columns, orient=orient)
+
+    return DataFrame._from_records(data, columns=columns, orient=orient)
 
 
 def from_numpy(
@@ -281,10 +281,10 @@ def from_arrow(
         raise ImportError("'pyarrow' is required when using from_arrow().")
     if isinstance(a, pa.Table):
         return DataFrame._from_arrow(a, rechunk=rechunk)
-    elif isinstance(a, (pa.Array, pa.ChunkedArray)):
+    if isinstance(a, (pa.Array, pa.ChunkedArray)):
         return Series._from_arrow("", a, rechunk)
-    else:
-        raise ValueError(f"Expected Arrow Table or Array, got {type(a)}.")
+
+    raise ValueError(f"Expected Arrow Table or Array, got {type(a)}.")
 
 
 @overload
@@ -369,7 +369,7 @@ def from_pandas(
 
     if isinstance(df, (pd.Series, pd.DatetimeIndex)):
         return Series._from_pandas("", df, nan_to_none=nan_to_none)
-    elif isinstance(df, pd.DataFrame):
+    if isinstance(df, pd.DataFrame):
         return DataFrame._from_pandas(df, rechunk=rechunk, nan_to_none=nan_to_none)
-    else:
-        raise ValueError(f"Expected pandas DataFrame or Series, got {type(df)}.")
+
+    raise ValueError(f"Expected pandas DataFrame or Series, got {type(df)}.")
diff --git a/py-polars/polars/datatypes.py b/py-polars/polars/datatypes.py
index 2b7e6ec90175..ea86a9713efa 100644
--- a/py-polars/polars/datatypes.py
+++ b/py-polars/polars/datatypes.py
@@ -126,13 +126,13 @@ def __eq__(self, other: type[DataType]) -> bool:  # type: ignore[override]
         # List[i64] == List[f32] == False
 
         # allow comparing object instances to class
-        if type(other) is type and issubclass(other, List):
+        if isinstance(other, type) and issubclass(other, List):  # type: ignore[redundant-expr]
             return True
         if isinstance(other, List):
             if self.inner is None or other.inner is None:
                 return True
-            else:
-                return self.inner == other.inner
+
+            return self.inner == other.inner
         else:
             return False
 
@@ -421,26 +421,26 @@ def dtype_to_ctype(dtype: PolarsDataType) -> type[_SimpleCData]:
     try:
         return _DTYPE_TO_CTYPE[dtype]
     except KeyError:  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError from None
 
 
 def dtype_to_ffiname(dtype: PolarsDataType) -> str:
     try:
         return _DTYPE_TO_FFINAME[dtype]
     except KeyError:  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError from None
 
 
 def dtype_to_py_type(dtype: PolarsDataType) -> type:
     try:
         return _DTYPE_TO_PY_TYPE[dtype]
     except KeyError:  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError from None
 
 
 def is_polars_dtype(data_type: Any) -> bool:
     return (
-        type(data_type) is type
+        isinstance(data_type, type)
         and issubclass(data_type, DataType)
         or isinstance(data_type, DataType)
     )
@@ -453,7 +453,7 @@ def py_type_to_dtype(data_type: Any) -> type[DataType]:
     try:
         return _PY_TYPE_TO_DTYPE[data_type]
     except KeyError:  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError from None
 
 
 def py_type_to_arrow_type(dtype: type[Any]) -> pa.lib.DataType:
@@ -463,7 +463,7 @@ def py_type_to_arrow_type(dtype: type[Any]) -> pa.lib.DataType:
     try:
         return _PY_TYPE_TO_ARROW_TYPE[dtype]
     except KeyError:  # pragma: no cover
-        raise ValueError(f"Cannot parse dtype {dtype} into Arrow dtype.")
+        raise ValueError(f"Cannot parse dtype {dtype} into Arrow dtype.") from None
 
 
 def dtype_to_arrow_type(dtype: PolarsDataType) -> pa.lib.DataType:
@@ -473,7 +473,7 @@ def dtype_to_arrow_type(dtype: PolarsDataType) -> pa.lib.DataType:
     try:
         return _DTYPE_TO_ARROW_TYPE[dtype]
     except KeyError:  # pragma: no cover
-        raise ValueError(f"Cannot parse dtype {dtype} into Arrow dtype.")
+        raise ValueError(f"Cannot parse dtype {dtype} into Arrow dtype.") from None
 
 
 def supported_numpy_char_code(dtype: str) -> bool:
@@ -484,7 +484,7 @@ def numpy_char_code_to_dtype(dtype: str) -> type[DataType]:
     try:
         return _NUMPY_CHAR_CODE_TO_DTYPE[dtype]
     except KeyError:  # pragma: no cover
-        raise NotImplementedError
+        raise NotImplementedError from None
 
 
 def maybe_cast(
@@ -495,7 +495,7 @@ def maybe_cast(
 
     if isinstance(el, datetime):
         return _datetime_to_pl_timestamp(el, time_unit)
-    elif isinstance(el, timedelta):
+    if isinstance(el, timedelta):
         return _timedelta_to_pl_timedelta(el, time_unit)
     py_type = dtype_to_py_type(dtype)
     if not isinstance(el, py_type):
diff --git a/py-polars/polars/datatypes_constructor.py b/py-polars/polars/datatypes_constructor.py
index 72ba02396435..dd4ec1390173 100644
--- a/py-polars/polars/datatypes_constructor.py
+++ b/py-polars/polars/datatypes_constructor.py
@@ -73,7 +73,7 @@ def polars_type_to_constructor(
     try:
         return _POLARS_TYPE_TO_CONSTRUCTOR[dtype]
     except KeyError:  # pragma: no cover
-        raise ValueError(f"Cannot construct PySeries for type {dtype}.")
+        raise ValueError(f"Cannot construct PySeries for type {dtype}.") from None
 
 
 if _NUMPY_AVAILABLE and not _DOCUMENTING:
@@ -101,8 +101,8 @@ def numpy_type_to_constructor(dtype: type[np.dtype]) -> Callable[..., PySeries]:
         return _NUMPY_TYPE_TO_CONSTRUCTOR[dtype]
     except KeyError:
         return PySeries.new_object
-    except NameError:  # pragma: no cover
-        raise ImportError("'numpy' is required for this functionality.")
+    except NameError as err:  # pragma: no cover
+        raise ImportError("'numpy' is required for this functionality.") from err
 
 
 if not _DOCUMENTING:
diff --git a/py-polars/polars/io.py b/py-polars/polars/io.py
index db589f2e02a2..acc1b7de4a95 100644
--- a/py-polars/polars/io.py
+++ b/py-polars/polars/io.py
@@ -4,8 +4,6 @@
 from pathlib import Path
 from typing import Any, BinaryIO, Callable, Mapping, TextIO, cast
 
-from polars.utils import format_path, handle_projection_columns
-
 try:
     import pyarrow as pa
 
@@ -22,6 +20,7 @@
 from polars.datatypes import DataType, Utf8
 from polars.internals import DataFrame, LazyFrame, _scan_ds
 from polars.internals.io import _prepare_file_arg
+from polars.utils import format_path, handle_projection_columns
 
 try:
     import connectorx as cx
@@ -967,10 +966,10 @@ def read_sql(
             protocol=protocol,
         )
         return cast(DataFrame, from_arrow(tbl))
-    else:
-        raise ImportError(
-            "connectorx is not installed. Please run `pip install connectorx>=0.2.2`."
-        )
+
+    raise ImportError(
+        "connectorx is not installed. Please run `pip install connectorx>=0.2.2`."
+    )
 
 
 def read_excel(
@@ -1060,7 +1059,7 @@ def read_excel(
     except ImportError:
         raise ImportError(
             "xlsx2csv is not installed. Please run `pip install xlsx2csv`."
-        )
+        ) from None
 
     if isinstance(file, (str, Path)):
         file = format_path(file)
diff --git a/py-polars/polars/testing.py b/py-polars/polars/testing.py
index 43e9b863468f..f29b08e96f49 100644
--- a/py-polars/polars/testing.py
+++ b/py-polars/polars/testing.py
@@ -275,11 +275,11 @@ def _getattr_multi(obj: object, op: str) -> Any:
     get the attribute "str", and then the attribute "lengths"
     """
     op_list = op.split(".")
-    return reduce(lambda o, m: getattr(o, m), op_list, obj)
+    return reduce(getattr, op_list, obj)
 
 
 def verify_series_and_expr_api(
-    input: Series, expected: Series | None, op: str, *args: Any, **kwargs: Any
+    result: Series, expected: Series | None, op: str, *args: Any, **kwargs: Any
 ) -> None:
     """
     Small helper function to test element-wise functions for both the series and expressions api.
@@ -291,8 +291,8 @@ def verify_series_and_expr_api(
     >>> verify_series_and_expr_api(s, expected, "sort")
     """
     expr = _getattr_multi(col("*"), op)(*args, **kwargs)
-    result_expr: Series = input.to_frame().select(expr)[:, 0]  # type: ignore[assignment]
-    result_series = _getattr_multi(input, op)(*args, **kwargs)
+    result_expr: Series = result.to_frame().select(expr)[:, 0]  # type: ignore[assignment]
+    result_series = _getattr_multi(result, op)(*args, **kwargs)
     if expected is None:
         assert_series_equal(result_series, result_expr)
     else:
@@ -305,7 +305,7 @@ def is_categorical_dtype(data_type: Any) -> bool:
     Check if the input is a polars Categorical dtype.
     """
     return (
-        type(data_type) is type
+        isinstance(data_type, type)
         and issubclass(data_type, Categorical)
         or isinstance(data_type, Categorical)
     )
@@ -409,23 +409,23 @@ def __post_init__(self) -> None:
                     raise InvalidArgument(
                         f"No strategy (currently) available for {self.dtype} type"
                     )
+
+                # given a custom strategy, but no explicit dtype. infer one
+                # from the first non-None value that the strategy produces.
+                with warnings.catch_warnings():
+                    # note: usually you should not call "example()" outside of an interactive shell, hence
+                    # the warning. however, here it is reasonable to do so, so we catch and ignore it
+                    warnings.simplefilter("ignore", NonInteractiveExampleWarning)
+                    sample_value_iter = (self.strategy.example() for _ in range(100))  # type: ignore[union-attr]
+                    sample_value_type = type(
+                        next(e for e in sample_value_iter if e is not None)
+                    )
+                if sample_value_type is not None:
+                    self.dtype = py_type_to_dtype(sample_value_type)
                 else:
-                    # given a custom strategy, but no explicit dtype. infer one
-                    # from the first non-None value that the strategy produces.
-                    with warnings.catch_warnings():
-                        # note: usually you should not call "example()" outside of an interactive shell, hence
-                        # the warning. however, here it is reasonable to do so, so we catch and ignore it
-                        warnings.simplefilter("ignore", NonInteractiveExampleWarning)
-                        sample_value_iter = (self.strategy.example() for _ in range(100))  # type: ignore[union-attr]
-                        sample_value_type = type(
-                            next(e for e in sample_value_iter if e is not None)
-                        )
-                    if sample_value_type is not None:
-                        self.dtype = py_type_to_dtype(sample_value_type)
-                    else:
-                        raise InvalidArgument(
-                            f"Unable to determine dtype for strategy {self.dtype} type"
-                        )
+                    raise InvalidArgument(
+                        f"Unable to determine dtype for strategy {self.dtype} type"
+                    )
 
     def columns(
         cols: int | Sequence[str] | None = None,
diff --git a/py-polars/polars/utils.py b/py-polars/polars/utils.py
index 04fc4826e267..2b8d99a9b27b 100644
--- a/py-polars/polars/utils.py
+++ b/py-polars/polars/utils.py
@@ -36,19 +36,19 @@ def _process_null_values(
 ) -> None | str | list[str] | list[tuple[str, str]]:
     if isinstance(null_values, dict):
         return list(null_values.items())
-    else:
-        return null_values
+
+    return null_values
 
 
 # https://stackoverflow.com/questions/4355524/getting-data-from-ctypes-array-into-numpy
-def _ptr_to_numpy(ptr: int, len: int, ptr_type: Any) -> np.ndarray:
+def _ptr_to_numpy(ptr: int, length: int, ptr_type: Any) -> np.ndarray:
     """
 
     Parameters
     ----------
     ptr
         C/Rust ptr casted to usize.
-    len
+    length
         Length of the array values.
     ptr_type
         Example:
@@ -62,7 +62,7 @@ def _ptr_to_numpy(ptr: int, len: int, ptr_type: Any) -> np.ndarray:
     if not _NUMPY_AVAILABLE:
         raise ImportError("'numpy' is required for this functionality.")
     ptr_ctype = ctypes.cast(ptr, ctypes.POINTER(ptr_type))
-    return np.ctypeslib.as_array(ptr_ctype, (len,))
+    return np.ctypeslib.as_array(ptr_ctype, (length,))
 
 
 def _timedelta_to_pl_duration(td: timedelta) -> str:
@@ -77,37 +77,32 @@ def timedelta_in_nanoseconds_window(td: timedelta) -> bool:
     return in_nanoseconds_window(datetime(1970, 1, 1) + td)
 
 
+def _seconds_scalar(tu: str) -> float:
+    # number of `tu` units contained in one second
+    units_per_second = {"ns": 1e9, "us": 1e6, "ms": 1e3}
+    if tu in units_per_second:
+        return units_per_second[tu]
+    raise ValueError("expected one of {'ns', 'us', 'ms'}")
+
+
 def _datetime_to_pl_timestamp(dt: datetime, tu: str | None) -> int:
     """
     Converts a python datetime to a timestamp in nanoseconds
     """
-    if tu == "ns":
-        return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e9)
-    elif tu == "us":
-        return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e6)
-    elif tu == "ms":
-        return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e3)
     if tu is None:
         # python has us precision
-        return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e6)
-    else:
-        raise ValueError("expected one of {'ns', 'us', 'ms'}")
+        tu = "us"
+
+    s = _seconds_scalar(tu)
+    return int(dt.replace(tzinfo=timezone.utc).timestamp() * s)
 
 
 def _timedelta_to_pl_timedelta(td: timedelta, tu: str | None = None) -> int:
-    if tu == "ns":
-        return int(td.total_seconds() * 1e9)
-    elif tu == "us":
-        return int(td.total_seconds() * 1e6)
-    elif tu == "ms":
-        return int(td.total_seconds() * 1e3)
     if tu is None:
-        if timedelta_in_nanoseconds_window(td):
-            return int(td.total_seconds() * 1e9)
-        else:
-            return int(td.total_seconds() * 1e3)
-    else:
-        raise ValueError("expected one of {'ns', 'us, 'ms'}")
+        tu = "ns" if timedelta_in_nanoseconds_window(td) else "ms"
+
+    s = _seconds_scalar(tu)
+    return int(td.total_seconds() * s)
 
 
 def _date_to_pl_date(d: date) -> int:
@@ -170,15 +165,17 @@ def _to_python_time(value: int) -> time:
     return time(hour=hours, minute=minutes, second=seconds, microsecond=microsecond)
 
 
-def _to_python_timedelta(value: int | float, tu: str | None = "ns") -> timedelta:
+def _to_python_timedelta(value: int | float, tu: str = "ns") -> timedelta:
     if tu == "ns":
         return timedelta(microseconds=value // 1e3)
-    elif tu == "us":
+    if tu == "us":
         return timedelta(microseconds=value)
-    elif tu == "ms":
+    if tu == "ms":
         return timedelta(milliseconds=value)
-    else:
-        raise ValueError(f"time unit: {tu} not expected")
+
+    raise ValueError(
+        f"time unit: {tu} not expected, expected one of {{'ns', 'us', 'ms'}}"
+    )
 
 
 def _prepare_row_count_args(
@@ -187,8 +184,7 @@ def _prepare_row_count_args(
 ) -> tuple[str, int] | None:
     if row_count_name is not None:
         return (row_count_name, row_count_offset)
-    else:
-        return None
+    return None
 
 
 EPOCH = datetime(1970, 1, 1).replace(tzinfo=None)
@@ -205,7 +201,8 @@ def _to_python_datetime(
         # important to create from utc. Not doing this leads
         # to inconsistencies dependent on the timezone you are in.
         return datetime.utcfromtimestamp(value * 3600 * 24).date()
-    elif dtype == Datetime:
+
+    if dtype == Datetime:
         if tu == "ns":
             # nanoseconds to seconds
             dt = EPOCH + timedelta(microseconds=value / 1000)
@@ -219,8 +216,8 @@ def _to_python_datetime(
         if tz is not None and len(tz) > 0:
             import pytz
 
-            timezone = pytz.timezone(tz)
-            return timezone.localize(dt)
+            return pytz.timezone(tz).localize(dt)
+
         return dt
 
     else:
diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml
index 02f830e8dec6..6b3ab93fd4ab 100644
--- a/py-polars/pyproject.toml
+++ b/py-polars/pyproject.toml
@@ -59,3 +59,14 @@ exclude_lines = [
   "if TYPE_CHECKING:",
   "from typing_extensions import ",
 ]
+
+[tool.pylint]
+max-line-length = 200  # effectively disabled; line length is left to black
+disable = [
+  "missing-docstring",
+  "fixme",
+  "invalid-name",
+  "import-outside-toplevel",
+  "redefined-builtin",
+  "too-few-public-methods",
+]