From 50561fe1b51d69019d23f9fe77f96dece9f798ac Mon Sep 17 00:00:00 2001 From: Jeroen van Zundert Date: Wed, 20 Jul 2022 22:43:20 +0200 Subject: [PATCH] Add pylint dependency, fix a couple of files (partially) to show impact --- py-polars/build.requirements.txt | 1 + py-polars/polars/__init__.py | 4 +- py-polars/polars/convert.py | 16 +++--- py-polars/polars/datatypes.py | 24 ++++---- py-polars/polars/datatypes_constructor.py | 6 +- py-polars/polars/io.py | 13 ++--- py-polars/polars/testing.py | 42 +++++++------- py-polars/polars/utils.py | 69 +++++++++++------------ py-polars/pyproject.toml | 11 ++++ 9 files changed, 97 insertions(+), 89 deletions(-) diff --git a/py-polars/build.requirements.txt b/py-polars/build.requirements.txt index f08444a2cd65..ea78e8274f50 100644 --- a/py-polars/build.requirements.txt +++ b/py-polars/build.requirements.txt @@ -20,6 +20,7 @@ isort~=5.10.1 mypy==0.961 ghp-import==2.1.0 flake8==4.0.1 +pylint==2.14.5 sphinx==4.2.0 pydata-sphinx-theme==0.6.3 sphinx-panels==0.6.0 diff --git a/py-polars/polars/__init__.py b/py-polars/polars/__init__.py index d3faab2b6352..d770d8f1b064 100644 --- a/py-polars/polars/__init__.py +++ b/py-polars/polars/__init__.py @@ -1,3 +1,4 @@ +import os import warnings try: @@ -10,7 +11,7 @@ def version() -> str: # this is only useful for documentation warnings.warn("polars binary missing!") -import polars.testing as testing +from polars import testing from polars.cfg import Config, toggle_string_cache # We do not export in __all__ from polars.convert import ( from_arrow, @@ -238,6 +239,5 @@ def version() -> str: __version__ = version() -import os os.environ["POLARS_ALLOW_EXTENSION"] = "true" diff --git a/py-polars/polars/convert.py b/py-polars/polars/convert.py index 2c7e0638f578..d7c2c0ad40fa 100644 --- a/py-polars/polars/convert.py +++ b/py-polars/polars/convert.py @@ -165,8 +165,8 @@ def from_records( DeprecationWarning, ) return DataFrame._from_numpy(data, columns=columns, orient=orient) - else: - return DataFrame._from_records(data, columns=columns, orient=orient) + + return DataFrame._from_records(data, columns=columns, orient=orient) def from_numpy( @@ -281,10 +281,10 @@ def from_arrow( raise ImportError("'pyarrow' is required when using from_arrow().") if isinstance(a, pa.Table): return DataFrame._from_arrow(a, rechunk=rechunk) - elif isinstance(a, (pa.Array, pa.ChunkedArray)): + if isinstance(a, (pa.Array, pa.ChunkedArray)): return Series._from_arrow("", a, rechunk) - else: - raise ValueError(f"Expected Arrow Table or Array, got {type(a)}.") + + raise ValueError(f"Expected Arrow Table or Array, got {type(a)}.") @overload @@ -369,7 +369,7 @@ def from_pandas( if isinstance(df, (pd.Series, pd.DatetimeIndex)): return Series._from_pandas("", df, nan_to_none=nan_to_none) - elif isinstance(df, pd.DataFrame): + if isinstance(df, pd.DataFrame): return DataFrame._from_pandas(df, rechunk=rechunk, nan_to_none=nan_to_none) - else: - raise ValueError(f"Expected pandas DataFrame or Series, got {type(df)}.") + + raise ValueError(f"Expected pandas DataFrame or Series, got {type(df)}.") diff --git a/py-polars/polars/datatypes.py b/py-polars/polars/datatypes.py index 2b7e6ec90175..ea86a9713efa 100644 --- a/py-polars/polars/datatypes.py +++ b/py-polars/polars/datatypes.py @@ -126,13 +126,13 @@ def __eq__(self, other: type[DataType]) -> bool: # type: ignore[override] # List[i64] == List[f32] == False # allow comparing object instances to class - if type(other) is type and issubclass(other, List): + if isinstance(other, type) and issubclass(other, List): # type: ignore[redundant-expr] return True if isinstance(other, List): if self.inner is None or other.inner is None: return True - else: - return self.inner == other.inner + + return self.inner == other.inner else: return False @@ -421,26 +421,26 @@ def dtype_to_ctype(dtype: PolarsDataType) -> type[_SimpleCData]: try: return _DTYPE_TO_CTYPE[dtype] except KeyError: # pragma: no cover - raise NotImplementedError + raise NotImplementedError from None def dtype_to_ffiname(dtype: PolarsDataType) -> str: try: return _DTYPE_TO_FFINAME[dtype] except KeyError: # pragma: no cover - raise NotImplementedError + raise NotImplementedError from None def dtype_to_py_type(dtype: PolarsDataType) -> type: try: return _DTYPE_TO_PY_TYPE[dtype] except KeyError: # pragma: no cover - raise NotImplementedError + raise NotImplementedError from None def is_polars_dtype(data_type: Any) -> bool: return ( - type(data_type) is type + isinstance(data_type, type) and issubclass(data_type, DataType) or isinstance(data_type, DataType) ) @@ -453,7 +453,7 @@ def py_type_to_dtype(data_type: Any) -> type[DataType]: try: return _PY_TYPE_TO_DTYPE[data_type] except KeyError: # pragma: no cover - raise NotImplementedError + raise NotImplementedError from None def py_type_to_arrow_type(dtype: type[Any]) -> pa.lib.DataType: @@ -463,7 +463,7 @@ def py_type_to_arrow_type(dtype: type[Any]) -> pa.lib.DataType: try: return _PY_TYPE_TO_ARROW_TYPE[dtype] except KeyError: # pragma: no cover - raise ValueError(f"Cannot parse dtype {dtype} into Arrow dtype.") + raise ValueError(f"Cannot parse dtype {dtype} into Arrow dtype.") from None def dtype_to_arrow_type(dtype: PolarsDataType) -> pa.lib.DataType: @@ -473,7 +473,7 @@ def dtype_to_arrow_type(dtype: PolarsDataType) -> pa.lib.DataType: try: return _DTYPE_TO_ARROW_TYPE[dtype] except KeyError: # pragma: no cover - raise ValueError(f"Cannot parse dtype {dtype} into Arrow dtype.") + raise ValueError(f"Cannot parse dtype {dtype} into Arrow dtype.") from None def supported_numpy_char_code(dtype: str) -> bool: @@ -484,7 +484,7 @@ def numpy_char_code_to_dtype(dtype: str) -> type[DataType]: try: return _NUMPY_CHAR_CODE_TO_DTYPE[dtype] except KeyError: # pragma: no cover - raise NotImplementedError + raise NotImplementedError from None def maybe_cast( @@ -495,7 +495,7 @@ def maybe_cast( if isinstance(el, datetime): return _datetime_to_pl_timestamp(el, time_unit) - elif isinstance(el, timedelta): + if isinstance(el, timedelta): return _timedelta_to_pl_timedelta(el, time_unit) py_type = dtype_to_py_type(dtype) if not isinstance(el, py_type): diff --git a/py-polars/polars/datatypes_constructor.py b/py-polars/polars/datatypes_constructor.py index 72ba02396435..dd4ec1390173 100644 --- a/py-polars/polars/datatypes_constructor.py +++ b/py-polars/polars/datatypes_constructor.py @@ -73,7 +73,7 @@ def polars_type_to_constructor( try: return _POLARS_TYPE_TO_CONSTRUCTOR[dtype] except KeyError: # pragma: no cover - raise ValueError(f"Cannot construct PySeries for type {dtype}.") + raise ValueError(f"Cannot construct PySeries for type {dtype}.") from None if _NUMPY_AVAILABLE and not _DOCUMENTING: @@ -101,8 +101,8 @@ def numpy_type_to_constructor(dtype: type[np.dtype]) -> Callable[..., PySeries]: return _NUMPY_TYPE_TO_CONSTRUCTOR[dtype] except KeyError: return PySeries.new_object - except NameError: # pragma: no cover - raise ImportError("'numpy' is required for this functionality.") + except NameError as err: # pragma: no cover + raise ImportError("'numpy' is required for this functionality.") from err if not _DOCUMENTING: diff --git a/py-polars/polars/io.py b/py-polars/polars/io.py index db589f2e02a2..acc1b7de4a95 100644 --- a/py-polars/polars/io.py +++ b/py-polars/polars/io.py @@ -4,8 +4,6 @@ from pathlib import Path from typing import Any, BinaryIO, Callable, Mapping, TextIO, cast -from polars.utils import format_path, handle_projection_columns - try: import pyarrow as pa @@ -22,6 +20,7 @@ from polars.datatypes import DataType, Utf8 from polars.internals import DataFrame, LazyFrame, _scan_ds from polars.internals.io import _prepare_file_arg +from polars.utils import format_path, handle_projection_columns try: import connectorx as cx @@ -967,10 +966,10 @@ def read_sql( protocol=protocol, ) return cast(DataFrame, from_arrow(tbl)) - else: - raise ImportError( - "connectorx is not installed. Please run `pip install connectorx>=0.2.2`." - ) + + raise ImportError( + "connectorx is not installed. Please run `pip install connectorx>=0.2.2`." + ) def read_excel( @@ -1060,7 +1059,7 @@ def read_excel( except ImportError: raise ImportError( "xlsx2csv is not installed. Please run `pip install xlsx2csv`." - ) + ) from None if isinstance(file, (str, Path)): file = format_path(file) diff --git a/py-polars/polars/testing.py b/py-polars/polars/testing.py index 43e9b863468f..f29b08e96f49 100644 --- a/py-polars/polars/testing.py +++ b/py-polars/polars/testing.py @@ -275,11 +275,11 @@ def _getattr_multi(obj: object, op: str) -> Any: get the attribute "str", and then the attribute "lengths" """ op_list = op.split(".") - return reduce(lambda o, m: getattr(o, m), op_list, obj) + return reduce(getattr, op_list, obj) def verify_series_and_expr_api( - input: Series, expected: Series | None, op: str, *args: Any, **kwargs: Any + result: Series, expected: Series | None, op: str, *args: Any, **kwargs: Any ) -> None: """ Small helper function to test element-wise functions for both the series and expressions api. @@ -291,8 +291,8 @@ def verify_series_and_expr_api( >>> verify_series_and_expr_api(s, expected, "sort") """ expr = _getattr_multi(col("*"), op)(*args, **kwargs) - result_expr: Series = input.to_frame().select(expr)[:, 0] # type: ignore[assignment] - result_series = _getattr_multi(input, op)(*args, **kwargs) + result_expr: Series = result.to_frame().select(expr)[:, 0] # type: ignore[assignment] + result_series = _getattr_multi(result, op)(*args, **kwargs) if expected is None: assert_series_equal(result_series, result_expr) else: @@ -305,7 +305,7 @@ def is_categorical_dtype(data_type: Any) -> bool: Check if the input is a polars Categorical dtype. """ return ( - type(data_type) is type + isinstance(data_type, type) and issubclass(data_type, Categorical) or isinstance(data_type, Categorical) ) @@ -409,23 +409,23 @@ def __post_init__(self) -> None: raise InvalidArgument( f"No strategy (currently) available for {self.dtype} type" ) + + # given a custom strategy, but no explicit dtype. infer one + # from the first non-None value that the strategy produces. + with warnings.catch_warnings(): + # note: usually you should not call "example()" outside of an interactive shell, hence + # the warning. however, here it is reasonable to do so, so we catch and ignore it + warnings.simplefilter("ignore", NonInteractiveExampleWarning) + sample_value_iter = (self.strategy.example() for _ in range(100)) # type: ignore[union-attr] + sample_value_type = type( + next(e for e in sample_value_iter if e is not None) + ) + if sample_value_type is not None: + self.dtype = py_type_to_dtype(sample_value_type) else: - # given a custom strategy, but no explicit dtype. infer one - # from the first non-None value that the strategy produces. - with warnings.catch_warnings(): - # note: usually you should not call "example()" outside of an interactive shell, hence - # the warning. however, here it is reasonable to do so, so we catch and ignore it - warnings.simplefilter("ignore", NonInteractiveExampleWarning) - sample_value_iter = (self.strategy.example() for _ in range(100)) # type: ignore[union-attr] - sample_value_type = type( - next(e for e in sample_value_iter if e is not None) - ) - if sample_value_type is not None: - self.dtype = py_type_to_dtype(sample_value_type) - else: - raise InvalidArgument( - f"Unable to determine dtype for strategy {self.dtype} type" - ) + raise InvalidArgument( + f"Unable to determine dtype for strategy {self.dtype} type" + ) def columns( cols: int | Sequence[str] | None = None, diff --git a/py-polars/polars/utils.py b/py-polars/polars/utils.py index 04fc4826e267..2b8d99a9b27b 100644 --- a/py-polars/polars/utils.py +++ b/py-polars/polars/utils.py @@ -36,19 +36,19 @@ def _process_null_values( ) -> None | str | list[str] | list[tuple[str, str]]: if isinstance(null_values, dict): return list(null_values.items()) - else: - return null_values + + return null_values # https://stackoverflow.com/questions/4355524/getting-data-from-ctypes-array-into-numpy -def _ptr_to_numpy(ptr: int, len: int, ptr_type: Any) -> np.ndarray: +def _ptr_to_numpy(ptr: int, length: int, ptr_type: Any) -> np.ndarray: """ Parameters ---------- ptr C/Rust ptr casted to usize. - len + length Length of the array values. ptr_type Example: @@ -62,7 +62,7 @@ def _ptr_to_numpy(ptr: int, len: int, ptr_type: Any) -> np.ndarray: if not _NUMPY_AVAILABLE: raise ImportError("'numpy' is required for this functionality.") ptr_ctype = ctypes.cast(ptr, ctypes.POINTER(ptr_type)) - return np.ctypeslib.as_array(ptr_ctype, (len,)) + return np.ctypeslib.as_array(ptr_ctype, (length,)) def _timedelta_to_pl_duration(td: timedelta) -> str: @@ -77,37 +77,32 @@ def timedelta_in_nanoseconds_window(td: timedelta) -> bool: return in_nanoseconds_window(datetime(1970, 1, 1) + td) +def _seconds_scalar(tu: str) -> float: + scalar = dict(ns=1e9, us=1e6, ms=1e3) + try: + return scalar[tu] + except KeyError: + raise ValueError("expected one of {'ns', 'us', 'ms'}") from None + + def _datetime_to_pl_timestamp(dt: datetime, tu: str | None) -> int: """ Converts a python datetime to a timestamp in nanoseconds """ - if tu == "ns": - return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e9) - elif tu == "us": - return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e6) - elif tu == "ms": - return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e3) if tu is None: # python has us precision - return int(dt.replace(tzinfo=timezone.utc).timestamp() * 1e6) - else: - raise ValueError("expected one of {'ns', 'us', 'ms'}") + tu = "us" + + s = _seconds_scalar(tu) + return int(dt.replace(tzinfo=timezone.utc).timestamp() * s) def _timedelta_to_pl_timedelta(td: timedelta, tu: str | None = None) -> int: - if tu == "ns": - return int(td.total_seconds() * 1e9) - elif tu == "us": - return int(td.total_seconds() * 1e6) - elif tu == "ms": - return int(td.total_seconds() * 1e3) if tu is None: - if timedelta_in_nanoseconds_window(td): - return int(td.total_seconds() * 1e9) - else: - return int(td.total_seconds() * 1e3) - else: - raise ValueError("expected one of {'ns', 'us, 'ms'}") + tu = "ns" if timedelta_in_nanoseconds_window(td) else "ms" + + s = _seconds_scalar(tu) + return int(td.total_seconds() * s) def _date_to_pl_date(d: date) -> int: @@ -170,15 +165,17 @@ def _to_python_time(value: int) -> time: return time(hour=hours, minute=minutes, second=seconds, microsecond=microsecond) -def _to_python_timedelta(value: int | float, tu: str | None = "ns") -> timedelta: +def _to_python_timedelta(value: int | float, tu: str = "ns") -> timedelta: if tu == "ns": return timedelta(microseconds=value // 1e3) - elif tu == "us": + if tu == "us": return timedelta(microseconds=value) - elif tu == "ms": + if tu == "ms": return timedelta(milliseconds=value) - else: - raise ValueError(f"time unit: {tu} not expected") + + raise ValueError( + f"time unit: {tu} not expected, expected one of {'ns', 'us', 'ms'}" + ) def _prepare_row_count_args( @@ -187,8 +184,7 @@ def _prepare_row_count_args( ) -> tuple[str, int] | None: if row_count_name is not None: return (row_count_name, row_count_offset) - else: - return None + return None EPOCH = datetime(1970, 1, 1).replace(tzinfo=None) @@ -205,7 +201,8 @@ def _to_python_datetime( # important to create from utc. Not doing this leads # to inconsistencies dependent on the timezone you are in. return datetime.utcfromtimestamp(value * 3600 * 24).date() - elif dtype == Datetime: + + if dtype == Datetime: if tu == "ns": # nanoseconds to seconds dt = EPOCH + timedelta(microseconds=value / 1000) @@ -219,8 +216,8 @@ def _to_python_datetime( if tz is not None and len(tz) > 0: import pytz - timezone = pytz.timezone(tz) - return timezone.localize(dt) + return pytz.timezone(tz).localize(dt) + return dt else: diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index 02f830e8dec6..6b3ab93fd4ab 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -59,3 +59,14 @@ exclude_lines = [ "if TYPE_CHECKING:", "from typing_extensions import ", ] + +[tool.pylint] +max-line-length = 200 # disable, leave to black +disable = [ + "missing-docstring", + "fixme", + "invalid-name", + "import-outside-toplevel", + "redefined-builtin", + "too-few-public-methods", +] \ No newline at end of file