From 6b0baccb72c82f46842ebeb9a0e8dd3678e8bde0 Mon Sep 17 00:00:00 2001 From: J van Zundert Date: Mon, 17 Jul 2023 22:14:40 +0100 Subject: [PATCH] docs(python): Address ignored Ruff doc rules (#9919) --- py-polars/docs/source/conf.py | 4 +- py-polars/polars/dataframe/frame.py | 6 +-- py-polars/polars/dataframe/groupby.py | 26 ++++++++++ py-polars/polars/datatypes/classes.py | 39 +++++++++++++-- py-polars/polars/datatypes/convert.py | 13 +++-- py-polars/polars/expr/datetime.py | 59 +++++++++++++++++------ py-polars/polars/io/csv/batched_reader.py | 2 + py-polars/polars/io/delta.py | 10 ++-- py-polars/polars/utils/convert.py | 4 +- py-polars/polars/utils/various.py | 2 +- py-polars/pyproject.toml | 9 ++-- py-polars/scripts/check_stacklevels.py | 4 +- py-polars/tests/docs/run_doctest.py | 7 ++- py-polars/tests/unit/utils/test_utils.py | 6 ++- 14 files changed, 140 insertions(+), 51 deletions(-) diff --git a/py-polars/docs/source/conf.py b/py-polars/docs/source/conf.py index 6c4f0891e9a6..ffb71b2cfde2 100644 --- a/py-polars/docs/source/conf.py +++ b/py-polars/docs/source/conf.py @@ -235,14 +235,14 @@ def _minify_classpaths(s: str) -> str: ) -def process_signature(app, what, name, obj, opts, sig, ret): +def process_signature(app, what, name, obj, opts, sig, ret): # noqa: D103 return ( _minify_classpaths(sig) if sig else sig, _minify_classpaths(ret) if ret else ret, ) -def setup(app): +def setup(app): # noqa: D103 # TODO: a handful of methods do not seem to trigger the event for # some reason (possibly @overloads?) - investigate further... app.connect("autodoc-process-signature", process_signature) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index f86fbb4c45b1..34d270254124 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -3306,9 +3306,9 @@ def write_delta( ... ) # doctest: +SKIP """ - from polars.io.delta import check_if_delta_available, resolve_delta_lake_uri + from polars.io.delta import _check_if_delta_available, _resolve_delta_lake_uri - check_if_delta_available() + _check_if_delta_available() from deltalake.writer import ( try_get_deltatable, @@ -3319,7 +3319,7 @@ def write_delta( delta_write_options = {} if isinstance(target, (str, Path)): - target = resolve_delta_lake_uri(str(target), strict=False) + target = _resolve_delta_lake_uri(str(target), strict=False) unsupported_cols = {} unsupported_types = [Time, Categorical, Null] diff --git a/py-polars/polars/dataframe/groupby.py b/py-polars/polars/dataframe/groupby.py index 573938c59a08..3677a5466a89 100644 --- a/py-polars/polars/dataframe/groupby.py +++ b/py-polars/polars/dataframe/groupby.py @@ -845,6 +845,19 @@ def agg( *aggs: IntoExpr | Iterable[IntoExpr], **named_aggs: IntoExpr, ) -> DataFrame: + """ + Compute aggregations for each group of a groupby operation. + + Parameters + ---------- + *aggs + Aggregations to compute for each group of the groupby operation, + specified as positional arguments. + Accepts expression input. Strings are parsed as column names. + **named_aggs + Additional aggregations, specified as keyword arguments. + The resulting columns will be renamed to the keyword used. + """ return ( self.df.lazy() .groupby_rolling( @@ -1046,6 +1059,19 @@ def agg( *aggs: IntoExpr | Iterable[IntoExpr], **named_aggs: IntoExpr, ) -> DataFrame: + """ + Compute aggregations for each group of a groupby operation. + + Parameters + ---------- + *aggs + Aggregations to compute for each group of the groupby operation, + specified as positional arguments. + Accepts expression input. Strings are parsed as column names. + **named_aggs + Additional aggregations, specified as keyword arguments. + The resulting columns will be renamed to the keyword used. + """ return ( self.df.lazy() .groupby_dynamic( diff --git a/py-polars/polars/datatypes/classes.py b/py-polars/polars/datatypes/classes.py index ec03b5584a26..70272232359c 100644 --- a/py-polars/polars/datatypes/classes.py +++ b/py-polars/polars/datatypes/classes.py @@ -3,7 +3,7 @@ import contextlib from datetime import timezone from inspect import isclass -from typing import TYPE_CHECKING, Any, Callable, Iterator, Mapping, Sequence +from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Mapping, Sequence import polars.datatypes @@ -31,7 +31,7 @@ def __init__(self, method: Callable[..., Any] | None = None) -> None: def __get__(self, instance: Any, cls: type | None = None) -> Any: return self.fget(cls) # type: ignore[misc] - def getter(self, method: Callable[..., Any]) -> Any: + def getter(self, method: Callable[..., Any]) -> Any: # noqa: D102 self.fget = method return self @@ -46,25 +46,29 @@ def _string_repr(cls) -> str: return _dtype_str_repr(cls) def base_type(cls) -> PolarsDataType: + """Return the base type.""" return cls @classproperty def is_nested(self) -> bool: + """Check if this data type is nested.""" return False @classmethod def is_(cls, other: PolarsDataType) -> bool: + """Check if this DataType is the same as another DataType.""" return cls == other and hash(cls) == hash(other) @classmethod def is_not(cls, other: PolarsDataType) -> bool: + """Check if this DataType is NOT the same as another DataType.""" return not cls.is_(other) class DataType(metaclass=DataTypeClass): """Base class for all Polars data types.""" - def __new__(cls, *args: Any, **kwargs: Any) -> PolarsDataType: # type: ignore[misc] + def __new__(cls, *args: Any, **kwargs: Any) -> PolarsDataType: # type: ignore[misc] # noqa: D102 # this formulation allows for equivalent use of "pl.Type" and "pl.Type()", while # still respecting types that take initialisation params (eg: Duration/Datetime) if args or kwargs: @@ -95,6 +99,7 @@ def base_type(cls) -> DataTypeClass: @classproperty def is_nested(self) -> bool: + """Check if this data type is nested.""" return False @classinstmethod # type: ignore[arg-type] @@ -158,15 +163,30 @@ def _custom_reconstruct( class DataTypeGroup(frozenset): # type: ignore[type-arg] + """Group of data types.""" + _match_base_type: bool - def __new__(cls, items: Any, *, match_base_type: bool = True) -> DataTypeGroup: + def __new__( + cls, items: Iterable[DataType | DataTypeClass], *, match_base_type: bool = True + ) -> DataTypeGroup: + """ + Construct a DataTypeGroup. + + Parameters + ---------- + items : + iterable of data types + match_base_type: + match the base type + + """ for it in items: if not isinstance(it, (DataType, DataTypeClass)): raise TypeError( f"DataTypeGroup items must be dtypes; found {type(it).__name__!r}" ) - dtype_group = super().__new__(cls, items) + dtype_group = super().__new__(cls, items) # type: ignore[arg-type] dtype_group._match_base_type = match_base_type return dtype_group @@ -201,6 +221,7 @@ class NestedType(DataType): @classproperty def is_nested(self) -> bool: + """Check if this data type is nested.""" return True @@ -406,6 +427,8 @@ class Unknown(DataType): class List(NestedType): + """Nested list/array type with variable length of inner lists.""" + inner: PolarsDataType | None = None def __init__(self, inner: PolarsDataType | PythonDataType): @@ -466,6 +489,8 @@ def __repr__(self) -> str: class Array(NestedType): + """Nested list/array type with fixed length of inner arrays.""" + inner: PolarsDataType | None = None width: int @@ -524,6 +549,8 @@ def __repr__(self) -> str: class Field: + """Definition of a single field within a `Struct` DataType.""" + def __init__(self, name: str, dtype: PolarsDataType): """ Definition of a single field within a `Struct` DataType. @@ -551,6 +578,8 @@ def __repr__(self) -> str: class Struct(NestedType): + """Struct composite type.""" + def __init__(self, fields: Sequence[Field] | SchemaDict): """ Struct composite type. diff --git a/py-polars/polars/datatypes/convert.py b/py-polars/polars/datatypes/convert.py index ec06b8bbe44c..9b4f1a2a985c 100644 --- a/py-polars/polars/datatypes/convert.py +++ b/py-polars/polars/datatypes/convert.py @@ -73,7 +73,7 @@ T = TypeVar("T") -def cache(function: Callable[..., T]) -> T: +def cache(function: Callable[..., T]) -> T: # noqa: D103 # need this to satisfy mypy issue with "@property/@cache combination" # See: https://github.com/python/mypy/issues/5858 return functools.lru_cache()(function) # type: ignore[return-value] @@ -98,7 +98,10 @@ def cache(function: Callable[..., T]) -> T: @functools.lru_cache(16) -def map_py_type_to_dtype(python_dtype: PythonDataType | type[object]) -> PolarsDataType: +def _map_py_type_to_dtype( + python_dtype: PythonDataType | type[object], +) -> PolarsDataType: + """Convert Python data type to Polars data type.""" if python_dtype is float: return Float64 if python_dtype is int: @@ -134,14 +137,14 @@ def map_py_type_to_dtype(python_dtype: PythonDataType | type[object]) -> PolarsD if hasattr(python_dtype, "__origin__") and hasattr(python_dtype, "__args__"): base_type = python_dtype.__origin__ if base_type is not None: - dtype = map_py_type_to_dtype(base_type) + dtype = _map_py_type_to_dtype(base_type) nested = python_dtype.__args__ if len(nested) == 1: nested = nested[0] return ( dtype if nested is None - else dtype(map_py_type_to_dtype(nested)) # type: ignore[operator] + else dtype(_map_py_type_to_dtype(nested)) # type: ignore[operator] ) raise TypeError("Invalid type") @@ -424,7 +427,7 @@ def py_type_to_dtype( if is_polars_dtype(data_type): return data_type try: - return map_py_type_to_dtype(data_type) + return _map_py_type_to_dtype(data_type) except (KeyError, TypeError): # pragma: no cover if not raise_unmatched: return None diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index 993853da0f64..8aa800cdb4c3 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -75,7 +75,7 @@ def truncate( Returns ------- - Date/Datetime series + Expression of data type `Date`/`Datetime` Examples -------- @@ -199,7 +199,7 @@ def round( Returns ------- - Date/Datetime series + Expression of data type `Date`/`Datetime` Warnings -------- @@ -890,12 +890,39 @@ def ordinal_day(self) -> Expr: return wrap_expr(self._pyexpr.dt_ordinal_day()) def time(self) -> Expr: + """ + Extract time. + + Applies to Datetime columns only; fails on Date. + + Returns + ------- + Expression of data type `Time` + """ return wrap_expr(self._pyexpr.dt_time()) def date(self) -> Expr: + """ + Extract date from date(time). + + Applies to Date and Datetime columns. + + Returns + ------- + Expression of data type `Date` + """ return wrap_expr(self._pyexpr.dt_date()) def datetime(self) -> Expr: + """ + Return datetime. + + Applies to Datetime columns. + + Returns + ------- + Expression of data type `Datetime` + """ return wrap_expr(self._pyexpr.dt_datetime()) def hour(self) -> Expr: @@ -1256,7 +1283,7 @@ def timestamp(self, time_unit: TimeUnit = "us") -> Expr: def with_time_unit(self, time_unit: TimeUnit) -> Expr: """ - Set time unit of a Series of dtype Datetime or Duration. + Set time unit of an expression of dtype Datetime or Duration. This does not modify underlying data, and should be used to fix an incorrect time unit. @@ -1264,7 +1291,7 @@ def with_time_unit(self, time_unit: TimeUnit) -> Expr: Parameters ---------- time_unit : {'ns', 'us', 'ms'} - Unit of time for the ``Datetime`` Series. + Unit of time for the ``Datetime`` expression. Examples -------- @@ -1307,7 +1334,7 @@ def cast_time_unit(self, time_unit: TimeUnit) -> Expr: Parameters ---------- time_unit : {'ns', 'us', 'ms'} - Time unit for the ``Datetime`` Series. + Time unit for the ``Datetime`` expression. Examples -------- @@ -1342,12 +1369,12 @@ def cast_time_unit(self, time_unit: TimeUnit) -> Expr: def convert_time_zone(self, time_zone: str) -> Expr: """ - Convert to given time zone for a Series of type Datetime. + Convert to given time zone for an expression of type Datetime. Parameters ---------- time_zone - Time zone for the `Datetime` Series. + Time zone for the `Datetime` expression. Examples -------- @@ -1388,7 +1415,7 @@ def replace_time_zone( self, time_zone: str | None, *, use_earliest: bool | None = None ) -> Expr: """ - Replace time zone for a Series of type Datetime. + Replace time zone for an expression of type Datetime. Different from ``convert_time_zone``, this will also modify the underlying timestamp and will ignore the original time zone. @@ -1396,7 +1423,7 @@ def replace_time_zone( Parameters ---------- time_zone - Time zone for the `Datetime` Series. Pass `None` to unset time zone. + Time zone for the `Datetime` expression. Pass `None` to unset time zone. use_earliest If localizing an ambiguous datetime (say, due to daylight saving time), determine whether to localize to the earliest datetime or not. @@ -1487,7 +1514,7 @@ def days(self) -> Expr: Returns ------- - A series of dtype Int64 + Expression of data type Int64 Examples -------- @@ -1525,7 +1552,7 @@ def hours(self) -> Expr: Returns ------- - A series of dtype Int64 + Expression of data type Int64 Examples -------- @@ -1564,7 +1591,7 @@ def minutes(self) -> Expr: Returns ------- - A series of dtype Int64 + Expression of data type Int64 Examples -------- @@ -1603,7 +1630,7 @@ def seconds(self) -> Expr: Returns ------- - A series of dtype Int64 + Expression of data type `Int64` Examples -------- @@ -1646,7 +1673,7 @@ def milliseconds(self) -> Expr: Returns ------- - A series of dtype Int64 + Expression of data type Int64 Examples -------- @@ -1693,7 +1720,7 @@ def microseconds(self) -> Expr: Returns ------- - A series of dtype Int64 + Expression of data type Int64 Examples -------- @@ -1740,7 +1767,7 @@ def nanoseconds(self) -> Expr: Returns ------- - A series of dtype Int64 + Expression of data type Int64 Examples -------- diff --git a/py-polars/polars/io/csv/batched_reader.py b/py-polars/polars/io/csv/batched_reader.py index 62dda710dec1..493a48fef610 100644 --- a/py-polars/polars/io/csv/batched_reader.py +++ b/py-polars/polars/io/csv/batched_reader.py @@ -26,6 +26,8 @@ class BatchedCsvReader: + """Read a CSV file in batches.""" + def __init__( self, source: str | Path, diff --git a/py-polars/polars/io/delta.py b/py-polars/polars/io/delta.py index eb52e66f88b2..ab55acebd18e 100644 --- a/py-polars/polars/io/delta.py +++ b/py-polars/polars/io/delta.py @@ -126,7 +126,7 @@ def read_delta( if pyarrow_options is None: pyarrow_options = {} - resolved_uri = resolve_delta_lake_uri(source) + resolved_uri = _resolve_delta_lake_uri(source) dl_tbl = _get_delta_lake_table( table_path=resolved_uri, @@ -254,7 +254,7 @@ def scan_delta( if pyarrow_options is None: pyarrow_options = {} - resolved_uri = resolve_delta_lake_uri(source) + resolved_uri = _resolve_delta_lake_uri(source) dl_tbl = _get_delta_lake_table( table_path=resolved_uri, version=version, @@ -266,7 +266,7 @@ def scan_delta( return scan_pyarrow_dataset(pa_ds) -def resolve_delta_lake_uri(table_uri: str, strict: bool = True) -> str: +def _resolve_delta_lake_uri(table_uri: str, strict: bool = True) -> str: parsed_result = urlparse(table_uri) resolved_uri = str( @@ -297,7 +297,7 @@ def _get_delta_lake_table( DeltaTable """ - check_if_delta_available() + _check_if_delta_available() if delta_table_options is None: delta_table_options = {} @@ -312,7 +312,7 @@ def _get_delta_lake_table( return dl_tbl -def check_if_delta_available() -> None: +def _check_if_delta_available() -> None: if not _DELTALAKE_AVAILABLE: raise ImportError( "deltalake is not installed. Please run `pip install deltalake>=0.9.0`." diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/utils/convert.py index a102573930bf..892943429ed4 100644 --- a/py-polars/polars/utils/convert.py +++ b/py-polars/polars/utils/convert.py @@ -30,13 +30,13 @@ elif _ZONEINFO_AVAILABLE: from backports.zoneinfo._zoneinfo import ZoneInfo - def get_zoneinfo(key: str) -> ZoneInfo: + def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 pass else: @lru_cache(None) - def get_zoneinfo(key: str) -> ZoneInfo: + def get_zoneinfo(key: str) -> ZoneInfo: # noqa: D103 return zoneinfo.ZoneInfo(key) diff --git a/py-polars/polars/utils/various.py b/py-polars/polars/utils/various.py index d6820aaf3b23..ab7030835ce3 100644 --- a/py-polars/polars/utils/various.py +++ b/py-polars/polars/utils/various.py @@ -320,7 +320,7 @@ def str_duration_(td: str | None) -> int | None: NS = TypeVar("NS") -class sphinx_accessor(property): +class sphinx_accessor(property): # noqa: D101 def __get__( # type: ignore[override] self, instance: Any, diff --git a/py-polars/pyproject.toml b/py-polars/pyproject.toml index b6a615424d50..30624fca245f 100644 --- a/py-polars/pyproject.toml +++ b/py-polars/pyproject.toml @@ -144,12 +144,9 @@ ignore = [ # pycodestyle # TODO: Remove errors below to further improve docstring linting # Ordered from most common to least common errors. - "D105", - "D100", - "D103", - "D102", - "D104", - "D101", + "D105", # Missing docstring in magic method + "D100", # Missing docstring in public module + "D104", # Missing docstring in public package ] [tool.ruff.pycodestyle] diff --git a/py-polars/scripts/check_stacklevels.py b/py-polars/scripts/check_stacklevels.py index 2ff14283ea01..5bcdca3bd57a 100644 --- a/py-polars/scripts/check_stacklevels.py +++ b/py-polars/scripts/check_stacklevels.py @@ -15,12 +15,12 @@ EXCLUDE = frozenset(["polars/utils/polars_version.py"]) -class StackLevelChecker(NodeVisitor): +class StackLevelChecker(NodeVisitor): # noqa: D101 def __init__(self, file) -> None: self.file = file self.violations = set() - def visit_Call(self, node: ast.Call) -> None: + def visit_Call(self, node: ast.Call) -> None: # noqa: D102 for keyword in node.keywords: if keyword.arg == "stacklevel" and isinstance(keyword.value, ast.Constant): self.violations.add( diff --git a/py-polars/tests/docs/run_doctest.py b/py-polars/tests/docs/run_doctest.py index 7ac692a3bea3..f071cfe91e7c 100644 --- a/py-polars/tests/docs/run_doctest.py +++ b/py-polars/tests/docs/run_doctest.py @@ -75,8 +75,11 @@ def modules_in_path(p: Path) -> Iterator[ModuleType]: OutputChecker = doctest.OutputChecker - class CustomOutputChecker(OutputChecker): + class IgnoreResultOutputChecker(OutputChecker): + """Python doctest output checker with support for IGNORE_RESULT.""" + def check_output(self, want: str, got: str, optionflags: Any) -> bool: + """Return True iff the actual output from an example matches the output.""" if IGNORE_RESULT_ALL: return True if IGNORE_RESULT & optionflags: @@ -84,7 +87,7 @@ def check_output(self, want: str, got: str, optionflags: Any) -> bool: else: return OutputChecker.check_output(self, want, got, optionflags) - doctest.OutputChecker = CustomOutputChecker # type: ignore[misc] + doctest.OutputChecker = IgnoreResultOutputChecker # type: ignore[misc] # We want to be relaxed about whitespace, but strict on True vs 1 doctest.NORMALIZE_WHITESPACE = True diff --git a/py-polars/tests/unit/utils/test_utils.py b/py-polars/tests/unit/utils/test_utils.py index 7119dab5cfed..e49412b70fef 100644 --- a/py-polars/tests/unit/utils/test_utils.py +++ b/py-polars/tests/unit/utils/test_utils.py @@ -119,9 +119,11 @@ def test_parse_version(v1: Any, v2: Any) -> None: assert parse_version(v2) < parse_version(v1) -class Foo: +class Foo: # noqa: D101 @deprecate_nonkeyword_arguments(allowed_args=["self", "baz"]) - def bar(self, baz: str, ham: str | None = None, foobar: str | None = None) -> None: + def bar( # noqa: D102 + self, baz: str, ham: str | None = None, foobar: str | None = None + ) -> None: ...