Skip to content

Commit

Permalink
Rework groupby and resample core modules (pandas-dev#848)
Browse files Browse the repository at this point in the history
* Rework groupby and resample core modules

* Runtime Series is not generic

* Remove default values and deprecated

* Use np.integer in tests

* Add a comment for Incomplete

* Remove private objects

* Remove deprecated Resampler.fillna

* Remove private constructors

* Remove more private constructors

* Temporarily type labelsize as int

Needs a fix everywhere, and the upstream docs should be updated

* Tighten rolling and expanding method type

* Remove step from groupby rolling

* Fix resample rule

* Fix groupby fillna

* Add missing test for linked issue

* Remove pandas.core.apply as it is not used in public code

* Address CR

* Address remaining CR

* Revert pyproject change

* Temporarily pin pyright to unblock CI

* New deprecations

* Windows tests

* Apply suggestions from code review
  • Loading branch information
hamdanal authored Feb 6, 2024
1 parent 56eafc1 commit e35c3ca
Show file tree
Hide file tree
Showing 28 changed files with 2,325 additions and 851 deletions.
5 changes: 4 additions & 1 deletion pandas-stubs/_libs/properties.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ class CachedProperty:
def __get__(self, obj, typ): ...
def __set__(self, obj, value) -> None: ...

cache_readonly: CachedProperty = ...
# NOTE: this is a lie to make type checkers happy (they special-case
# `property`). cache_readonly uses attribute names similar to those of
# `property` (fget), but it does not provide fset and fdel.
cache_readonly = property

class AxisProperty:
def __init__(self, axis: int = ..., doc: str = ...) -> None: ...
Expand Down
20 changes: 20 additions & 0 deletions pandas-stubs/_typing.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ from pandas.core.dtypes.dtypes import (

from pandas.io.formats.format import EngFormatter

# `Incomplete` is equivalent to `Any`. Use it to annotate symbols that you don't
# know the type of yet and that should be changed in the future. Use `Any` only
# where it is the only acceptable type.
Incomplete: TypeAlias = Any

ArrayLike: TypeAlias = ExtensionArray | np.ndarray
AnyArrayLike: TypeAlias = Index | Series | np.ndarray
PythonScalar: TypeAlias = str | bool | complex
Expand Down Expand Up @@ -80,6 +85,10 @@ class FulldatetimeDict(YearMonthDayDict, total=False):
us: DatetimeDictArg
ns: DatetimeDictArg

CorrelationMethod: TypeAlias = (
Literal["pearson", "kendall", "spearman"]
| Callable[[np.ndarray, np.ndarray], float]
)
# dtypes
NpDtype: TypeAlias = str | np.dtype[np.generic] | type[str | complex | bool | object]
Dtype: TypeAlias = ExtensionDtype | NpDtype
Expand Down Expand Up @@ -444,6 +453,7 @@ class SequenceNotStr(Protocol[_T_co]):
IndexLabel: TypeAlias = Hashable | Sequence[Hashable]
Label: TypeAlias = Hashable | None
Level: TypeAlias = Hashable | int
Shape: TypeAlias = tuple[int, ...]
Suffixes: TypeAlias = tuple[str | None, str | None]
Ordered: TypeAlias = bool | None
JSONSerializable: TypeAlias = PythonScalar | list | dict
Expand All @@ -469,8 +479,11 @@ AggFuncTypeSeriesToFrame: TypeAlias = list[AggFuncTypeBase] | AggFuncTypeDictSer
AggFuncTypeFrame: TypeAlias = (
AggFuncTypeBase | list[AggFuncTypeBase] | AggFuncTypeDictFrame
)
AggFuncTypeDict: TypeAlias = AggFuncTypeDictSeries | AggFuncTypeDictFrame
AggFuncType: TypeAlias = AggFuncTypeBase | list[AggFuncTypeBase] | AggFuncTypeDict

num: TypeAlias = complex
AxisInt: TypeAlias = int
AxisIndex: TypeAlias = Literal["index", 0]
AxisColumn: TypeAlias = Literal["columns", 1]
Axis: TypeAlias = AxisIndex | AxisColumn
Expand Down Expand Up @@ -563,6 +576,9 @@ IndexT = TypeVar("IndexT", bound=Index)
IntervalT = TypeVar("IntervalT", bound=Interval)
IntervalClosedType: TypeAlias = Literal["left", "right", "both", "neither"]

ScalarIndexer: TypeAlias = int | np.integer
SequenceIndexer: TypeAlias = slice | list[int] | np.ndarray
PositionalIndexer: TypeAlias = ScalarIndexer | SequenceIndexer
TakeIndexer: TypeAlias = Sequence[int] | Sequence[np.integer] | npt.NDArray[np.integer]

IgnoreRaiseCoerce: TypeAlias = Literal["ignore", "raise", "coerce"]
Expand Down Expand Up @@ -758,5 +774,9 @@ RandomState: TypeAlias = (
| np.random.BitGenerator
| np.random.RandomState
)
Frequency: TypeAlias = str | BaseOffset
TimeGrouperOrigin: TypeAlias = (
Timestamp | Literal["epoch", "start", "start_day", "end", "end_day"]
)

__all__ = ["npt", "type_t"]
15 changes: 13 additions & 2 deletions pandas-stubs/core/base.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
from collections.abc import Iterator
from collections.abc import (
Hashable,
Iterator,
)
from typing import (
Any,
Generic,
Literal,
final,
)

import numpy as np
Expand All @@ -19,13 +24,19 @@ from pandas._typing import (
Scalar,
npt,
)
from pandas.util._decorators import cache_readonly

class NoNewAttributesMixin:
def __setattr__(self, key, value) -> None: ...
def __setattr__(self, key: str, value: Any) -> None: ...

class SelectionMixin(Generic[NDFrameT]):
obj: NDFrameT
exclusions: frozenset[Hashable]
@final
@cache_readonly
def ndim(self) -> int: ...
def __getitem__(self, key): ...
def aggregate(self, func, *args, **kwargs): ...

class IndexOpsMixin(OpsMixin, Generic[S1]):
__array_priority__: int = ...
Expand Down
57 changes: 17 additions & 40 deletions pandas-stubs/core/frame.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ from pandas.core.indexing import (
_LocIndexer,
)
from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg
from pandas.core.resample import Resampler
from pandas.core.series import Series
from pandas.core.window import (
Expanding,
Expand All @@ -54,6 +53,7 @@ from pandas.core.window.rolling import (
from typing_extensions import Self
import xarray as xr

from pandas._libs.lib import NoDefault
from pandas._libs.missing import NAType
from pandas._libs.tslibs import BaseOffset
from pandas._libs.tslibs.nattype import NaTType
Expand Down Expand Up @@ -1006,104 +1006,96 @@ class DataFrame(NDFrame, OpsMixin):
def groupby(
self,
by: Scalar,
axis: AxisIndex = ...,
axis: AxisIndex | NoDefault = ...,
level: IndexLabel | None = ...,
as_index: _bool = ...,
sort: _bool = ...,
group_keys: _bool = ...,
squeeze: _bool = ...,
observed: _bool = ...,
observed: _bool | NoDefault = ...,
dropna: _bool = ...,
) -> DataFrameGroupBy[Scalar]: ...
@overload
def groupby(
self,
by: DatetimeIndex,
axis: AxisIndex = ...,
axis: AxisIndex | NoDefault = ...,
level: IndexLabel | None = ...,
as_index: _bool = ...,
sort: _bool = ...,
group_keys: _bool = ...,
squeeze: _bool = ...,
observed: _bool = ...,
observed: _bool | NoDefault = ...,
dropna: _bool = ...,
) -> DataFrameGroupBy[Timestamp]: ...
@overload
def groupby(
self,
by: TimedeltaIndex,
axis: AxisIndex = ...,
axis: AxisIndex | NoDefault = ...,
level: IndexLabel | None = ...,
as_index: _bool = ...,
sort: _bool = ...,
group_keys: _bool = ...,
squeeze: _bool = ...,
observed: _bool = ...,
observed: _bool | NoDefault = ...,
dropna: _bool = ...,
) -> DataFrameGroupBy[Timedelta]: ...
@overload
def groupby(
self,
by: PeriodIndex,
axis: AxisIndex = ...,
axis: AxisIndex | NoDefault = ...,
level: IndexLabel | None = ...,
as_index: _bool = ...,
sort: _bool = ...,
group_keys: _bool = ...,
squeeze: _bool = ...,
observed: _bool = ...,
observed: _bool | NoDefault = ...,
dropna: _bool = ...,
) -> DataFrameGroupBy[Period]: ...
@overload
def groupby(
self,
by: IntervalIndex[IntervalT],
axis: AxisIndex = ...,
axis: AxisIndex | NoDefault = ...,
level: IndexLabel | None = ...,
as_index: _bool = ...,
sort: _bool = ...,
group_keys: _bool = ...,
squeeze: _bool = ...,
observed: _bool = ...,
observed: _bool | NoDefault = ...,
dropna: _bool = ...,
) -> DataFrameGroupBy[IntervalT]: ...
@overload
def groupby(
self,
by: MultiIndex | GroupByObjectNonScalar | None = ...,
axis: AxisIndex = ...,
axis: AxisIndex | NoDefault = ...,
level: IndexLabel | None = ...,
as_index: _bool = ...,
sort: _bool = ...,
group_keys: _bool = ...,
squeeze: _bool = ...,
observed: _bool = ...,
observed: _bool | NoDefault = ...,
dropna: _bool = ...,
) -> DataFrameGroupBy[tuple]: ...
@overload
def groupby(
self,
by: Series[SeriesByT],
axis: AxisIndex = ...,
axis: AxisIndex | NoDefault = ...,
level: IndexLabel | None = ...,
as_index: _bool = ...,
sort: _bool = ...,
group_keys: _bool = ...,
squeeze: _bool = ...,
observed: _bool = ...,
observed: _bool | NoDefault = ...,
dropna: _bool = ...,
) -> DataFrameGroupBy[SeriesByT]: ...
@overload
def groupby(
self,
by: CategoricalIndex | Index | Series,
axis: AxisIndex = ...,
axis: AxisIndex | NoDefault = ...,
level: IndexLabel | None = ...,
as_index: _bool = ...,
sort: _bool = ...,
group_keys: _bool = ...,
squeeze: _bool = ...,
observed: _bool = ...,
observed: _bool | NoDefault = ...,
dropna: _bool = ...,
) -> DataFrameGroupBy[Any]: ...
def pivot(
Expand Down Expand Up @@ -1921,21 +1913,6 @@ class DataFrame(NDFrame, OpsMixin):
*,
inplace: Literal[False] = ...,
) -> DataFrame: ...
def resample(
self,
rule,
axis: Axis = ...,
closed: _str | None = ...,
label: _str | None = ...,
convention: TimestampConvention = ...,
kind: Literal["timestamp", "period"] | None = ...,
on: _str | None = ...,
level: Level | None = ...,
origin: Timestamp
| Literal["epoch", "start", "start_day", "end", "end_day"] = ...,
offset: dt.timedelta | Timedelta | _str | None = ...,
group_keys: _bool = ...,
) -> Resampler[DataFrame]: ...
def rfloordiv(
self,
other,
Expand Down
23 changes: 23 additions & 0 deletions pandas-stubs/core/generic.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ from collections.abc import (
Mapping,
Sequence,
)
import datetime as dt
import sqlite3
from typing import (
Any,
Expand All @@ -17,13 +18,15 @@ from typing import (
import numpy as np
from pandas import Index
import pandas.core.indexing as indexing
from pandas.core.resample import DatetimeIndexResampler
from pandas.core.series import Series
import sqlalchemy.engine
from typing_extensions import (
Concatenate,
Self,
)

from pandas._libs.lib import NoDefault
from pandas._typing import (
S1,
ArrayLike,
Expand All @@ -37,6 +40,7 @@ from pandas._typing import (
FilePath,
FileWriteMode,
FillnaOptions,
Frequency,
HashableT1,
HashableT2,
HDFCompLib,
Expand All @@ -48,6 +52,10 @@ from pandas._typing import (
SortKind,
StorageOptions,
T,
TimedeltaConvertibleTypes,
TimeGrouperOrigin,
TimestampConvention,
TimestampConvertibleTypes,
WriteBuffer,
)

Expand Down Expand Up @@ -432,6 +440,21 @@ class NDFrame(indexing.IndexingMixin):
end_time,
axis=...,
) -> Self: ...
@final
def resample(
self,
rule: Frequency | dt.timedelta,
axis: Axis | NoDefault = ...,
closed: Literal["right", "left"] | None = ...,
label: Literal["right", "left"] | None = ...,
convention: TimestampConvention = ...,
kind: Literal["period", "timestamp"] | None = ...,
on: Level | None = ...,
level: Level | None = ...,
origin: TimeGrouperOrigin | TimestampConvertibleTypes = ...,
offset: TimedeltaConvertibleTypes | None = ...,
group_keys: _bool = ...,
) -> DatetimeIndexResampler[Self]: ...
def first(self, offset) -> Self: ...
def last(self, offset) -> Self: ...
def rank(
Expand Down
15 changes: 14 additions & 1 deletion pandas-stubs/core/groupby/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,2 +1,15 @@
from pandas.core.groupby.generic import NamedAgg as NamedAgg
from pandas.core.groupby.generic import (
DataFrameGroupBy as DataFrameGroupBy,
NamedAgg as NamedAgg,
SeriesGroupBy as SeriesGroupBy,
)
from pandas.core.groupby.groupby import GroupBy as GroupBy
from pandas.core.groupby.grouper import Grouper as Grouper

__all__ = [
"DataFrameGroupBy",
"NamedAgg",
"SeriesGroupBy",
"GroupBy",
"Grouper",
]
1 change: 0 additions & 1 deletion pandas-stubs/core/groupby/base.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# from pandas.core.dtypes.common import is_list_like as is_list_like, is_scalar as is_scalar
from collections.abc import Hashable
import dataclasses

Expand Down
6 changes: 0 additions & 6 deletions pandas-stubs/core/groupby/categorical.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +0,0 @@
from pandas.core.arrays.categorical import ( # , CategoricalDtype as CategoricalDtype
Categorical,
)

def recode_for_groupby(c: Categorical, sort: bool, observed: bool): ...
def recode_from_groupby(c: Categorical, sort: bool, ci): ...
Loading

0 comments on commit e35c3ca

Please sign in to comment.