diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f65c42dab1852..e4ab74091d6b6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -85,9 +85,9 @@ repos: - repo: local hooks: - id: pyright + # note: assumes python env is setup and activated name: pyright entry: pyright - # note: assumes python env is setup and activated language: node pass_filenames: false types: [python] @@ -95,22 +95,32 @@ repos: additional_dependencies: &pyright_dependencies - pyright@1.1.262 - id: pyright_reportGeneralTypeIssues + # note: assumes python env is setup and activated name: pyright reportGeneralTypeIssues entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json - # note: assumes python env is setup and activated language: node pass_filenames: false types: [python] stages: [manual] additional_dependencies: *pyright_dependencies - id: mypy + # note: assumes python env is setup and activated name: mypy entry: mypy - # note: assumes python env is setup and activated language: system pass_filenames: false types: [python] stages: [manual] + - id: stubtest + # note: assumes python env is setup and activated + # note: requires pandas dev to be installed + name: mypy (stubtest) + entry: python + language: system + pass_filenames: false + types: [pyi] + args: [scripts/run_stubtest.py] + stages: [manual] - id: flake8-rst name: flake8-rst description: Run flake8 on code snippets in docstrings or RST files diff --git a/pandas/_libs/hashtable.pyi b/pandas/_libs/hashtable.pyi index a6d593076777d..8500fdf2f602e 100644 --- a/pandas/_libs/hashtable.pyi +++ b/pandas/_libs/hashtable.pyi @@ -39,72 +39,72 @@ class Int64Factorizer(Factorizer): ) -> npt.NDArray[np.intp]: ... class Int64Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.int64]: ... class Int32Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... 
def to_array(self) -> npt.NDArray[np.int32]: ... class Int16Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.int16]: ... class Int8Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.int8]: ... class UInt64Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.uint64]: ... class UInt32Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.uint32]: ... class UInt16Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.uint16]: ... class UInt8Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.uint8]: ... class Float64Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.float64]: ... class Float32Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.float32]: ... class Complex128Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.complex128]: ... class Complex64Vector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.complex64]: ... class StringVector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.object_]: ... class ObjectVector: - def __init__(self): ... + def __init__(self, *args): ... def __len__(self) -> int: ... def to_array(self) -> npt.NDArray[np.object_]: ... 
diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 7969c0901e08f..d100108e7dd2b 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -85,7 +85,7 @@ class Timedelta(timedelta): def __new__( cls: type[_S], value=..., - unit: str = ..., + unit: str | None = ..., **kwargs: float | np.integer | np.floating, ) -> _S: ... # GH 46171 diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index f35d744763478..a3b2003b0caf3 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -163,6 +163,7 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray): """ _data: pa.ChunkedArray + _dtype: ArrowDtype def __init__(self, values: pa.Array | pa.ChunkedArray) -> None: if pa_version_under1p01: diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index bb2fefabd6ae5..caddd12a2c2b4 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -108,6 +108,10 @@ class ArrowStringArray(ArrowExtensionArray, BaseStringArray, ObjectStringArrayMi Length: 4, dtype: string """ + # error: Incompatible types in assignment (expression has type "StringDtype", + # base class "ArrowExtensionArray" defined the type as "ArrowDtype") + _dtype: StringDtype # type: ignore[assignment] + def __init__(self, values) -> None: super().__init__(values) # TODO: Migrate to ArrowDtype instead diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 84915e2f52f17..c92c448304de2 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -144,7 +144,7 @@ def __init__( self._win_type = win_type self.axis = obj._get_axis_number(axis) if axis is not None else None self.method = method - self._win_freq_i8 = None + self._win_freq_i8: int | None = None if self.on is None: if self.axis == 0: self._on = self.obj.index @@ -1838,15 +1838,13 @@ def _validate(self): "compatible with a 
datetimelike index" ) from err if isinstance(self._on, PeriodIndex): - # error: Incompatible types in assignment (expression has type "float", - # variable has type "None") + # error: Incompatible types in assignment (expression has type + # "float", variable has type "Optional[int]") self._win_freq_i8 = freq.nanos / ( # type: ignore[assignment] self._on.freq.nanos / self._on.freq.n ) else: - # error: Incompatible types in assignment (expression has type "int", - # variable has type "None") - self._win_freq_i8 = freq.nanos # type: ignore[assignment] + self._win_freq_i8 = freq.nanos # min_periods must be an integer if self.min_periods is None: @@ -2867,7 +2865,9 @@ def _get_window_indexer(self) -> GroupbyIndexer: window = self.window elif self._win_freq_i8 is not None: rolling_indexer = VariableWindowIndexer - window = self._win_freq_i8 + # error: Incompatible types in assignment (expression has type + # "int", variable has type "BaseIndexer") + window = self._win_freq_i8 # type: ignore[assignment] else: rolling_indexer = FixedWindowIndexer window = self.window
diff --git a/scripts/run_stubtest.py b/scripts/run_stubtest.py
new file mode 100644
index 0000000000000..cea9665e649d6
--- /dev/null
+++ b/scripts/run_stubtest.py
@@ -0,0 +1,96 @@
+"""
+Run mypy's stubtest on every ``.pyi`` stub found under ``pandas/``.
+
+Expects to be run from the directory that contains the ``pandas`` package and
+``pyproject.toml``, with a development version of pandas installed.
+"""
+import os
+from pathlib import Path
+import sys
+import tempfile
+import warnings
+
+from mypy import stubtest
+
+import pandas as pd
+
+# fail early if the installed pandas is not a development version
+if "dev" not in getattr(pd, "__version__", ""):
+    # fail on the CI, soft fail during local development
+    warnings.warn("You need to install the development version of pandas")
+    if pd.compat.is_ci_environment():
+        sys.exit(1)
+    else:
+        sys.exit(0)
+
+
+# entries handed to stubtest via --allowlist, one per line
+_ALLOWLIST = [  # should be empty
+    # TODO (child classes implement these methods)
+    "pandas._libs.hashtable.HashTable.__contains__",
+    "pandas._libs.hashtable.HashTable.__len__",
+    "pandas._libs.hashtable.HashTable.factorize",
+    "pandas._libs.hashtable.HashTable.get_item",
+    "pandas._libs.hashtable.HashTable.get_labels",
+    "pandas._libs.hashtable.HashTable.get_state",
+    "pandas._libs.hashtable.HashTable.lookup",
+    "pandas._libs.hashtable.HashTable.map_locations",
+    "pandas._libs.hashtable.HashTable.set_item",
+    "pandas._libs.hashtable.HashTable.sizeof",
+    "pandas._libs.hashtable.HashTable.unique",
+    # stubtest might be too sensitive
+    "pandas._libs.lib.NoDefault",
+    "pandas._libs.lib._NoDefault.no_default",
+    # internal type alias (should probably be private)
+    "pandas._libs.lib.ndarray_obj_2d",
+    # workaround for mypy (cache_readonly = property)
+    "pandas._libs.properties.cache_readonly.__get__",
+    "pandas._libs.properties.cache_readonly.deleter",
+    "pandas._libs.properties.cache_readonly.getter",
+    "pandas._libs.properties.cache_readonly.setter",
+    # TODO (child classes implement these methods)
+    "pandas._libs.sparse.SparseIndex.__init__",
+    "pandas._libs.sparse.SparseIndex.equals",
+    "pandas._libs.sparse.SparseIndex.indices",
+    "pandas._libs.sparse.SparseIndex.intersect",
+    "pandas._libs.sparse.SparseIndex.lookup",
+    "pandas._libs.sparse.SparseIndex.lookup_array",
+    "pandas._libs.sparse.SparseIndex.make_union",
+    "pandas._libs.sparse.SparseIndex.nbytes",
+    "pandas._libs.sparse.SparseIndex.ngaps",
+    "pandas._libs.sparse.SparseIndex.to_block_index",
+    "pandas._libs.sparse.SparseIndex.to_int_index",
+    # TODO (decorator changes argument names)
+    "pandas._libs.tslibs.offsets.BaseOffset._apply_array",
+    "pandas._libs.tslibs.offsets.BusinessHour.rollback",
+    "pandas._libs.tslibs.offsets.BusinessHour.rollforward",
+    # type alias
+    "pandas._libs.tslibs.timedeltas.UnitChoices",
+]
+
+if __name__ == "__main__":
+    # find pyi files and convert their paths to dotted module names
+    root = Path.cwd()
+    pyi_modules = [
+        str(pyi.relative_to(root).with_suffix("")).replace(os.sep, ".")
+        for pyi in root.glob("pandas/**/*.pyi")
+    ]
+
+    # create allowlist in a regular file inside a temporary directory: a
+    # still-open NamedTemporaryFile cannot be re-opened by name on Windows,
+    # which is what stubtest has to do with the path it is given
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        allowlist = Path(tmp_dir) / "allowlist.txt"
+        allowlist.write_text("\n".join(_ALLOWLIST))
+
+        args = pyi_modules + [
+            "--ignore-missing-stub",
+            "--concise",
+            "--mypy-config-file",
+            "pyproject.toml",
+            "--allowlist",
+            str(allowlist),
+        ]
+        exit_code = stubtest.test_stubs(stubtest.parse_options(args))
+
+    sys.exit(exit_code)