TYP: Check untyped defs (except vendored) (#37556)

pandas-dev · Nov 9, 2020 · 8b6f091
1 parent 82cd86c
commit 8b6f091

Showing 27 changed files with 616 additions and 232 deletions.
diff --git a/pandas/_testing.py b/pandas/_testing.py
@@ -117,14 +117,24 @@ def set_testing_mode():
     # set the testing mode filters
     testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
     if "deprecate" in testing_mode:
-        warnings.simplefilter("always", _testing_mode_warnings)
+        # pandas\_testing.py:119: error: Argument 2 to "simplefilter" has
+        # incompatible type "Tuple[Type[DeprecationWarning],
+        # Type[ResourceWarning]]"; expected "Type[Warning]"
+        warnings.simplefilter(
+            "always", _testing_mode_warnings  # type: ignore[arg-type]
+        )
 
 
 def reset_testing_mode():
     # reset the testing mode filters
     testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
     if "deprecate" in testing_mode:
-        warnings.simplefilter("ignore", _testing_mode_warnings)
+        # pandas\_testing.py:126: error: Argument 2 to "simplefilter" has
+        # incompatible type "Tuple[Type[DeprecationWarning],
+        # Type[ResourceWarning]]"; expected "Type[Warning]"
+        warnings.simplefilter(
+            "ignore", _testing_mode_warnings  # type: ignore[arg-type]
+        )
 
 
 set_testing_mode()
@@ -241,16 +251,22 @@ def decompress_file(path, compression):
     if compression is None:
         f = open(path, "rb")
     elif compression == "gzip":
-        f = gzip.open(path, "rb")
+        # pandas\_testing.py:243: error: Incompatible types in assignment
+        # (expression has type "IO[Any]", variable has type "BinaryIO")
+        f = gzip.open(path, "rb")  # type: ignore[assignment]
     elif compression == "bz2":
-        f = bz2.BZ2File(path, "rb")
+        # pandas\_testing.py:245: error: Incompatible types in assignment
+        # (expression has type "BZ2File", variable has type "BinaryIO")
+        f = bz2.BZ2File(path, "rb")  # type: ignore[assignment]
     elif compression == "xz":
         f = get_lzma_file(lzma)(path, "rb")
     elif compression == "zip":
         zip_file = zipfile.ZipFile(path)
         zip_names = zip_file.namelist()
         if len(zip_names) == 1:
-            f = zip_file.open(zip_names.pop())
+            # pandas\_testing.py:252: error: Incompatible types in assignment
+            # (expression has type "IO[bytes]", variable has type "BinaryIO")
+            f = zip_file.open(zip_names.pop())  # type: ignore[assignment]
         else:
             raise ValueError(f"ZIP file {path} error. Only one file per ZIP.")
     else:
@@ -286,9 +302,15 @@ def write_to_compressed(compression, path, data, dest="test"):
     if compression == "zip":
         compress_method = zipfile.ZipFile
     elif compression == "gzip":
-        compress_method = gzip.GzipFile
+        # pandas\_testing.py:288: error: Incompatible types in assignment
+        # (expression has type "Type[GzipFile]", variable has type
+        # "Type[ZipFile]")
+        compress_method = gzip.GzipFile  # type: ignore[assignment]
     elif compression == "bz2":
-        compress_method = bz2.BZ2File
+        # pandas\_testing.py:290: error: Incompatible types in assignment
+        # (expression has type "Type[BZ2File]", variable has type
+        # "Type[ZipFile]")
+        compress_method = bz2.BZ2File  # type: ignore[assignment]
     elif compression == "xz":
         compress_method = get_lzma_file(lzma)
     else:
@@ -300,7 +322,10 @@ def write_to_compressed(compression, path, data, dest="test"):
         method = "writestr"
     else:
         mode = "wb"
-        args = (data,)
+        # pandas\_testing.py:302: error: Incompatible types in assignment
+        # (expression has type "Tuple[Any]", variable has type "Tuple[Any,
+        # Any]")
+        args = (data,)  # type: ignore[assignment]
         method = "write"
 
     with compress_method(path, mode=mode) as f:
@@ -1996,7 +2021,8 @@ def all_timeseries_index_generator(k=10):
     """
     make_index_funcs = [makeDateIndex, makePeriodIndex, makeTimedeltaIndex]
     for make_index_func in make_index_funcs:
-        yield make_index_func(k=k)
+        # pandas\_testing.py:1986: error: Cannot call function of unknown type
+        yield make_index_func(k=k)  # type: ignore[operator]
 
 
 # make series
@@ -2130,7 +2156,8 @@ def makeCustomIndex(
         p=makePeriodIndex,
     ).get(idx_type)
     if idx_func:
-        idx = idx_func(nentries)
+        # pandas\_testing.py:2120: error: Cannot call function of unknown type
+        idx = idx_func(nentries)  # type: ignore[operator]
         # but we need to fill in the name
         if names:
             idx.name = names[0]
@@ -2158,7 +2185,8 @@ def keyfunc(x):
 
         # build a list of lists to create the index from
         div_factor = nentries // ndupe_l[i] + 1
-        cnt = Counter()
+        # pandas\_testing.py:2148: error: Need type annotation for 'cnt'
+        cnt = Counter()  # type: ignore[var-annotated]
         for j in range(div_factor):
             label = f"{prefix}_l{i}_g{j}"
             cnt[label] = ndupe_l[i]
@@ -2316,7 +2344,14 @@ def _gen_unique_rand(rng, _extra_size):
 
 def makeMissingDataframe(density=0.9, random_state=None):
     df = makeDataFrame()
-    i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state)
+    # pandas\_testing.py:2306: error: "_create_missing_idx" gets multiple
+    # values for keyword argument "density"  [misc]
+
+    # pandas\_testing.py:2306: error: "_create_missing_idx" gets multiple
+    # values for keyword argument "random_state"  [misc]
+    i, j = _create_missing_idx(  # type: ignore[misc]
+        *df.shape, density=density, random_state=random_state
+    )
     df.values[i, j] = np.nan
     return df
 
@@ -2341,7 +2376,10 @@ def dec(f):
         is_decorating = not kwargs and len(args) == 1 and callable(args[0])
         if is_decorating:
             f = args[0]
-            args = []
+            # pandas\_testing.py:2331: error: Incompatible types in assignment
+            # (expression has type "List[<nothing>]", variable has type
+            # "Tuple[Any, ...]")
+            args = []  # type: ignore[assignment]
             return dec(f)
         else:
             return dec
@@ -2534,7 +2572,9 @@ def wrapper(*args, **kwargs):
         except Exception as err:
             errno = getattr(err, "errno", None)
             if not errno and hasattr(errno, "reason"):
-                errno = getattr(err.reason, "errno", None)
+                # pandas\_testing.py:2521: error: "Exception" has no attribute
+                # "reason"
+                errno = getattr(err.reason, "errno", None)  # type: ignore[attr-defined]
 
             if errno in skip_errnos:
                 skip(f"Skipping test due to known errno and error {err}")

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -141,7 +141,11 @@ def get_result(self):
         """ compute the results """
         # dispatch to agg
         if is_list_like(self.f) or is_dict_like(self.f):
-            return self.obj.aggregate(self.f, axis=self.axis, *self.args, **self.kwds)
+            # pandas\core\apply.py:144: error: "aggregate" of "DataFrame" gets
+            # multiple values for keyword argument "axis"
+            return self.obj.aggregate(  # type: ignore[misc]
+                self.f, axis=self.axis, *self.args, **self.kwds
+            )
 
         # all empty
         if len(self.columns) == 0 and len(self.index) == 0:

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -431,7 +431,9 @@ def _validate_comparison_value(self, other):
                 raise InvalidComparison(other)
 
         if isinstance(other, self._recognized_scalars) or other is NaT:
-            other = self._scalar_type(other)
+            # pandas\core\arrays\datetimelike.py:432: error: Too many arguments
+            # for "object"  [call-arg]
+            other = self._scalar_type(other)  # type: ignore[call-arg]
             try:
                 self._check_compatible_with(other)
             except TypeError as err:
@@ -491,14 +493,18 @@ def _validate_shift_value(self, fill_value):
         if is_valid_nat_for_dtype(fill_value, self.dtype):
             fill_value = NaT
         elif isinstance(fill_value, self._recognized_scalars):
-            fill_value = self._scalar_type(fill_value)
+            # pandas\core\arrays\datetimelike.py:746: error: Too many arguments
+            # for "object"  [call-arg]
+            fill_value = self._scalar_type(fill_value)  # type: ignore[call-arg]
         else:
             # only warn if we're not going to raise
             if self._scalar_type is Period and lib.is_integer(fill_value):
                 # kludge for #31971 since Period(integer) tries to cast to str
                 new_fill = Period._from_ordinal(fill_value, freq=self.freq)
             else:
-                new_fill = self._scalar_type(fill_value)
+                # pandas\core\arrays\datetimelike.py:753: error: Too many
+                # arguments for "object"  [call-arg]
+                new_fill = self._scalar_type(fill_value)  # type: ignore[call-arg]
 
             # stacklevel here is chosen to be correct when called from
             #  DataFrame.shift or Series.shift

diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
@@ -186,7 +186,10 @@ def __init__(self, values, copy=False):
         values = extract_array(values)
 
         super().__init__(values, copy=copy)
-        self._dtype = StringDtype()
+        # pandas\core\arrays\string_.py:188: error: Incompatible types in
+        # assignment (expression has type "StringDtype", variable has type
+        # "PandasDtype")  [assignment]
+        self._dtype = StringDtype()  # type: ignore[assignment]
         if not isinstance(values, type(self)):
             self._validate()
 

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -95,7 +95,9 @@ def __sizeof__(self):
         either a value or Series of values
         """
         if hasattr(self, "memory_usage"):
-            mem = self.memory_usage(deep=True)
+            # pandas\core\base.py:84: error: "PandasObject" has no attribute
+            # "memory_usage"  [attr-defined]
+            mem = self.memory_usage(deep=True)  # type: ignore[attr-defined]
             return int(mem if is_scalar(mem) else mem.sum())
 
         # no memory_usage attribute, so fall back to object's 'sizeof'
@@ -204,32 +206,65 @@ def _selection_list(self):
 
     @cache_readonly
     def _selected_obj(self):
-        if self._selection is None or isinstance(self.obj, ABCSeries):
-            return self.obj
+        # pandas\core\base.py:195: error: "SelectionMixin" has no attribute
+        # "obj"  [attr-defined]
+        if self._selection is None or isinstance(
+            self.obj, ABCSeries  # type: ignore[attr-defined]
+        ):
+            # pandas\core\base.py:194: error: "SelectionMixin" has no attribute
+            # "obj"  [attr-defined]
+            return self.obj  # type: ignore[attr-defined]
         else:
-            return self.obj[self._selection]
+            # pandas\core\base.py:204: error: "SelectionMixin" has no attribute
+            # "obj"  [attr-defined]
+            return self.obj[self._selection]  # type: ignore[attr-defined]
 
     @cache_readonly
     def ndim(self) -> int:
         return self._selected_obj.ndim
 
     @cache_readonly
     def _obj_with_exclusions(self):
-        if self._selection is not None and isinstance(self.obj, ABCDataFrame):
-            return self.obj.reindex(columns=self._selection_list)
+        # pandas\core\base.py:209: error: "SelectionMixin" has no attribute
+        # "obj"  [attr-defined]
+        if self._selection is not None and isinstance(
+            self.obj, ABCDataFrame  # type: ignore[attr-defined]
+        ):
+            # pandas\core\base.py:217: error: "SelectionMixin" has no attribute
+            # "obj"  [attr-defined]
+            return self.obj.reindex(  # type: ignore[attr-defined]
+                columns=self._selection_list
+            )
+
+        # pandas\core\base.py:207: error: "SelectionMixin" has no attribute
+        # "exclusions"  [attr-defined]
+        if len(self.exclusions) > 0:  # type: ignore[attr-defined]
+            # pandas\core\base.py:208: error: "SelectionMixin" has no attribute
+            # "obj"  [attr-defined]
 
-        if len(self.exclusions) > 0:
-            return self.obj.drop(self.exclusions, axis=1)
+            # pandas\core\base.py:208: error: "SelectionMixin" has no attribute
+            # "exclusions"  [attr-defined]
+            return self.obj.drop(self.exclusions, axis=1)  # type: ignore[attr-defined]
         else:
-            return self.obj
+            # pandas\core\base.py:210: error: "SelectionMixin" has no attribute
+            # "obj"  [attr-defined]
+            return self.obj  # type: ignore[attr-defined]
 
     def __getitem__(self, key):
         if self._selection is not None:
             raise IndexError(f"Column(s) {self._selection} already selected")
 
         if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray)):
-            if len(self.obj.columns.intersection(key)) != len(key):
-                bad_keys = list(set(key).difference(self.obj.columns))
+            # pandas\core\base.py:217: error: "SelectionMixin" has no attribute
+            # "obj"  [attr-defined]
+            if len(
+                self.obj.columns.intersection(key)  # type: ignore[attr-defined]
+            ) != len(key):
+                # pandas\core\base.py:218: error: "SelectionMixin" has no
+                # attribute "obj"  [attr-defined]
+                bad_keys = list(
+                    set(key).difference(self.obj.columns)  # type: ignore[attr-defined]
+                )
                 raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
             return self._gotitem(list(key), ndim=2)
 
@@ -559,7 +594,11 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs):
               dtype='datetime64[ns]')
         """
         if is_extension_array_dtype(self.dtype):
-            return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs)
+            # pandas\core\base.py:837: error: Too many arguments for "to_numpy"
+            # of "ExtensionArray"  [call-arg]
+            return self.array.to_numpy(  # type: ignore[call-arg]
+                dtype, copy=copy, na_value=na_value, **kwargs
+            )
         elif kwargs:
             bad_keys = list(kwargs.keys())[0]
             raise TypeError(
@@ -851,8 +890,15 @@ def _map_values(self, mapper, na_action=None):
             if is_categorical_dtype(self.dtype):
                 # use the built in categorical series mapper which saves
                 # time by mapping the categories instead of all values
-                self = cast("Categorical", self)
-                return self._values.map(mapper)
+
+                # pandas\core\base.py:893: error: Incompatible types in
+                # assignment (expression has type "Categorical", variable has
+                # type "IndexOpsMixin")  [assignment]
+                self = cast("Categorical", self)  # type: ignore[assignment]
+                # pandas\core\base.py:894: error: Item "ExtensionArray" of
+                # "Union[ExtensionArray, Any]" has no attribute "map"
+                # [union-attr]
+                return self._values.map(mapper)  # type: ignore[union-attr]
 
             values = self._values
 
@@ -869,7 +915,9 @@ def _map_values(self, mapper, na_action=None):
                 raise NotImplementedError
             map_f = lambda values, f: values.map(f)
         else:
-            values = self.astype(object)._values
+            # pandas\core\base.py:1142: error: "IndexOpsMixin" has no attribute
+            # "astype"  [attr-defined]
+            values = self.astype(object)._values  # type: ignore[attr-defined]
             if na_action == "ignore":
 
                 def map_f(values, f):
@@ -1111,7 +1159,9 @@ def memory_usage(self, deep=False):
         are not components of the array if deep=False or if used on PyPy
         """
         if hasattr(self.array, "memory_usage"):
-            return self.array.memory_usage(deep=deep)
+            # pandas\core\base.py:1379: error: "ExtensionArray" has no
+            # attribute "memory_usage"  [attr-defined]
+            return self.array.memory_usage(deep=deep)  # type: ignore[attr-defined]
 
         v = self.array.nbytes
         if deep and is_object_dtype(self) and not PYPY:
@@ -1245,7 +1295,9 @@ def searchsorted(self, value, side="left", sorter=None) -> np.ndarray:
 
     def drop_duplicates(self, keep="first"):
         duplicated = self.duplicated(keep=keep)
-        result = self[np.logical_not(duplicated)]
+        # pandas\core\base.py:1507: error: Value of type "IndexOpsMixin" is not
+        # indexable  [index]
+        result = self[np.logical_not(duplicated)]  # type: ignore[index]
         return result
 
     def duplicated(self, keep="first"):

diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
@@ -659,7 +659,11 @@ def visit_Call(self, node, side=None, **kwargs):
                     raise
 
         if res is None:
-            raise ValueError(f"Invalid function call {node.func.id}")
+            # pandas\core\computation\expr.py:663: error: "expr" has no
+            # attribute "id"  [attr-defined]
+            raise ValueError(
+                f"Invalid function call {node.func.id}"  # type: ignore[attr-defined]
+            )
         if hasattr(res, "value"):
             res = res.value
 
@@ -680,7 +684,12 @@ def visit_Call(self, node, side=None, **kwargs):
 
             for key in node.keywords:
                 if not isinstance(key, ast.keyword):
-                    raise ValueError(f"keyword error in function call '{node.func.id}'")
+                    # pandas\core\computation\expr.py:684: error: "expr" has no
+                    # attribute "id"  [attr-defined]
+                    raise ValueError(
+                        "keyword error in function call "  # type: ignore[attr-defined]
+                        f"'{node.func.id}'"
+                    )
 
                 if key.arg:
                     kwargs[key.arg] = self.visit(key.value).value