Commit 6a18b99

Merge branch 'main' into 37715-remove-mypy-ignore-VI

natmokval committed May 6, 2023
2 parents 3c50078 + 073156e
Showing 53 changed files with 218 additions and 220 deletions.
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/pandas_vb_common.py
@@ -17,7 +17,7 @@
 try:
     import pandas._testing as tm
 except ImportError:
-    import pandas.util.testing as tm  # noqa:F401
+    import pandas.util.testing as tm  # noqa: F401


 numeric_dtypes = [
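Most of this merge is mechanical lint-comment cleanup: bare # noqa suppressions are narrowed to explicit codes, and # noqa:CODE gains a space after the colon, matching the # noqa: CODE form the flake8 documentation uses. The narrowing is the part with teeth, since a bare # noqa silences every diagnostic on its line. A minimal illustration of the difference, assuming flake8-style linting (these imports are placeholders, not pandas code):

import os    # noqa              # blanket: hides F401 and any other code flagged on this line
import sys   # noqa: F401        # scoped: hides only "imported but unused"
import json  # noqa: F401,E501   # several codes may be listed, comma-separated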
2 changes: 1 addition & 1 deletion doc/source/user_guide/window.rst
@@ -96,7 +96,7 @@ be calculated with :meth:`~Rolling.apply` by specifying a separate column of weights
         return arr

     df = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]])
-    df.rolling(2, method="table", min_periods=0).apply(weighted_mean, raw=True, engine="numba")  # noqa:E501
+    df.rolling(2, method="table", min_periods=0).apply(weighted_mean, raw=True, engine="numba")  # noqa: E501

 .. versionadded:: 1.3
4 changes: 2 additions & 2 deletions pandas/__init__.py
@@ -21,7 +21,7 @@
 try:
     # numpy compat
     from pandas.compat import (
-        is_numpy_dev as _is_numpy_dev,  # pyright: ignore # noqa:F401
+        is_numpy_dev as _is_numpy_dev,  # pyright: ignore[reportUnusedImport] # noqa: F401,E501
     )
 except ImportError as _err:  # pragma: no cover
     _module = _err.name
@@ -41,7 +41,7 @@
 )

 # let init-time option registration happen
-import pandas.core.config_init  # pyright: ignore # noqa:F401
+import pandas.core.config_init  # pyright: ignore[reportUnusedImport] # noqa: F401

 from pandas.core.api import (
     # dtype
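The pyright suppressions get the same treatment: a bare # pyright: ignore becomes # pyright: ignore[reportUnusedImport], naming the one diagnostic rule being silenced. A two-line sketch, assuming a default pyright configuration (the unused stdlib imports are placeholders):

import os   # pyright: ignore                      # blanket: every rule silenced on this line
import sys  # pyright: ignore[reportUnusedImport]  # scoped: only the unused-import rule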
2 changes: 1 addition & 1 deletion pandas/_config/__init__.py
@@ -17,7 +17,7 @@
     "using_copy_on_write",
 ]
 from pandas._config import config
-from pandas._config import dates  # pyright: ignore # noqa:F401
+from pandas._config import dates  # pyright: ignore[reportUnusedImport] # noqa: F401
 from pandas._config.config import (
     _global_config,
     describe_option,
4 changes: 2 additions & 2 deletions pandas/_libs/__init__.py
@@ -13,8 +13,8 @@
 # Below imports needs to happen first to ensure pandas top level
 # module gets monkeypatched with the pandas_datetime_CAPI
 # see pandas_datetime_exec in pd_datetime.c
-import pandas._libs.pandas_parser  # noqa # isort: skip # type: ignore[reportUnusedImport]
-import pandas._libs.pandas_datetime  # noqa # isort: skip # type: ignore[reportUnusedImport]
+import pandas._libs.pandas_parser  # noqa: F401,E501 # isort: skip # type: ignore[reportUnusedImport]
+import pandas._libs.pandas_datetime  # noqa: F401,E501 # isort: skip # type: ignore[reportUnusedImport]
 from pandas._libs.interval import Interval
 from pandas._libs.tslibs import (
     NaT,
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyi
@@ -30,7 +30,7 @@ from enum import Enum
 class _NoDefault(Enum):
     no_default = ...

-no_default: Final = _NoDefault.no_default  # noqa
+no_default: Final = _NoDefault.no_default  # noqa: PYI015
 NoDefault = Literal[_NoDefault.no_default]

 i8max: int
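For context, _NoDefault is pandas' sentinel for "argument not passed at all", which plain None cannot express because None is often a legitimate value. A self-contained sketch of the pattern, simplified from the stub above (resample_limit is a hypothetical consumer, not a pandas API):

from __future__ import annotations

from enum import Enum
from typing import Literal


class _NoDefault(Enum):
    no_default = "NO_DEFAULT"


no_default = _NoDefault.no_default
NoDefault = Literal[_NoDefault.no_default]


def resample_limit(limit: int | None | NoDefault = no_default) -> str:
    # The sentinel distinguishes "caller omitted limit" from "caller passed None".
    if limit is no_default:
        return "use the built-in default"
    return f"caller chose {limit!r}"


print(resample_limit())      # use the built-in default
print(resample_limit(None))  # caller chose None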
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimes.py
@@ -534,7 +534,7 @@ def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
     # error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
     # incompatible with return type "ExtensionDtype" in supertype
     # "ExtensionArray"
-    def dtype(self) -> np.dtype[np.datetime64] | DatetimeTZDtype:  # type: ignore[override] # noqa:E501
+    def dtype(self) -> np.dtype[np.datetime64] | DatetimeTZDtype:  # type: ignore[override] # noqa: E501
         """
         The dtype for the DatetimeArray.
2 changes: 1 addition & 1 deletion pandas/core/dtypes/cast.py
@@ -1200,7 +1200,7 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None:
     Traceback (most recent call last):
         ...
     TypeError: dtype=timedelta64[ps] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns'
-    """  # noqa:E501
+    """  # noqa: E501
     msg = (
         f"The '{dtype.name}' dtype has no unit. "
         f"Please pass in '{dtype.name}[ns]' instead."
2 changes: 1 addition & 1 deletion pandas/core/dtypes/dtypes.py
@@ -902,7 +902,7 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype):
     # error: Incompatible types in assignment (expression has type
     # "Dict[int, PandasExtensionDtype]", base class "PandasExtensionDtype"
     # defined the type as "Dict[str, PandasExtensionDtype]") [assignment]
-    _cache_dtypes: dict[BaseOffset, PeriodDtype] = {}  # type: ignore[assignment] # noqa:E501
+    _cache_dtypes: dict[BaseOffset, PeriodDtype] = {}  # type: ignore[assignment] # noqa: E501
     __hash__ = PeriodDtypeBase.__hash__
     _freq: BaseOffset
4 changes: 2 additions & 2 deletions pandas/core/frame.py
@@ -10431,7 +10431,7 @@ def corr(
               dogs  cats
         dogs   1.0   NaN
         cats   NaN   1.0
-        """  # noqa:E501
+        """  # noqa: E501
         data = self._get_numeric_data() if numeric_only else self
         cols = data.columns
         idx = cols.copy()
@@ -10676,7 +10676,7 @@ def corrwith(
         d    1.0
         e    NaN
         dtype: float64
-        """  # noqa:E501
+        """  # noqa: E501
         axis = self._get_axis_number(axis)
         this = self._get_numeric_data() if numeric_only else self
6 changes: 3 additions & 3 deletions pandas/core/generic.py
@@ -210,7 +210,7 @@
 _shared_doc_kwargs = {
     "axes": "keywords for axes",
     "klass": "Series/DataFrame",
-    "axes_single_arg": "{0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame",  # noqa:E501
+    "axes_single_arg": "{0 or 'index'} for Series, {0 or 'index', 1 or 'columns'} for DataFrame",  # noqa: E501
     "inplace": """
 inplace : bool, default False
     If True, performs operation inplace and returns None.""",
@@ -2904,7 +2904,7 @@ def to_sql(
         >>> with engine.connect() as conn:
         ...    conn.execute(text("SELECT * FROM integers")).fetchall()
         [(1,), (None,), (2,)]
-        """  # noqa:E501
+        """  # noqa: E501
         from pandas.io import sql

         return sql.to_sql(
@@ -5901,7 +5901,7 @@ def sample(
                 num_legs  num_wings  num_specimen_seen
         falcon          2          2                 10
         fish            0          0                  8
-        """  # noqa:E501
+        """  # noqa: E501
         if axis is None:
             axis = 0
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
@@ -4227,7 +4227,7 @@ def sample(
         5  black  5
         2   blue  2
         0    red  0
-        """  # noqa:E501
+        """  # noqa: E501
         if self._selected_obj.empty:
             # GH48459 prevent ValueError when object is empty
             return self._selected_obj
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
@@ -1537,7 +1537,7 @@ def delete(self, loc) -> list[Block]:
         else:
             # No overload variant of "__getitem__" of "ExtensionArray" matches
             # argument type "Tuple[slice, slice]"
-            values = self.values[previous_loc + 1 : idx, :]  # type: ignore[call-overload] # noqa
+            values = self.values[previous_loc + 1 : idx, :]  # type: ignore[call-overload] # noqa: E501
         locs = mgr_locs_arr[previous_loc + 1 : idx]
         nb = type(self)(
             values, placement=BlockPlacement(locs), ndim=self.ndim, refs=refs
2 changes: 1 addition & 1 deletion pandas/core/methods/describe.py
@@ -193,7 +193,7 @@ def _select_data(self) -> DataFrame:
             include=self.include,
             exclude=self.exclude,
         )
-        return data  # pyright: ignore
+        return data  # pyright: ignore[reportGeneralTypeIssues]


 def reorder_columns(ldesc: Sequence[Series]) -> list[Hashable]:
2 changes: 1 addition & 1 deletion pandas/core/series.py
@@ -2682,7 +2682,7 @@ def corr(
         >>> s2 = pd.Series([.3, .6, .0, .1])
         >>> s1.corr(s2, method=histogram_intersection)
         0.3
-        """  # noqa:E501
+        """  # noqa: E501
         this, other = self.align(other, join="inner", copy=False)
         if len(this) == 0:
             return np.nan
2 changes: 1 addition & 1 deletion pandas/core/tools/numeric.py
@@ -218,7 +218,7 @@ def to_numeric(
         values = ensure_object(values)
         coerce_numeric = errors not in ("ignore", "raise")
         try:
-            values, new_mask = lib.maybe_convert_numeric(  # type: ignore[call-overload] # noqa
+            values, new_mask = lib.maybe_convert_numeric(  # type: ignore[call-overload] # noqa: E501
                 values,
                 set(),
                 coerce_numeric=coerce_numeric,
4 changes: 2 additions & 2 deletions pandas/io/parquet.py
@@ -161,7 +161,7 @@ def __init__(self) -> None:
         import pyarrow.parquet

         # import utils to register the pyarrow extension types
-        import pandas.core.arrays.arrow.extension_types  # pyright: ignore # noqa:F401
+        import pandas.core.arrays.arrow.extension_types  # pyright: ignore[reportUnusedImport] # noqa: F401,E501

         self.api = pyarrow
@@ -243,7 +243,7 @@ def read(
             mapping = _arrow_dtype_mapping()
             to_pandas_kwargs["types_mapper"] = mapping.get
         elif dtype_backend == "pyarrow":
-            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype  # type: ignore[assignment] # noqa
+            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype  # type: ignore[assignment] # noqa: E501

         manager = get_option("mode.data_manager")
         if manager == "array":
6 changes: 3 additions & 3 deletions pandas/io/parsers/base_parser.py
@@ -713,7 +713,7 @@ def _infer_types(
                     values,
                     na_values,
                     False,
-                    convert_to_masked_nullable=non_default_dtype_backend,  # type: ignore[arg-type] # noqa
+                    convert_to_masked_nullable=non_default_dtype_backend,  # type: ignore[arg-type] # noqa: E501
                 )
             except (ValueError, TypeError):
                 # e.g. encountering datetime string gets ValueError
@@ -749,7 +749,7 @@ def _infer_types(
                     np.asarray(values),
                     true_values=self.true_values,
                     false_values=self.false_values,
-                    convert_to_masked_nullable=non_default_dtype_backend,  # type: ignore[arg-type] # noqa
+                    convert_to_masked_nullable=non_default_dtype_backend,  # type: ignore[arg-type] # noqa: E501
                 )
                 if result.dtype == np.bool_ and non_default_dtype_backend:
                     if bool_mask is None:
@@ -812,7 +812,7 @@ def _cast_types(self, values: ArrayLike, cast_type: DtypeObj, column) -> ArrayLike:
         if is_bool_dtype(cast_type):
             # error: Unexpected keyword argument "true_values" for
             # "_from_sequence_of_strings" of "ExtensionArray"
-            return array_type._from_sequence_of_strings(  # type: ignore[call-arg] # noqa:E501
+            return array_type._from_sequence_of_strings(  # type: ignore[call-arg] # noqa: E501
                 values,
                 dtype=cast_type,
                 true_values=self.true_values,
54 changes: 32 additions & 22 deletions pandas/io/parsers/python_parser.py
@@ -365,25 +365,34 @@ def _convert_data(
             clean_dtypes,
         )

+    @cache_readonly
+    def _have_mi_columns(self) -> bool:
+        if self.header is None:
+            return False
+
+        header = self.header
+        if isinstance(header, (list, tuple, np.ndarray)):
+            return len(header) > 1
+        else:
+            return False
+
     def _infer_columns(
         self,
     ) -> tuple[list[list[Scalar | None]], int, set[Scalar | None]]:
         names = self.names
         num_original_columns = 0
         clear_buffer = True
         unnamed_cols: set[Scalar | None] = set()
-        self._header_line = None

         if self.header is not None:
             header = self.header
+            have_mi_columns = self._have_mi_columns

             if isinstance(header, (list, tuple, np.ndarray)):
-                have_mi_columns = len(header) > 1
                 # we have a mi columns, so read an extra line
                 if have_mi_columns:
                     header = list(header) + [header[-1] + 1]
             else:
-                have_mi_columns = False
                 header = [header]

         columns: list[list[Scalar | None]] = []
@@ -531,27 +540,14 @@ def _infer_columns(
                         columns, columns[0], num_original_columns
                     )
         else:
-            try:
-                line = self._buffered_line()
-
-            except StopIteration as err:
-                if not names:
-                    raise EmptyDataError("No columns to parse from file") from err
-
-                line = names[:]
-
-            # Store line, otherwise it is lost for guessing the index
-            self._header_line = line
-            ncols = len(line)
+            ncols = len(self._header_line)
             num_original_columns = ncols

             if not names:
                 columns = [list(range(ncols))]
-                columns = self._handle_usecols(
-                    columns, columns[0], num_original_columns
-                )
-            elif self.usecols is None or len(names) >= num_original_columns:
-                columns = self._handle_usecols([names], names, num_original_columns)
+                columns = self._handle_usecols(columns, columns[0], ncols)
+            elif self.usecols is None or len(names) >= ncols:
+                columns = self._handle_usecols([names], names, ncols)
                 num_original_columns = len(names)
             elif not callable(self.usecols) and len(names) != len(self.usecols):
                 raise ValueError(
@@ -560,12 +556,26 @@
                 )
             else:
                 # Ignore output but set used columns.
-                self._handle_usecols([names], names, ncols)
                 columns = [names]
                 num_original_columns = ncols
+                self._handle_usecols(columns, columns[0], ncols)

         return columns, num_original_columns, unnamed_cols

+    @cache_readonly
+    def _header_line(self):
+        # Store line for reuse in _get_index_name
+        if self.header is not None:
+            return None
+
+        try:
+            line = self._buffered_line()
+        except StopIteration as err:
+            if not self.names:
+                raise EmptyDataError("No columns to parse from file") from err
+
+            line = self.names[:]
+        return line
+
     def _handle_usecols(
         self,
         columns: list[list[Scalar | None]],
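The refactor above replaces mutable parser state (the old self._header_line = None assignment inside _infer_columns) with cache_readonly properties that compute their value on first access and cache it; _header_line keeps its role of "read once, reuse when guessing the index" without _infer_columns managing the attribute by hand. pandas' real cache_readonly is a Cython descriptor in pandas._libs.properties and behaves roughly like functools.cached_property. A simplified pure-Python sketch of the idea, not the actual implementation:

class cache_readonly:
    # Non-data descriptor: compute once, then cache on the instance.
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, owner=None):
        if obj is None:
            return self  # class-level access returns the descriptor itself
        value = self.func(obj)
        # Storing the result under the same name means later lookups hit the
        # instance __dict__ directly, so __get__ never runs again for this object.
        obj.__dict__[self.name] = value
        return value


class Parser:
    def __init__(self, header):
        self.header = header

    @cache_readonly
    def _have_mi_columns(self):
        print("computed once")
        return isinstance(self.header, (list, tuple)) and len(self.header) > 1


p = Parser(header=[0, 1])
print(p._have_mi_columns)  # prints "computed once", then True
print(p._have_mi_columns)  # cached: True, no recompute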
2 changes: 1 addition & 1 deletion pandas/io/sql.py
@@ -758,7 +758,7 @@ def to_sql(
         rows as stipulated in the
         `sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or
         `SQLAlchemy <https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.BaseCursorResult.rowcount>`__
-    """  # noqa:E501
+    """  # noqa: E501
     if if_exists not in ("fail", "replace", "append"):
         raise ValueError(f"'{if_exists}' is not valid for if_exists")