diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f68080d05bea6..a36420556ae24 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,12 +15,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:

+    - name: Setting conda path
+      run: echo "::add-path::${HOME}/miniconda3/bin"
+
    - name: Checkout
      uses: actions/checkout@v1

-    - name: Setting conda path
-      run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}"
-
    - name: Looking for unwanted patterns
      run: ci/code_checks.sh patterns
      if: true
diff --git a/ci/deps/azure-36-locale.yaml b/ci/deps/azure-36-locale.yaml
index c3c94e365c259..14cc4f2726e96 100644
--- a/ci/deps/azure-36-locale.yaml
+++ b/ci/deps/azure-36-locale.yaml
@@ -27,6 +27,9 @@ dependencies:
   - xlrd=1.1.0
   - xlsxwriter=0.9.8
   - xlwt=1.2.0
+  # lowest supported version of pyarrow (putting it here instead of in
+  # azure-36-minimum_versions because it needs numpy >= 1.14)
+  - pyarrow=0.12
   - pip
   - pip:
     - html5lib==1.0b2
diff --git a/ci/deps/azure-37-locale.yaml b/ci/deps/azure-37-locale.yaml
index 3319afed173b5..a10fa0904a451 100644
--- a/ci/deps/azure-37-locale.yaml
+++ b/ci/deps/azure-37-locale.yaml
@@ -1,6 +1,5 @@
 name: pandas-dev
 channels:
-  - defaults
   - conda-forge
 dependencies:
   - python=3.7.*
@@ -33,3 +32,4 @@ dependencies:
   - xlrd
   - xlsxwriter
   - xlwt
+  - pyarrow>=0.15
diff --git a/ci/deps/travis-36-cov.yaml b/ci/deps/travis-36-cov.yaml
index 04ff06a99a1f4..c1403f8eb8409 100644
--- a/ci/deps/travis-36-cov.yaml
+++ b/ci/deps/travis-36-cov.yaml
@@ -33,8 +33,7 @@ dependencies:
   # https://github.com/pydata/pandas-gbq/issues/271
   - google-cloud-bigquery<=1.11
   - psycopg2
-  # pyarrow segfaults on load: https://github.com/pandas-dev/pandas/issues/26716
-  # - pyarrow=0.9.0
+  - pyarrow>=0.12.0
   - pymysql
   - pytables
   - python-snappy
diff --git a/ci/print_skipped.py b/ci/print_skipped.py
index 51a2460e05fab..8fd92ab9622ba 100755
--- a/ci/print_skipped.py
+++ b/ci/print_skipped.py
@@ -5,12 +5,12 @@
 
 def main(filename):
     if not os.path.isfile(filename):
-        return
+        raise RuntimeError(f"Could not find junit file {filename!r}")
 
     tree = et.parse(filename)
     root = tree.getroot()
     current_class = ""
-    for el in root.findall("testcase"):
+    for el in root.iter("testcase"):
         cn = el.attrib["classname"]
         for sk in el.findall("skipped"):
             old_class = current_class
@@ -32,8 +32,7 @@ def main(filename):
             print("-" * 80)
         else:
             print(
-                "#{i} {class_name}.{test_name}: {message}".format(
-                    **dict(test_data, i=i)
-                )
+                f"#{i} {test_data['class_name']}."
+ f"{test_data['test_name']}: {test_data['message']}" ) i += 1 diff --git a/doc/redirects.csv b/doc/redirects.csv index 599ad6d28a8f5..61902f3134a4d 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -1119,7 +1119,6 @@ generated/pandas.Series.pow,../reference/api/pandas.Series.pow generated/pandas.Series.prod,../reference/api/pandas.Series.prod generated/pandas.Series.product,../reference/api/pandas.Series.product generated/pandas.Series.ptp,../reference/api/pandas.Series.ptp -generated/pandas.Series.put,../reference/api/pandas.Series.put generated/pandas.Series.quantile,../reference/api/pandas.Series.quantile generated/pandas.Series.radd,../reference/api/pandas.Series.radd generated/pandas.Series.rank,../reference/api/pandas.Series.rank diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 2485b94ab4d09..e13b4ed98a38b 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -39,7 +39,6 @@ Attributes Series.empty Series.dtypes Series.name - Series.put Conversion ---------- diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4eb5f350cad8e..5c9543580be26 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -565,6 +565,9 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`) - Removed previously deprecated :meth:`Series.nonzero`, use `to_numpy().nonzero()` instead (:issue:`24048`) - Passing floating dtype ``codes`` to :meth:`Categorical.from_codes` is no longer supported, pass ``codes.astype(np.int64)`` instead (:issue:`21775`) +- :meth:`Series.str.partition` and :meth:`Series.str.rpartition` no longer accept "pat" keyword, use "sep" instead (:issue:`23767`) +- Removed the previously deprecated :meth:`Series.put` (:issue:`27106`) +- Removed the previously deprecated :attr:`Series.real`, :attr:`Series.imag` (:issue:`27106`) - Removed the previously deprecated :meth:`Series.to_dense`, :meth:`DataFrame.to_dense` (:issue:`26684`) - Removed the previously deprecated :meth:`Index.dtype_str`, use ``str(index.dtype)`` instead (:issue:`27106`) - :meth:`Categorical.ravel` returns a :class:`Categorical` instead of a ``ndarray`` (:issue:`27199`) diff --git a/doc/sphinxext/announce.py b/doc/sphinxext/announce.py index 1a5ab99b5a94f..fdc5a6b283ba8 100755 --- a/doc/sphinxext/announce.py +++ b/doc/sphinxext/announce.py @@ -113,13 +113,13 @@ def build_string(revision_range, heading="Contributors"): components["authors"] = "* " + "\n* ".join(components["authors"]) tpl = textwrap.dedent( - """\ - {heading} - {uline} + f"""\ + {components['heading']} + {components['uline']} - {author_message} - {authors}""" - ).format(**components) + {components['author_message']} + {components['authors']}""" + ) return tpl diff --git a/doc/sphinxext/contributors.py b/doc/sphinxext/contributors.py index 1a064f71792e9..d9ba2bb2cfb07 100644 --- a/doc/sphinxext/contributors.py +++ b/doc/sphinxext/contributors.py @@ -27,7 +27,7 @@ def run(self): except git.GitCommandError as exc: return [ self.state.document.reporter.warning( - "Cannot find contributors for range '{}': {}".format(range_, exc), + f"Cannot find contributors for range {repr(range_)}: {exc}", line=self.lineno, ) ] diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 8f0f4e17df2f9..adc7a650b745f 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -63,11 +63,6 @@ from pandas.errors import 
diff --git a/doc/sphinxext/announce.py b/doc/sphinxext/announce.py
index 1a5ab99b5a94f..fdc5a6b283ba8 100755
--- a/doc/sphinxext/announce.py
+++ b/doc/sphinxext/announce.py
@@ -113,13 +113,13 @@ def build_string(revision_range, heading="Contributors"):
     components["authors"] = "* " + "\n* ".join(components["authors"])
 
     tpl = textwrap.dedent(
-        """\
-    {heading}
-    {uline}
+        f"""\
+    {components['heading']}
+    {components['uline']}
 
-    {author_message}
-    {authors}"""
-    ).format(**components)
+    {components['author_message']}
+    {components['authors']}"""
+    )
     return tpl
diff --git a/doc/sphinxext/contributors.py b/doc/sphinxext/contributors.py
index 1a064f71792e9..d9ba2bb2cfb07 100644
--- a/doc/sphinxext/contributors.py
+++ b/doc/sphinxext/contributors.py
@@ -27,7 +27,7 @@ def run(self):
         except git.GitCommandError as exc:
             return [
                 self.state.document.reporter.warning(
-                    "Cannot find contributors for range '{}': {}".format(range_, exc),
+                    f"Cannot find contributors for range {repr(range_)}: {exc}",
                     line=self.lineno,
                 )
             ]
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 8f0f4e17df2f9..adc7a650b745f 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -63,11 +63,6 @@ from pandas.errors import (ParserError, DtypeWarning,
 
 lzma = _import_lzma()
 
-# Import CParserError as alias of ParserError for backwards compatibility.
-# Ultimately, we want to remove this import. See gh-12665 and gh-14479.
-CParserError = ParserError
-
-
 cdef:
     float64_t INF = np.inf
     float64_t NEGINF = -INF
diff --git a/pandas/compat/chainmap.py b/pandas/compat/chainmap.py
index 84824207de2a9..479eddf0c0536 100644
--- a/pandas/compat/chainmap.py
+++ b/pandas/compat/chainmap.py
@@ -10,6 +10,12 @@ def __setitem__(self, key, value):
         self.maps[0][key] = value
 
     def __delitem__(self, key):
+        """
+        Raises
+        ------
+        KeyError
+            If `key` doesn't exist.
+        """
         for mapping in self.maps:
             if key in mapping:
                 del mapping[key]
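The docstring added above belongs to `DeepChainMap.__delitem__`. As a rough illustration of the documented behavior, here is the `DeepChainMap` recipe from the Python `collections` docs, which is assumed to mirror `pandas.compat.chainmap`:

```python
from collections import ChainMap


class DeepChainMap(ChainMap):
    """Variant of ChainMap that writes and deletes through to child maps."""

    def __setitem__(self, key, value):
        for mapping in self.maps:
            if key in mapping:
                mapping[key] = value
                return
        self.maps[0][key] = value

    def __delitem__(self, key):
        # Delete the key from the first map that contains it; raise
        # KeyError if no map does -- the behavior documented above.
        for mapping in self.maps:
            if key in mapping:
                del mapping[key]
                return
        raise KeyError(key)


scope = DeepChainMap({"x": 1}, {"y": 2})
del scope["y"]      # removes "y" from the second map
# del scope["z"]    # would raise KeyError("z")
```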
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 066a7628be364..5e613849ba8d5 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -114,9 +114,7 @@ def __setattr__(self, key, value):
             or key in type(self).__dict__
             or getattr(self, key, None) is not None
         ):
-            raise AttributeError(
-                "You cannot add any new attribute '{key}'".format(key=key)
-            )
+            raise AttributeError(f"You cannot add any new attribute '{key}'")
         object.__setattr__(self, key, value)
@@ -220,28 +218,22 @@ def _obj_with_exclusions(self):
     def __getitem__(self, key):
         if self._selection is not None:
-            raise IndexError(
-                "Column(s) {selection} already selected".format(
-                    selection=self._selection
-                )
-            )
+            raise IndexError(f"Column(s) {self._selection} already selected")
 
         if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray)):
             if len(self.obj.columns.intersection(key)) != len(key):
                 bad_keys = list(set(key).difference(self.obj.columns))
-                raise KeyError(
-                    "Columns not found: {missing}".format(missing=str(bad_keys)[1:-1])
-                )
+                raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
             return self._gotitem(list(key), ndim=2)
 
         elif not getattr(self, "as_index", False):
             if key not in self.obj.columns:
-                raise KeyError("Column not found: {key}".format(key=key))
+                raise KeyError(f"Column not found: {key}")
             return self._gotitem(key, ndim=2)
 
         else:
             if key not in self.obj:
-                raise KeyError("Column not found: {key}".format(key=key))
+                raise KeyError(f"Column not found: {key}")
             return self._gotitem(key, ndim=1)
 
     def _gotitem(self, key, ndim, subset=None):
@@ -293,8 +285,7 @@ def _try_aggregate_string_function(self, arg: str, *args, **kwargs):
             return f(self, *args, **kwargs)
 
         raise AttributeError(
-            "'{arg}' is not a valid function for "
-            "'{cls}' object".format(arg=arg, cls=type(self).__name__)
+            f"'{arg}' is not a valid function for '{type(self).__name__}' object"
         )
 
     def _aggregate(self, arg, *args, **kwargs):
@@ -359,7 +350,7 @@ def _aggregate(self, arg, *args, **kwargs):
                 elif isinstance(obj, ABCSeries):
                     raise SpecificationError("nested renamer is not supported")
                 elif isinstance(obj, ABCDataFrame) and k not in obj.columns:
-                    raise KeyError("Column '{col}' does not exist!".format(col=k))
+                    raise KeyError(f"Column '{k}' does not exist!")
 
             arg = new_arg
 
@@ -1101,9 +1092,7 @@ def _reduce(
         func = getattr(self, name, None)
         if func is None:
             raise TypeError(
-                "{klass} cannot perform the operation {op}".format(
-                    klass=type(self).__name__, op=name
-                )
+                f"{type(self).__name__} cannot perform the operation {name}"
             )
         return func(skipna=skipna, **kwds)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index e19bf9c1c39ea..ea73241e5d078 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -18,6 +18,7 @@
     Optional,
     Sequence,
     Set,
+    Tuple,
     Union,
 )
 import warnings
@@ -66,7 +67,7 @@ from pandas.core.dtypes.missing import isna, notna
 
 import pandas as pd
-from pandas._typing import Dtype, FilePathOrBuffer, JSONSerializable
+from pandas._typing import Dtype, FilePathOrBuffer, FrameOrSeries, JSONSerializable
 from pandas.core import missing, nanops
 import pandas.core.algorithms as algos
 from pandas.core.base import PandasObject, SelectionMixin
@@ -362,7 +363,7 @@ def _construct_axes_dict_from(self, axes, **kwargs):
         return d
 
     def _construct_axes_from_arguments(
-        self, args, kwargs, require_all=False, sentinel=None
+        self, args, kwargs, require_all: bool = False, sentinel=None
     ):
         """Construct and returns axes if supplied in args/kwargs.
@@ -509,7 +510,7 @@ def _stat_axis(self):
         return getattr(self, self._stat_axis_name)
 
     @property
-    def shape(self):
+    def shape(self) -> Tuple[int, ...]:
         """
         Return a tuple of axis dimensions
         """
@@ -572,12 +573,12 @@ def size(self):
         return np.prod(self.shape)
 
     @property
-    def _selected_obj(self):
+    def _selected_obj(self: FrameOrSeries) -> FrameOrSeries:
         """ internal compat with SelectionMixin """
         return self
 
     @property
-    def _obj_with_exclusions(self):
+    def _obj_with_exclusions(self: FrameOrSeries) -> FrameOrSeries:
         """ internal compat with SelectionMixin """
         return self
@@ -1395,7 +1396,7 @@ def _set_axis_name(self, name, axis=0, inplace=False):
     # ----------------------------------------------------------------------
     # Comparison Methods
 
-    def _indexed_same(self, other):
+    def _indexed_same(self, other) -> bool:
         return all(
             self._get_axis(a).equals(other._get_axis(a)) for a in self._AXIS_ORDERS
         )
@@ -1608,7 +1609,7 @@ def _is_level_reference(self, key, axis=0):
             and not self._is_label_reference(key, axis=axis)
         )
 
-    def _is_label_reference(self, key, axis=0):
+    def _is_label_reference(self, key, axis=0) -> bool_t:
         """
         Test whether a key is a label reference for a given axis.
@@ -1637,7 +1638,7 @@ def _is_label_reference(self, key, axis=0):
             and any(key in self.axes[ax] for ax in other_axes)
         )
 
-    def _is_label_or_level_reference(self, key, axis=0):
+    def _is_label_or_level_reference(self, key: str, axis: int = 0) -> bool_t:
         """
         Test whether a key is a label or level reference for a given axis.
@@ -1661,7 +1662,7 @@ def _is_label_or_level_reference(self, key, axis=0):
             key, axis=axis
         )
 
-    def _check_label_or_level_ambiguity(self, key, axis=0):
+    def _check_label_or_level_ambiguity(self, key, axis: int = 0) -> None:
         """
         Check whether `key` is ambiguous.
@@ -1710,7 +1711,7 @@ def _check_label_or_level_ambiguity(self, key, axis=0):
             )
             raise ValueError(msg)
 
-    def _get_label_or_level_values(self, key, axis=0):
+    def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray:
         """
         Return a 1-D array of values associated with `key`, a label or level
         from the given `axis`.
@@ -1782,7 +1783,7 @@ def _get_label_or_level_values(self, key, axis=0):
 
         return values
 
-    def _drop_labels_or_levels(self, keys, axis=0):
+    def _drop_labels_or_levels(self, keys, axis: int = 0):
         """
         Drop labels and/or levels for the given `axis`.
@@ -1913,12 +1914,12 @@ def __len__(self) -> int:
         """Returns length of info axis"""
         return len(self._info_axis)
 
-    def __contains__(self, key):
+    def __contains__(self, key) -> bool_t:
         """True if the key is in the info axis"""
         return key in self._info_axis
 
     @property
-    def empty(self):
+    def empty(self) -> bool_t:
         """
         Indicator whether DataFrame is empty.
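Several of the annotations above use `self: FrameOrSeries`. A minimal sketch of the pattern, assuming `FrameOrSeries` is a `TypeVar` bound to `NDFrame` as in `pandas._typing` (class bodies elided):

```python
from typing import TypeVar

# A TypeVar bound to the base class: annotating `self` with it makes the
# return type follow the concrete subclass, so a type checker infers
# DataFrame.head() -> DataFrame and Series.head() -> Series, not NDFrame.
FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame")


class NDFrame:
    def head(self: FrameOrSeries, n: int = 5) -> FrameOrSeries:
        ...


class Series(NDFrame):
    ...


class DataFrame(NDFrame):
    ...
```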
@@ -1991,7 +1992,7 @@ def __array_wrap__(self, result, context=None):
     # ----------------------------------------------------------------------
     # Picklability
 
-    def __getstate__(self):
+    def __getstate__(self) -> Dict[str, Any]:
         meta = {k: getattr(self, k, None) for k in self._metadata}
         return dict(
             _data=self._data,
@@ -2412,9 +2413,13 @@ def to_hdf(
         complib: Optional[str] = None,
         append: bool_t = False,
         format: Optional[str] = None,
+        index: bool_t = True,
+        min_itemsize: Optional[Union[int, Dict[str, int]]] = None,
+        nan_rep=None,
+        dropna: Optional[bool_t] = None,
+        data_columns: Optional[List[str]] = None,
         errors: str = "strict",
         encoding: str = "UTF-8",
-        **kwargs,
     ):
         """
         Write the contained data to an HDF5 file using HDFStore.
@@ -2471,15 +2476,16 @@ def to_hdf(
             See the errors argument for :func:`open` for a full list
             of options.
         encoding : str, default "UTF-8"
+        min_itemsize : dict or int, optional
+            Map column names to minimum string sizes for columns.
+        nan_rep : Any, optional
+            How to represent null values as str.
+            Not allowed with append=True.
         data_columns : list of columns or True, optional
             List of columns to create as indexed data columns for on-disk
             queries, or True to use all columns. By default only the axes
             of the object are indexed. See :ref:`io.hdf5-query-data-columns`.
             Applicable only to format='table'.
-        fletcher32 : bool, default False
-            If applying compression use the fletcher32 checksum.
-        dropna : bool, default False
-            If true, ALL nan rows will not be written to store.
 
         See Also
         --------
@@ -2530,9 +2536,13 @@ def to_hdf(
             complib=complib,
             append=append,
             format=format,
+            index=index,
+            min_itemsize=min_itemsize,
+            nan_rep=nan_rep,
+            dropna=dropna,
+            data_columns=data_columns,
             errors=errors,
             encoding=encoding,
-            **kwargs,
         )
 
     def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs):
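With `**kwargs` replaced by named parameters, existing `to_hdf` calls are unchanged, but a misspelled option now raises a `TypeError` immediately instead of being silently forwarded. A hedged usage sketch (the file path and column names are illustrative; the optional `tables` package must be installed):

```python
import pandas as pd

df = pd.DataFrame({"name": ["alice", "bob"], "score": [1.5, 2.5]})

# index, min_itemsize, nan_rep, dropna and data_columns are now explicit
# parameters of to_hdf rather than being smuggled through **kwargs.
df.to_hdf(
    "store.h5",                   # illustrative path
    key="df",
    format="table",
    data_columns=["name"],        # columns queryable on disk
    min_itemsize={"name": 20},    # reserve string width for later appends
)
```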
""" self._cacher = (item, weakref.ref(cacher)) - def _reset_cacher(self): + def _reset_cacher(self) -> None: """Reset the cacher.""" if hasattr(self, "_cacher"): del self._cacher - def _maybe_cache_changed(self, item, value): + def _maybe_cache_changed(self, item, value) -> None: """The object has called back to us saying maybe it has changed. """ self._data.set(item, value) @property - def _is_cached(self): + def _is_cached(self) -> bool_t: """Return boolean indicating if self is cached or not.""" return getattr(self, "_cacher", None) is not None @@ -3313,7 +3328,9 @@ def _get_cacher(self): cacher = cacher[1]() return cacher - def _maybe_update_cacher(self, clear=False, verify_is_copy=True): + def _maybe_update_cacher( + self, clear: bool_t = False, verify_is_copy: bool_t = True + ) -> None: """ See if we need to update our parent cacher if clear, then clear our cache. @@ -3350,13 +3367,13 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True): if clear: self._clear_item_cache() - def _clear_item_cache(self): + def _clear_item_cache(self) -> None: self._item_cache.clear() # ---------------------------------------------------------------------- # Indexing Methods - def take(self, indices, axis=0, is_copy=True, **kwargs): + def take(self, indices, axis=0, is_copy: bool_t = True, **kwargs): """ Return the elements in the given *positional* indices along an axis. @@ -3447,7 +3464,7 @@ class max_speed return result - def xs(self, key, axis=0, level=None, drop_level=True): + def xs(self, key, axis=0, level=None, drop_level: bool_t = True): """ Return cross-section from the Series/DataFrame. @@ -3552,9 +3569,9 @@ class animal locomotion loc, new_ax = labels.get_loc_level(key, level=level, drop_level=drop_level) # create the tuple of the indexer - indexer = [slice(None)] * self.ndim - indexer[axis] = loc - indexer = tuple(indexer) + _indexer = [slice(None)] * self.ndim + _indexer[axis] = loc + indexer = tuple(_indexer) result = self.iloc[indexer] setattr(result, result._get_axis_name(axis), new_ax) @@ -3655,11 +3672,11 @@ def _slice(self, slobj: slice, axis=0, kind=None): result._set_is_copy(self, copy=is_copy) return result - def _set_item(self, key, value): + def _set_item(self, key, value) -> None: self._data.set(key, value) self._clear_item_cache() - def _set_is_copy(self, ref=None, copy=True): + def _set_is_copy(self, ref=None, copy: bool_t = True) -> None: if not copy: self._is_copy = None else: @@ -3668,7 +3685,7 @@ def _set_is_copy(self, ref=None, copy=True): else: self._is_copy = None - def _check_is_chained_assignment_possible(self): + def _check_is_chained_assignment_possible(self) -> bool_t: """ Check if we are a view, have a cacher, and are of mixed type. If so, then force a setitem_copy check. @@ -3826,7 +3843,14 @@ def _is_view(self): """Return boolean indicating if self is view of another array """ return self._data.is_view - def reindex_like(self, other, method=None, copy=True, limit=None, tolerance=None): + def reindex_like( + self, + other, + method: Optional[str] = None, + copy: bool_t = True, + limit=None, + tolerance=None, + ): """ Return an object with matching indices as other object. 
@@ -3938,8 +3962,8 @@ def drop(
         index=None,
         columns=None,
         level=None,
-        inplace=False,
-        errors="raise",
+        inplace: bool_t = False,
+        errors: str = "raise",
     ):
 
         inplace = validate_bool_kwarg(inplace, "inplace")
@@ -3967,7 +3991,7 @@ def drop(
         else:
             return obj
 
-    def _drop_axis(self, labels, axis, level=None, errors="raise"):
+    def _drop_axis(self, labels, axis, level=None, errors: str = "raise"):
         """
         Drop labels from specified axis. Used in the ``drop`` method
         internally.
@@ -4020,7 +4044,7 @@ def _drop_axis(self, labels, axis, level=None, errors="raise"):
 
         return result
 
-    def _update_inplace(self, result, verify_is_copy=True):
+    def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None:
         """
         Replace self internals with result.
@@ -4037,7 +4061,7 @@ def _update_inplace(self, result, verify_is_copy=True):
         self._data = getattr(result, "_data", result)
         self._maybe_update_cacher(verify_is_copy=verify_is_copy)
 
-    def add_prefix(self, prefix):
+    def add_prefix(self, prefix: str):
         """
         Prefix labels with string `prefix`.
@@ -4096,7 +4120,7 @@ def add_prefix(self, prefix):
         mapper = {self._info_axis_name: f}
         return self.rename(**mapper)
 
-    def add_suffix(self, suffix):
+    def add_suffix(self, suffix: str):
         """
         Suffix labels with string `suffix`.
@@ -4160,9 +4184,9 @@ def sort_values(
         by=None,
         axis=0,
         ascending=True,
-        inplace=False,
-        kind="quicksort",
-        na_position="last",
+        inplace: bool_t = False,
+        kind: str = "quicksort",
+        na_position: str = "last",
     ):
         """
         Sort by the values along either axis.
@@ -4257,11 +4281,11 @@ def sort_index(
         self,
         axis=0,
         level=None,
-        ascending=True,
-        inplace=False,
-        kind="quicksort",
-        na_position="last",
-        sort_remaining=True,
+        ascending: bool_t = True,
+        inplace: bool_t = False,
+        kind: str = "quicksort",
+        na_position: str = "last",
+        sort_remaining: bool_t = True,
     ):
         """
         Sort object by labels (along an axis).
@@ -4583,7 +4607,7 @@ def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
 
         return obj
 
-    def _needs_reindex_multi(self, axes, method, level):
+    def _needs_reindex_multi(self, axes, method, level) -> bool_t:
         """Check if we do need a multi reindex."""
         return (
             (com.count_not_none(*axes.values()) == self._AXIS_LEN)
@@ -4596,7 +4620,11 @@ def _reindex_multi(self, axes, copy, fill_value):
         raise AbstractMethodError(self)
 
     def _reindex_with_indexers(
-        self, reindexers, fill_value=None, copy=False, allow_dups=False
+        self,
+        reindexers,
+        fill_value=None,
+        copy: bool_t = False,
+        allow_dups: bool_t = False,
     ):
         """allow_dups indicates an internal call here """
@@ -4628,7 +4656,13 @@ def _reindex_with_indexers(
 
         return self._constructor(new_data).__finalize__(self)
 
-    def filter(self, items=None, like=None, regex=None, axis=None):
+    def filter(
+        self,
+        items=None,
+        like: Optional[str] = None,
+        regex: Optional[str] = None,
+        axis=None,
+    ):
         """
         Subset the dataframe rows or columns according to the specified index labels.
@@ -4719,7 +4753,7 @@ def f(x):
         else:
             raise TypeError("Must pass either `items`, `like`, or `regex`")
 
-    def head(self, n=5):
+    def head(self: FrameOrSeries, n: int = 5) -> FrameOrSeries:
         """
         Return the first `n` rows.
@@ -4778,7 +4812,7 @@ def head(self, n=5):
 
         return self.iloc[:n]
 
-    def tail(self, n=5):
+    def tail(self: FrameOrSeries, n: int = 5) -> FrameOrSeries:
         """
         Return the last `n` rows.
@@ -5208,7 +5242,9 @@ def pipe(self, func, *args, **kwargs):
     # ----------------------------------------------------------------------
     # Attribute access
 
-    def __finalize__(self, other, method=None, **kwargs):
+    def __finalize__(
+        self: FrameOrSeries, other, method=None, **kwargs
+    ) -> FrameOrSeries:
         """
         Propagate metadata from other to self.
@@ -5228,7 +5264,7 @@ def __finalize__(self, other, method=None, **kwargs):
                 object.__setattr__(self, name, getattr(other, name, None))
         return self
 
-    def __getattr__(self, name):
+    def __getattr__(self, name: str):
         """After regular attribute access, try looking up the name
         This allows simpler access to columns for interactive use.
         """
@@ -5247,7 +5283,7 @@ def __getattr__(self, name):
             return self[name]
         return object.__getattribute__(self, name)
 
-    def __setattr__(self, name, value):
+    def __setattr__(self, name: str, value) -> None:
         """After regular attribute access, try setting the name
         This allows simpler access to columns for interactive use.
         """
@@ -5312,7 +5348,7 @@ def _protect_consolidate(self, f):
         self._clear_item_cache()
         return result
 
-    def _consolidate_inplace(self):
+    def _consolidate_inplace(self) -> None:
         """Consolidate data in place and return None"""
 
         def f():
@@ -5320,7 +5356,7 @@ def f():
 
         self._protect_consolidate(f)
 
-    def _consolidate(self, inplace=False):
+    def _consolidate(self, inplace: bool_t = False):
         """
         Compute NDFrame with "consolidated" internals (data of each dtype
         grouped together in a single ndarray).
@@ -5357,7 +5393,7 @@ def _is_datelike_mixed_type(self):
         f = lambda: self._data.is_datelike_mixed_type
         return self._protect_consolidate(f)
 
-    def _check_inplace_setting(self, value):
+    def _check_inplace_setting(self, value) -> bool_t:
         """ check whether we allow in-place setting with this type of value """
 
         if self._is_mixed_type:
@@ -5559,7 +5595,7 @@ def dtypes(self):
 
         return Series(self._data.get_dtypes(), index=self._info_axis, dtype=np.object_)
 
-    def _to_dict_of_blocks(self, copy=True):
+    def _to_dict_of_blocks(self, copy: bool_t = True):
         """
         Return a dict of dtype -> Constructor Types that
         each is a homogeneous dtype.
@@ -5712,7 +5748,7 @@ def astype(self, dtype, copy: bool_t = True, errors: str = "raise"):
             result.columns = self.columns
         return result
 
-    def copy(self, deep=True):
+    def copy(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries:
         """
         Make a copy of this object's indices and data.
@@ -5820,10 +5856,10 @@ def copy(self, deep=True):
         data = self._data.copy(deep=deep)
         return self._constructor(data).__finalize__(self)
 
-    def __copy__(self, deep=True):
+    def __copy__(self: FrameOrSeries, deep: bool_t = True) -> FrameOrSeries:
         return self.copy(deep=deep)
 
-    def __deepcopy__(self, memo=None):
+    def __deepcopy__(self: FrameOrSeries, memo=None) -> FrameOrSeries:
         """
         Parameters
         ----------
@@ -5835,8 +5871,13 @@ def __deepcopy__(self, memo=None):
         return self.copy(deep=True)
 
     def _convert(
-        self, datetime=False, numeric=False, timedelta=False, coerce=False, copy=True
-    ):
+        self: FrameOrSeries,
+        datetime: bool_t = False,
+        numeric: bool_t = False,
+        timedelta: bool_t = False,
+        coerce: bool_t = False,
+        copy: bool_t = True,
+    ) -> FrameOrSeries:
         """
         Attempt to infer better dtype for object columns
@@ -5928,14 +5969,14 @@ def infer_objects(self):
     # Filling NA's
 
     def fillna(
-        self,
+        self: FrameOrSeries,
         value=None,
         method=None,
         axis=None,
-        inplace=False,
+        inplace: bool_t = False,
         limit=None,
         downcast=None,
-    ):
+    ) -> Optional[FrameOrSeries]:
         """
         Fill NA/NaN values using the specified method.
@@ -5971,8 +6012,8 @@ def fillna(
 
         Returns
         -------
-        %(klass)s
-            Object with missing values filled.
+        %(klass)s or None
+            Object with missing values filled or None if ``inplace=True``.
 
         See Also
         --------
@@ -6112,30 +6153,43 @@ def fillna(
 
         if inplace:
             self._update_inplace(new_data)
+            return None
         else:
             return self._constructor(new_data).__finalize__(self)
 
-    def ffill(self, axis=None, inplace=False, limit=None, downcast=None):
+    def ffill(
+        self: FrameOrSeries,
+        axis=None,
+        inplace: bool_t = False,
+        limit=None,
+        downcast=None,
+    ) -> Optional[FrameOrSeries]:
         """
         Synonym for :meth:`DataFrame.fillna` with ``method='ffill'``.
 
         Returns
         -------
-        %(klass)s
-            Object with missing values filled.
+        %(klass)s or None
+            Object with missing values filled or None if ``inplace=True``.
         """
         return self.fillna(
             method="ffill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
         )
 
-    def bfill(self, axis=None, inplace=False, limit=None, downcast=None):
+    def bfill(
+        self: FrameOrSeries,
+        axis=None,
+        inplace: bool_t = False,
+        limit=None,
+        downcast=None,
+    ) -> Optional[FrameOrSeries]:
         """
         Synonym for :meth:`DataFrame.fillna` with ``method='bfill'``.
 
         Returns
         -------
-        %(klass)s
-            Object with missing values filled.
+        %(klass)s or None
+            Object with missing values filled or None if ``inplace=True``.
         """
         return self.fillna(
             method="bfill", axis=axis, inplace=inplace, limit=limit, downcast=downcast
         )
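A quick illustration of the return contract documented above (`None` when `inplace=True`); the values are illustrative:

```python
import numpy as np
import pandas as pd

ser = pd.Series([1.0, np.nan, 3.0])

filled = ser.fillna(0.0)                 # returns a new Series
assert filled.isna().sum() == 0

result = ser.fillna(0.0, inplace=True)   # mutates `ser` in place ...
assert result is None                    # ... and returns None, per the docs
```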
@@ -7197,7 +7251,7 @@ def notna(self):
     def notnull(self):
         return notna(self).__finalize__(self)
 
-    def _clip_with_scalar(self, lower, upper, inplace=False):
+    def _clip_with_scalar(self, lower, upper, inplace: bool_t = False):
         if (lower is not None and np.any(isna(lower))) or (
             upper is not None and np.any(isna(upper))
         ):
@@ -7245,7 +7299,15 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace):
             threshold = _align_method_FRAME(self, threshold, axis)
         return self.where(subset, threshold, axis=axis, inplace=inplace)
 
-    def clip(self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs):
+    def clip(
+        self,
+        lower=None,
+        upper=None,
+        axis=None,
+        inplace: bool_t = False,
+        *args,
+        **kwargs,
+    ):
         """
         Trim values at input threshold(s).
@@ -7364,11 +7426,11 @@ def groupby(
         by=None,
         axis=0,
         level=None,
-        as_index=True,
-        sort=True,
-        group_keys=True,
-        squeeze=False,
-        observed=False,
+        as_index: bool_t = True,
+        sort: bool_t = True,
+        group_keys: bool_t = True,
+        squeeze: bool_t = False,
+        observed: bool_t = False,
     ):
         """
         Group DataFrame or Series using a mapper or by a Series of columns.
@@ -7493,7 +7555,14 @@ def groupby(
             observed=observed,
         )
 
-    def asfreq(self, freq, method=None, how=None, normalize=False, fill_value=None):
+    def asfreq(
+        self,
+        freq,
+        method=None,
+        how: Optional[str] = None,
+        normalize: bool_t = False,
+        fill_value=None,
+    ):
         """
         Convert TimeSeries to specified frequency.
@@ -7596,7 +7665,7 @@ def asfreq(self, freq, method=None, how=None, normalize=False, fill_value=None):
             fill_value=fill_value,
         )
 
-    def at_time(self, time, asof=False, axis=None):
+    def at_time(self, time, asof: bool_t = False, axis=None):
         """
         Select values at particular time of day (e.g. 9:30AM).
@@ -7653,7 +7722,12 @@ def at_time(self, time, asof=False, axis=None):
         return self.take(indexer, axis=axis)
 
     def between_time(
-        self, start_time, end_time, include_start=True, include_end=True, axis=None
+        self,
+        start_time,
+        end_time,
+        include_start: bool_t = True,
+        include_end: bool_t = True,
+        axis=None,
    ):
         """
         Select values between particular times of the day (e.g., 9:00-9:30 AM).
@@ -7732,16 +7806,16 @@ def between_time(
     def resample(
         self,
         rule,
-        how=None,
+        how: Optional[str] = None,
         axis=0,
-        fill_method=None,
-        closed=None,
-        label=None,
-        convention="start",
-        kind=None,
+        fill_method: Optional[str] = None,
+        closed: Optional[str] = None,
+        label: Optional[str] = None,
+        convention: str = "start",
+        kind: Optional[str] = None,
         loffset=None,
-        limit=None,
-        base=0,
+        limit: Optional[int] = None,
+        base: int = 0,
         on=None,
         level=None,
     ):
@@ -8165,14 +8239,14 @@ def last(self, offset):
         return self.iloc[start:]
 
     def rank(
-        self,
+        self: FrameOrSeries,
         axis=0,
-        method="average",
-        numeric_only=None,
-        na_option="keep",
-        ascending=True,
-        pct=False,
-    ):
+        method: str = "average",
+        numeric_only: Optional[bool_t] = None,
+        na_option: str = "keep",
+        ascending: bool_t = True,
+        pct: bool_t = False,
+    ) -> FrameOrSeries:
         """
         Compute numerical data ranks (1 through n) along axis.
@@ -8424,7 +8498,7 @@ def _align_frame(
         join="outer",
         axis=None,
         level=None,
-        copy=True,
+        copy: bool_t = True,
         fill_value=None,
         method=None,
         limit=None,
@@ -8484,7 +8558,7 @@ def _align_series(
         join="outer",
         axis=None,
         level=None,
-        copy=True,
+        copy: bool_t = True,
         fill_value=None,
         method=None,
         limit=None,
@@ -8974,7 +9048,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
 
         return self._constructor(new_data).__finalize__(self)
 
-    def slice_shift(self, periods=1, axis=0):
+    def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries:
         """
         Equivalent to `shift` without copying data.
@@ -9011,7 +9085,7 @@ def slice_shift(self, periods=1, axis=0):
 
         return new_obj.__finalize__(self)
 
-    def tshift(self, periods=1, freq=None, axis=0):
+    def tshift(self, periods: int = 1, freq=None, axis=0):
         """
         Shift the time index, using the index's frequency if available.
@@ -9071,7 +9145,9 @@ def tshift(self, periods=1, freq=None, axis=0):
 
         return self._constructor(new_data).__finalize__(self)
 
-    def truncate(self, before=None, after=None, axis=None, copy=True):
+    def truncate(
+        self: FrameOrSeries, before=None, after=None, axis=None, copy: bool_t = True
+    ) -> FrameOrSeries:
         """
         Truncate a Series or DataFrame before and after some index value.
@@ -9224,7 +9300,9 @@ def truncate(self, before=None, after=None, axis=None, copy=True):
 
         return result
 
-    def tz_convert(self, tz, axis=0, level=None, copy=True):
+    def tz_convert(
+        self: FrameOrSeries, tz, axis=0, level=None, copy: bool_t = True
+    ) -> FrameOrSeries:
         """
         Convert tz-aware axis to target time zone.
@@ -9280,8 +9358,14 @@ def _tz_convert(ax, tz):
         return result.__finalize__(self)
 
     def tz_localize(
-        self, tz, axis=0, level=None, copy=True, ambiguous="raise", nonexistent="raise"
-    ):
+        self: FrameOrSeries,
+        tz,
+        axis=0,
+        level=None,
+        copy: bool_t = True,
+        ambiguous="raise",
+        nonexistent: str = "raise",
+    ) -> FrameOrSeries:
         """
         Localize tz-naive index of a Series or DataFrame to target time zone.
@@ -11099,7 +11183,7 @@ def _doc_parms(cls):
 
 
 def _make_min_count_stat_function(
-    cls, name, name1, name2, axis_descr, desc, f, see_also="", examples=""
+    cls, name, name1, name2, axis_descr, desc, f, see_also: str = "", examples: str = ""
 ):
     @Substitution(
         desc=desc,
@@ -11147,7 +11231,7 @@ def stat_func(
 
 
 def _make_stat_function(
-    cls, name, name1, name2, axis_descr, desc, f, see_also="", examples=""
+    cls, name, name1, name2, axis_descr, desc, f, see_also: str = "", examples: str = ""
 ):
     @Substitution(
         desc=desc,
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index b99e60f8c6278..87c110f95c13a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -69,6 +69,7 @@
 from pandas.core.arrays import ExtensionArray
 from pandas.core.base import IndexOpsMixin, PandasObject
 import pandas.core.common as com
+from pandas.core.construction import extract_array
 from pandas.core.indexers import maybe_convert_indices
 from pandas.core.indexes.frozen import FrozenList
 import pandas.core.missing as missing
@@ -4489,22 +4490,26 @@ def get_value(self, series, key):
         # if we have something that is Index-like, then
         # use this, e.g. DatetimeIndex
         # Things like `Series._get_value` (via .at) pass the EA directly here.
-        s = getattr(series, "_values", series)
-        if isinstance(s, (ExtensionArray, Index)) and is_scalar(key):
-            # GH 20882, 21257
-            # Unify Index and ExtensionArray treatment
-            # First try to convert the key to a location
-            # If that fails, raise a KeyError if an integer
-            # index, otherwise, see if key is an integer, and
-            # try that
-            try:
-                iloc = self.get_loc(key)
-                return s[iloc]
-            except KeyError:
-                if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
-                    raise
-                elif is_integer(key):
-                    return s[key]
+        s = extract_array(series, extract_numpy=True)
+        if isinstance(s, ExtensionArray):
+            if is_scalar(key):
+                # GH 20882, 21257
+                # First try to convert the key to a location
+                # If that fails, raise a KeyError if an integer
+                # index, otherwise, see if key is an integer, and
+                # try that
+                try:
+                    iloc = self.get_loc(key)
+                    return s[iloc]
+                except KeyError:
+                    if len(self) > 0 and (self.holds_integer() or self.is_boolean()):
+                        raise
+                    elif is_integer(key):
+                        return s[key]
+            else:
+                # if key is not a scalar, directly raise an error (the code below
+                # would convert to numpy arrays and raise later any way) - GH29926
+                raise InvalidIndexError(key)
 
         s = com.values_from_object(series)
         k = com.values_from_object(key)
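`extract_array` is internal API (subject to change between versions); a small sketch of the behavior `get_value` now relies on:

```python
import numpy as np
import pandas as pd
from pandas.core.construction import extract_array  # internal API

ser = pd.Series([1, 2, 3])

# Unwraps the Series to its backing array. With extract_numpy=True a plain
# numpy-backed Series comes back as an ndarray, so the
# isinstance(s, ExtensionArray) branch above fires only for genuine
# extension types (Categorical, DatetimeArray with tz, ...).
arr = extract_array(ser, extract_numpy=True)
assert isinstance(arr, np.ndarray)

cat = pd.Series(pd.Categorical(["a", "b"]))
assert not isinstance(extract_array(cat, extract_numpy=True), np.ndarray)
```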
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 11e87a4eed27f..410b10a69ecd5 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -158,7 +158,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame):
     _deprecations = (
         base.IndexOpsMixin._deprecations
         | generic.NDFrame._deprecations
-        | frozenset(["compress", "valid", "real", "imag", "put", "ptp", "nonzero"])
+        | frozenset(["compress", "ptp"])
     )
 
     # Override cache_readonly bc Series is mutable
@@ -528,23 +528,6 @@ def compress(self, condition, *args, **kwargs):
         nv.validate_compress(args, kwargs)
         return self[condition]
 
-    def put(self, *args, **kwargs):
-        """
-        Apply the `put` method to its `values` attribute if it has one.
-
-        .. deprecated:: 0.25.0
-
-        See Also
-        --------
-        numpy.ndarray.put
-        """
-        warnings.warn(
-            "`put` has been deprecated and will be removed in a future version.",
-            FutureWarning,
-            stacklevel=2,
-        )
-        self._values.put(*args, **kwargs)
-
     def __len__(self) -> int:
         """
         Return the length of the Series.
@@ -777,46 +760,6 @@ def __array__(self, dtype=None):
     # ----------------------------------------------------------------------
     # Unary Methods
 
-    @property
-    def real(self):
-        """
-        Return the real value of vector.
-
-        .. deprecated:: 0.25.0
-        """
-        warnings.warn(
-            "`real` is deprecated and will be removed in a future version. "
-            "To eliminate this warning for a Series `ser`, use "
-            "`np.real(ser.to_numpy())` or `ser.to_numpy().real`.",
-            FutureWarning,
-            stacklevel=2,
-        )
-        return self.values.real
-
-    @real.setter
-    def real(self, v):
-        self.values.real = v
-
-    @property
-    def imag(self):
-        """
-        Return imag value of vector.
-
-        .. deprecated:: 0.25.0
-        """
-        warnings.warn(
-            "`imag` is deprecated and will be removed in a future version. "
-            "To eliminate this warning for a Series `ser`, use "
-            "`np.imag(ser.to_numpy())` or `ser.to_numpy().imag`.",
-            FutureWarning,
-            stacklevel=2,
-        )
-        return self.values.imag
-
-    @imag.setter
-    def imag(self, v):
-        self.values.imag = v
-
     # coercion
     __float__ = _coerce_method(float)
     __long__ = _coerce_method(int)
@@ -2944,7 +2887,7 @@ def _try_kind_sort(arr):
                 sortedIdx[n:] = idx[good][argsorted]
                 sortedIdx[:n] = idx[bad]
             else:
-                raise ValueError("invalid na_position: {!r}".format(na_position))
+                raise ValueError(f"invalid na_position: {repr(na_position)}")
 
         result = self._constructor(arr[sortedIdx], index=self.index[sortedIdx])
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 82eb93dd4c879..ed9ef23132683 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -208,7 +208,7 @@ def lexsort_indexer(keys, orders=None, na_position="last"):
             cat = Categorical(key, ordered=True)
 
         if na_position not in ["last", "first"]:
-            raise ValueError("invalid na_position: {!r}".format(na_position))
+            raise ValueError(f"invalid na_position: {repr(na_position)}")
 
         n = len(cat.categories)
         codes = cat.codes.copy()
@@ -264,7 +264,7 @@ def nargsort(items, kind="quicksort", ascending: bool = True, na_position="last"
     elif na_position == "first":
         indexer = np.concatenate([nan_idx, indexer])
     else:
-        raise ValueError("invalid na_position: {!r}".format(na_position))
+        raise ValueError(f"invalid na_position: {repr(na_position)}")
 
     return indexer
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 137c37f938dfa..6cc102dce3b9c 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -9,7 +9,7 @@
 import pandas._libs.lib as lib
 import pandas._libs.ops as libops
-from pandas.util._decorators import Appender, deprecate_kwarg
+from pandas.util._decorators import Appender
 
 from pandas.core.dtypes.common import (
     ensure_object,
@@ -1933,10 +1933,8 @@ def _forbid_nonstring_types(func):
     def wrapper(self, *args, **kwargs):
         if self._inferred_dtype not in allowed_types:
             msg = (
-                "Cannot use .str.{name} with values of inferred dtype "
-                "{inf_type!r}.".format(
-                    name=func_name, inf_type=self._inferred_dtype
-                )
+                f"Cannot use .str.{func_name} with values of inferred dtype "
+                f"{repr(self._inferred_dtype)}."
             )
             raise TypeError(msg)
         return func(self, *args, **kwargs)
@@ -2630,9 +2628,6 @@ def rsplit(self, pat=None, n=-1, expand=False):
     ----------
     sep : str, default whitespace
         String to split on.
-    pat : str, default whitespace
-        .. deprecated:: 0.24.0
-           Use ``sep`` instead.
     expand : bool, default True
         If True, return DataFrame/MultiIndex expanding dimensionality.
         If False, return Series/Index.
@@ -2710,7 +2705,6 @@ def rsplit(self, pat=None, n=-1, expand=False):
             "also": "rpartition : Split the string at the last occurrence of `sep`.",
         }
     )
-    @deprecate_kwarg(old_arg_name="pat", new_arg_name="sep")
     @forbid_nonstring_types(["bytes"])
     def partition(self, sep=" ", expand=True):
         f = lambda x: x.partition(sep)
@@ -2726,7 +2720,6 @@ def partition(self, sep=" ", expand=True):
             "also": "partition : Split the string at the first occurrence of `sep`.",
         }
     )
-    @deprecate_kwarg(old_arg_name="pat", new_arg_name="sep")
     @forbid_nonstring_types(["bytes"])
     def rpartition(self, sep=" ", expand=True):
         f = lambda x: x.rpartition(sep)
diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
index 34838af5fd6e4..43ce8ad4abb45 100644
--- a/pandas/errors/__init__.py
+++ b/pandas/errors/__init__.py
@@ -167,9 +167,7 @@ class AbstractMethodError(NotImplementedError):
     def __init__(self, class_instance, methodtype="method"):
         types = {"method", "classmethod", "staticmethod", "property"}
         if methodtype not in types:
-            msg = "methodtype must be one of {}, got {} instead.".format(
-                methodtype, types
-            )
+            msg = f"methodtype must be one of {types}, got {methodtype} instead."
             raise ValueError(msg)
         self.methodtype = methodtype
         self.class_instance = class_instance
@@ -179,5 +177,4 @@ def __str__(self) -> str:
             name = self.class_instance.__name__
         else:
             name = type(self.class_instance).__name__
-        msg = "This {methodtype} must be defined in the concrete class {name}"
-        return msg.format(methodtype=self.methodtype, name=name)
+        return f"This {self.methodtype} must be defined in the concrete class {name}"
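For context, a short sketch of how `AbstractMethodError` is used; the printed message follows the corrected `__str__` above (class name is illustrative):

```python
from pandas.errors import AbstractMethodError


class Base:
    def _constructor(self):
        # Subclasses are expected to override this; raising
        # AbstractMethodError(self) yields the message
        # "This method must be defined in the concrete class Base".
        raise AbstractMethodError(self)


try:
    Base()._constructor()
except NotImplementedError as err:  # AbstractMethodError subclasses it
    print(err)
```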
diff --git a/pandas/io/common.py b/pandas/io/common.py
index c0eddb679c6f8..54253803c4881 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -47,9 +47,6 @@
 
 lzma = _import_lzma()
 
-# gh-12665: Alias for now and remove later.
-CParserError = ParserError
-
 # common NA values
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index a48d9abc3c13b..fca1d3265cac2 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -29,6 +29,7 @@
 from pandas._libs.tslibs import timezones
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import PerformanceWarning
+from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.common import (
     ensure_object,
@@ -39,6 +40,7 @@
     is_list_like,
     is_timedelta64_dtype,
 )
+from pandas.core.dtypes.generic import ABCExtensionArray
 from pandas.core.dtypes.missing import array_equivalent
 
 from pandas import (
@@ -53,7 +55,7 @@
     concat,
     isna,
 )
-from pandas._typing import FrameOrSeries
+from pandas._typing import ArrayLike, FrameOrSeries
 from pandas.core.arrays.categorical import Categorical
 import pandas.core.common as com
 from pandas.core.computation.pytables import PyTablesExpr, maybe_expression
@@ -258,19 +260,41 @@ def to_hdf(
     complib: Optional[str] = None,
     append: bool = False,
     format: Optional[str] = None,
+    index: bool = True,
+    min_itemsize: Optional[Union[int, Dict[str, int]]] = None,
+    nan_rep=None,
+    dropna: Optional[bool] = None,
+    data_columns: Optional[List[str]] = None,
     errors: str = "strict",
    encoding: str = "UTF-8",
-    **kwargs,
 ):
     """ store this object, close it if we opened it """
     if append:
         f = lambda store: store.append(
-            key, value, format=format, errors=errors, encoding=encoding, **kwargs
+            key,
+            value,
+            format=format,
+            index=index,
+            min_itemsize=min_itemsize,
+            nan_rep=nan_rep,
+            dropna=dropna,
+            data_columns=data_columns,
+            errors=errors,
+            encoding=encoding,
         )
     else:
+        # NB: dropna is not passed to `put`
         f = lambda store: store.put(
-            key, value, format=format, errors=errors, encoding=encoding, **kwargs
+            key,
+            value,
+            format=format,
+            index=index,
+            min_itemsize=min_itemsize,
+            nan_rep=nan_rep,
+            data_columns=data_columns,
+            errors=errors,
+            encoding=encoding,
         )
 
     path_or_buf = _stringify_path(path_or_buf)
@@ -982,7 +1006,21 @@ def func(_start, _stop, _where):
 
         return it.get_result(coordinates=True)
 
-    def put(self, key: str, value: FrameOrSeries, format=None, append=False, **kwargs):
+    def put(
+        self,
+        key: str,
+        value: FrameOrSeries,
+        format=None,
+        index=True,
+        append=False,
+        complib=None,
+        complevel: Optional[int] = None,
+        min_itemsize: Optional[Union[int, Dict[str, int]]] = None,
+        nan_rep=None,
+        data_columns: Optional[List[str]] = None,
+        encoding=None,
+        errors: str = "strict",
+    ):
         """
         Store object in HDFStore.
@@ -1012,7 +1050,20 @@ def put(self, key: str, value: FrameOrSeries, format=None, append=False, **kwarg
         if format is None:
             format = get_option("io.hdf.default_format") or "fixed"
         format = self._validate_format(format)
-        self._write_to_group(key, value, format=format, append=append, **kwargs)
+        self._write_to_group(
+            key,
+            value,
+            format=format,
+            index=index,
+            append=append,
+            complib=complib,
+            complevel=complevel,
+            min_itemsize=min_itemsize,
+            nan_rep=nan_rep,
+            data_columns=data_columns,
+            encoding=encoding,
+            errors=errors,
+        )
 
     def remove(self, key: str, where=None, start=None, stop=None):
         """
@@ -1073,10 +1124,20 @@ def append(
         key: str,
         value: FrameOrSeries,
         format=None,
+        axes=None,
+        index=True,
         append=True,
+        complib=None,
+        complevel: Optional[int] = None,
         columns=None,
+        min_itemsize: Optional[Union[int, Dict[str, int]]] = None,
+        nan_rep=None,
+        chunksize=None,
+        expectedrows=None,
         dropna: Optional[bool] = None,
-        **kwargs,
+        data_columns: Optional[List[str]] = None,
+        encoding=None,
+        errors: str = "strict",
     ):
         """
         Append to Table in file. Node must already exist and be Table
@@ -1123,7 +1184,22 @@ def append(
             format = get_option("io.hdf.default_format") or "table"
         format = self._validate_format(format)
         self._write_to_group(
-            key, value, format=format, append=append, dropna=dropna, **kwargs
+            key,
+            value,
+            format=format,
+            axes=axes,
+            index=index,
+            append=append,
+            complib=complib,
+            complevel=complevel,
+            min_itemsize=min_itemsize,
+            nan_rep=nan_rep,
+            chunksize=chunksize,
+            expectedrows=expectedrows,
+            dropna=dropna,
+            data_columns=data_columns,
+            encoding=encoding,
+            errors=errors,
         )
 
     def append_to_multiple(
@@ -1584,7 +1660,7 @@ def _write_to_group(
         complib=None,
         complevel: Optional[int] = None,
         fletcher32=None,
-        min_itemsize=None,
+        min_itemsize: Optional[Union[int, Dict[str, int]]] = None,
         chunksize=None,
         expectedrows=None,
         dropna=False,
@@ -2209,12 +2285,6 @@ def take_data(self):
         self.data, data = None, self.data
         return data
 
-    def set_metadata(self, metadata):
-        """ record the metadata """
-        if metadata is not None:
-            metadata = np.array(metadata, copy=False).ravel()
-        self.metadata = metadata
-
     def set_kind(self):
         # set my kind if we can
@@ -2246,7 +2316,6 @@ def set_kind(self):
     def set_atom(
         self,
         block,
-        block_items,
         existing_col,
         min_itemsize,
         nan_rep,
@@ -2256,13 +2325,15 @@ def set_atom(
     ):
         """ create and setup my atom from the block b """
 
-        self.values = list(block_items)
-
         # short-cut certain block types
         if block.is_categorical:
-            return self.set_atom_categorical(block, items=block_items, info=info)
+            self.set_atom_categorical(block)
+            self.update_info(info)
+            return
         elif block.is_datetimetz:
-            return self.set_atom_datetime64tz(block, info=info)
+            self.set_atom_datetime64tz(block)
+            self.update_info(info)
+            return
         elif block.is_datetime:
             return self.set_atom_datetime64(block)
         elif block.is_timedelta:
@@ -2290,13 +2361,7 @@ def set_atom(
         # end up here ###
         elif inferred_type == "string" or dtype == "object":
             self.set_atom_string(
-                block,
-                block_items,
-                existing_col,
-                min_itemsize,
-                nan_rep,
-                encoding,
-                errors,
+                block, existing_col, min_itemsize, nan_rep, encoding, errors,
             )
 
         # set as a data block
@@ -2307,7 +2372,7 @@ def get_atom_string(self, block, itemsize):
         return _tables().StringCol(itemsize=itemsize, shape=block.shape[0])
 
     def set_atom_string(
-        self, block, block_items, existing_col, min_itemsize, nan_rep, encoding, errors
+        self, block, existing_col, min_itemsize, nan_rep, encoding, errors
     ):
         # fill nan items with myself, don't disturb the blocks by
         # trying to downcast
@@ -2322,13 +2387,14 @@ def set_atom_string(
         # we cannot serialize this data, so report an exception on a column
         # by column basis
-        for i, item in enumerate(block_items):
+        for i in range(block.shape[0]):
 
             col = block.iget(i)
             inferred_type = lib.infer_dtype(col.ravel(), skipna=False)
             if inferred_type != "string":
+                iloc = block.mgr_locs.indexer[i]
                 raise TypeError(
-                    f"Cannot serialize the column [{item}] because\n"
+                    f"Cannot serialize the column [{iloc}] because\n"
                     f"its data contents are [{inferred_type}] object dtype"
                 )
@@ -2381,7 +2447,7 @@ def set_atom_data(self, block):
         self.typ = self.get_atom_data(block)
         self.set_data(block.values.astype(self.typ.type, copy=False))
 
-    def set_atom_categorical(self, block, items, info=None):
+    def set_atom_categorical(self, block):
         # currently only supports a 1-D categorical
         # in a 1-D block
 
@@ -2391,8 +2457,6 @@ def set_atom_categorical(self, block, items, info=None):
         self.dtype = codes.dtype.name
         if values.ndim > 1:
             raise NotImplementedError("only support 1-d categoricals")
-        if len(items) > 1:
-            raise NotImplementedError("only support single block categoricals")
 
         # write the codes; must be in a block shape
         self.ordered = values.ordered
@@ -2401,10 +2465,7 @@ def set_atom_categorical(self, block, items, info=None):
 
         # write the categories
         self.meta = "category"
-        self.set_metadata(block.values.categories)
-
-        # update the info
-        self.update_info(info)
+        self.metadata = np.array(block.values.categories, copy=False).ravel()
 
     def get_atom_datetime64(self, block):
         return _tables().Int64Col(shape=block.shape[0])
@@ -2415,7 +2476,7 @@ def set_atom_datetime64(self, block):
         values = block.values.view("i8")
         self.set_data(values, "datetime64")
 
-    def set_atom_datetime64tz(self, block, info):
+    def set_atom_datetime64tz(self, block):
 
         values = block.values
 
@@ -2424,7 +2485,6 @@ def set_atom_datetime64tz(self, block):
 
         # store a converted timezone
         self.tz = _get_tz(block.values.tz)
-        self.update_info(info)
 
         self.kind = "datetime64"
         self.typ = self.get_atom_datetime64(block)
@@ -2959,7 +3019,7 @@ def read_index_node(
         data = node[start:stop]
         # If the index was an empty array write_array_empty() will
         # have written a sentinel. Here we replace it with the original.
- if "shape" in node._v_attrs and self._is_empty_array(node._v_attrs.shape): + if "shape" in node._v_attrs and np.prod(node._v_attrs.shape) == 0: data = np.empty(node._v_attrs.shape, dtype=node._v_attrs.value_type,) kind = _ensure_decoded(node._v_attrs.kind) name = None @@ -3005,25 +3065,27 @@ def read_index_node( return index - def write_array_empty(self, key: str, value): + def write_array_empty(self, key: str, value: ArrayLike): """ write a 0-len array """ # ugly hack for length 0 axes arr = np.empty((1,) * value.ndim) self._handle.create_array(self.group, key, arr) - getattr(self.group, key)._v_attrs.value_type = str(value.dtype) - getattr(self.group, key)._v_attrs.shape = value.shape + node = getattr(self.group, key) + node._v_attrs.value_type = str(value.dtype) + node._v_attrs.shape = value.shape - def _is_empty_array(self, shape) -> bool: - """Returns true if any axis is zero length.""" - return any(x == 0 for x in shape) + def write_array(self, key: str, value: ArrayLike, items: Optional[Index] = None): + # TODO: we only have one test that gets here, the only EA + # that gets passed is DatetimeArray, and we never have + # both self._filters and EA + assert isinstance(value, (np.ndarray, ABCExtensionArray)), type(value) - def write_array(self, key: str, value, items=None): if key in self.group: self._handle.remove_node(self.group, key) # Transform needed to interface with pytables row/col notation - empty_array = self._is_empty_array(value.shape) + empty_array = value.size == 0 transposed = False if is_categorical_dtype(value): @@ -3038,29 +3100,29 @@ def write_array(self, key: str, value, items=None): value = value.T transposed = True + atom = None if self._filters is not None: - atom = None try: # get the atom for this datatype atom = _tables().Atom.from_dtype(value.dtype) except ValueError: pass - if atom is not None: - # create an empty chunked array and fill it from value - if not empty_array: - ca = self._handle.create_carray( - self.group, key, atom, value.shape, filters=self._filters - ) - ca[:] = value - getattr(self.group, key)._v_attrs.transposed = transposed + if atom is not None: + # We only get here if self._filters is non-None and + # the Atom.from_dtype call succeeded - else: - self.write_array_empty(key, value) + # create an empty chunked array and fill it from value + if not empty_array: + ca = self._handle.create_carray( + self.group, key, atom, value.shape, filters=self._filters + ) + ca[:] = value - return + else: + self.write_array_empty(key, value) - if value.dtype.type == np.object_: + elif value.dtype.type == np.object_: # infer the type, warn if we have a non-string type here (for # performance) @@ -3070,35 +3132,30 @@ def write_array(self, key: str, value, items=None): elif inferred_type == "string": pass else: - try: - items = list(items) - except TypeError: - pass ws = performance_doc % (inferred_type, key, items) warnings.warn(ws, PerformanceWarning, stacklevel=7) vlarr = self._handle.create_vlarray(self.group, key, _tables().ObjectAtom()) vlarr.append(value) + + elif empty_array: + self.write_array_empty(key, value) + elif is_datetime64_dtype(value.dtype): + self._handle.create_array(self.group, key, value.view("i8")) + getattr(self.group, key)._v_attrs.value_type = "datetime64" + elif is_datetime64tz_dtype(value.dtype): + # store as UTC + # with a zone + self._handle.create_array(self.group, key, value.asi8) + + node = getattr(self.group, key) + node._v_attrs.tz = _get_tz(value.tz) + node._v_attrs.value_type = "datetime64" + elif 
+        elif is_timedelta64_dtype(value.dtype):
+            self._handle.create_array(self.group, key, value.view("i8"))
+            getattr(self.group, key)._v_attrs.value_type = "timedelta64"
         else:
-            if empty_array:
-                self.write_array_empty(key, value)
-            else:
-                if is_datetime64_dtype(value.dtype):
-                    self._handle.create_array(self.group, key, value.view("i8"))
-                    getattr(self.group, key)._v_attrs.value_type = "datetime64"
-                elif is_datetime64tz_dtype(value.dtype):
-                    # store as UTC
-                    # with a zone
-                    self._handle.create_array(self.group, key, value.asi8)
-
-                    node = getattr(self.group, key)
-                    node._v_attrs.tz = _get_tz(value.tz)
-                    node._v_attrs.value_type = "datetime64"
-                elif is_timedelta64_dtype(value.dtype):
-                    self._handle.create_array(self.group, key, value.view("i8"))
-                    getattr(self.group, key)._v_attrs.value_type = "timedelta64"
-                else:
-                    self._handle.create_array(self.group, key, value)
+            self._handle.create_array(self.group, key, value)
 
         getattr(self.group, key)._v_attrs.transposed = transposed
@@ -3522,43 +3579,39 @@ def validate_min_itemsize(self, min_itemsize):
                 "data_column"
             )
 
-    @property
+    @cache_readonly
     def indexables(self):
         """ create/cache the indexables if they don't exist """
-        if self._indexables is None:
+        _indexables = []
+
+        # Note: each of the `name` kwargs below are str, ensured
+        #  by the definition in index_cols.
+        # index columns
+        _indexables.extend(
+            [
+                IndexCol(name=name, axis=axis, pos=i)
+                for i, (axis, name) in enumerate(self.attrs.index_cols)
+            ]
+        )
 
-            self._indexables = []
+        # values columns
+        dc = set(self.data_columns)
+        base_pos = len(_indexables)
 
-            # Note: each of the `name` kwargs below are str, ensured
-            #  by the definition in index_cols.
-            # index columns
-            self._indexables.extend(
-                [
-                    IndexCol(name=name, axis=axis, pos=i)
-                    for i, (axis, name) in enumerate(self.attrs.index_cols)
-                ]
+        def f(i, c):
+            assert isinstance(c, str)
+            klass = DataCol
+            if c in dc:
+                klass = DataIndexableCol
+            return klass.create_for_block(
+                i=i, name=c, pos=base_pos + i, version=self.version
             )
 
-            # values columns
-            dc = set(self.data_columns)
-            base_pos = len(self._indexables)
-
-            def f(i, c):
-                assert isinstance(c, str)
-                klass = DataCol
-                if c in dc:
-                    klass = DataIndexableCol
-                return klass.create_for_block(
-                    i=i, name=c, pos=base_pos + i, version=self.version
-                )
-
-            # Note: the definition of `values_cols` ensures that each
-            #  `c` below is a str.
-            self._indexables.extend(
-                [f(i, c) for i, c in enumerate(self.attrs.values_cols)]
-            )
+        # Note: the definition of `values_cols` ensures that each
+        #  `c` below is a str.
+        _indexables.extend([f(i, c) for i, c in enumerate(self.attrs.values_cols)])
 
-        return self._indexables
+        return _indexables
 
     def create_index(self, columns=None, optlevel=None, kind: Optional[str] = None):
         """
@@ -3922,9 +3975,9 @@ def get_blk_items(mgr, blocks):
                 existing_col = None
 
             col = klass.create_for_block(i=i, name=name, version=self.version)
+            col.values = list(b_items)
             col.set_atom(
                 block=b,
-                block_items=b_items,
                 existing_col=existing_col,
                 min_itemsize=min_itemsize,
                 nan_rep=nan_rep,
@@ -4140,7 +4193,6 @@ def write(self, **kwargs):
 class AppendableTable(Table):
     """ support the new appendable table formats """
 
-    _indexables = None
     table_type = "appendable"
 
     def write(
@@ -4232,21 +4284,7 @@ def write_data(self, chunksize: Optional[int], dropna: bool = False):
         # broadcast the indexes if needed
         indexes = [a.cvalues for a in self.index_axes]
         nindexes = len(indexes)
-        bindexes = []
-        for i, idx in enumerate(indexes):
-
-            # broadcast to all other indexes except myself
-            if i > 0 and i < nindexes:
-                repeater = np.prod([indexes[bi].shape[0] for bi in range(0, i)])
-                idx = np.tile(idx, repeater)
-
-            if i < nindexes - 1:
-                repeater = np.prod(
-                    [indexes[bi].shape[0] for bi in range(i + 1, nindexes)]
-                )
-                idx = np.repeat(idx, repeater)
-
-            bindexes.append(idx)
+        assert nindexes == 1, nindexes  # ensures we dont need to broadcast
 
         # transpose the values so first dimension is last
         # reshape the values if needed
@@ -4271,7 +4309,7 @@ def write_data(self, chunksize: Optional[int], dropna: bool = False):
 
             self.write_data_chunk(
                 rows,
-                indexes=[a[start_i:end_i] for a in bindexes],
+                indexes=[a[start_i:end_i] for a in indexes],
                 mask=mask[start_i:end_i] if mask is not None else None,
                 values=[v[start_i:end_i] for v in bvalues],
             )
@@ -4554,23 +4592,21 @@ def get_attrs(self):
         ]
         self.data_columns = [a.name for a in self.values_axes]
 
-    @property
+    @cache_readonly
     def indexables(self):
         """ create the indexables from the table description """
-        if self._indexables is None:
-
-            d = self.description
+        d = self.description
 
-            # the index columns is just a simple index
-            self._indexables = [GenericIndexCol(name="index", axis=0)]
+        # the index columns is just a simple index
+        _indexables = [GenericIndexCol(name="index", axis=0)]
 
-            for i, n in enumerate(d._v_names):
-                assert isinstance(n, str)
+        for i, n in enumerate(d._v_names):
+            assert isinstance(n, str)
 
-                dc = GenericDataIndexableCol(name=n, pos=i, values=[n])
-                self._indexables.append(dc)
+            dc = GenericDataIndexableCol(name=n, pos=i, values=[n])
+            _indexables.append(dc)
 
-        return self._indexables
+        return _indexables
 
     def write(self, **kwargs):
         raise NotImplementedError("cannot write on a generic table")
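`cache_readonly` computes the property once and stores the result on the instance, which is what replaces the hand-rolled `if self._indexables is None` pattern above. A minimal sketch (class and values are illustrative):

```python
from pandas.util._decorators import cache_readonly


class Table:
    @cache_readonly
    def indexables(self):
        # Runs on first access only; subsequent accesses return the
        # cached list without re-executing this body.
        print("building indexables")
        return ["index", "values_block_0"]


t = Table()
t.indexables  # prints "building indexables"
t.indexables  # cached: no print
```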
diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py
index 755cbfb716fcd..c9f96ed516dc5 100644
--- a/pandas/tests/arrays/sparse/test_array.py
+++ b/pandas/tests/arrays/sparse/test_array.py
@@ -1007,7 +1007,7 @@ def test_cumsum(self, data, expected, numpy):
             np.cumsum(SparseArray(data), out=out)
         else:
             axis = 1  # SparseArray currently 1-D, so only axis = 0 is valid.
-            msg = "axis\\(={axis}\\) out of bounds".format(axis=axis)
+            msg = re.escape(f"axis(={axis}) out of bounds")
             with pytest.raises(ValueError, match=msg):
                 SparseArray(data).cumsum(axis=axis)
diff --git a/pandas/tests/arrays/sparse/test_libsparse.py b/pandas/tests/arrays/sparse/test_libsparse.py
index a6836c58348b3..7a85ccf271e76 100644
--- a/pandas/tests/arrays/sparse/test_libsparse.py
+++ b/pandas/tests/arrays/sparse/test_libsparse.py
@@ -596,6 +596,6 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
 
     @pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv", "floordiv"])
     def test_op(self, opname):
-        sparse_op = getattr(splib, "sparse_{opname}_float64".format(opname=opname))
+        sparse_op = getattr(splib, f"sparse_{opname}_float64")
         python_op = getattr(operator, opname)
         self._op_tests(sparse_op, python_op)
diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py
index 6a28f76e474cc..a4554aca1325e 100644
--- a/pandas/tests/extension/arrow/arrays.py
+++ b/pandas/tests/extension/arrow/arrays.py
@@ -33,7 +33,7 @@ def construct_from_string(cls, string):
         if string == cls.name:
             return cls()
         else:
-            raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string))
+            raise TypeError(f"Cannot construct a '{cls}' from '{string}'")
 
     @classmethod
     def construct_array_type(cls):
@@ -56,7 +56,7 @@ def construct_from_string(cls, string):
         if string == cls.name:
             return cls()
         else:
-            raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string))
+            raise TypeError(f"Cannot construct a '{cls}' from '{string}'")
 
     @classmethod
     def construct_array_type(cls):
@@ -79,7 +79,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
         return cls.from_scalars(scalars)
 
     def __repr__(self):
-        return "{cls}({data})".format(cls=type(self).__name__, data=repr(self._data))
+        return f"{type(self).__name__}({repr(self._data)})"
 
     def __getitem__(self, item):
         if pd.api.types.is_scalar(item):
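The `re.escape` change in test_cumsum above deserves a note: `pytest.raises(match=...)` treats the message as a regular expression, so the literal parentheses in "axis(=1)" must be escaped; the old pattern did this by hand with "\\(". A tiny self-contained illustration (the surrounding strings are invented for the example):

    import re

    axis = 1
    raw = f"axis(={axis}) out of bounds"

    # Escaped, the parentheses match literally:
    assert re.search(re.escape(raw), "ValueError: axis(=1) out of bounds")

    # Unescaped, "(=1)" is parsed as a regex group, so the pattern silently
    # matches a string without any parentheses at all:
    assert re.search(raw, "ValueError: axis=1 out of bounds")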
diff --git a/pandas/tests/extension/base/printing.py b/pandas/tests/extension/base/printing.py
index 5d17a4b0cbee2..ad34a83c7cf71 100644
--- a/pandas/tests/extension/base/printing.py
+++ b/pandas/tests/extension/base/printing.py
@@ -19,7 +19,7 @@ def test_array_repr(self, data, size):
 
         result = repr(data)
         assert type(data).__name__ in result
-        assert "Length: {}".format(len(data)) in result
+        assert f"Length: {len(data)}" in result
         assert str(data.dtype) in result
         if size == "big":
             assert "..." in result
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index f9ba4b7a8ba16..0c2f1e845909a 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -23,7 +23,7 @@ def __init__(self, context=None):
         self.context = context or decimal.getcontext()
 
     def __repr__(self) -> str:
-        return "DecimalDtype(context={})".format(self.context)
+        return f"DecimalDtype(context={self.context})"
 
     @classmethod
     def construct_array_type(cls):
@@ -40,7 +40,7 @@ def construct_from_string(cls, string):
         if string == cls.name:
             return cls()
         else:
-            raise TypeError("Cannot construct a '{}' from '{}'".format(cls, string))
+            raise TypeError(f"Cannot construct a '{cls}' from '{string}'")
 
     @property
     def _is_numeric(self):
@@ -178,9 +178,7 @@ def _reduce(self, name, skipna=True, **kwargs):
         try:
             op = getattr(self.data, name)
         except AttributeError:
-            raise NotImplementedError(
-                "decimal does not support the {} operation".format(name)
-            )
+            raise NotImplementedError(f"decimal does not support the {name} operation")
         return op(axis=0)
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index ce819c13c4498..b5c3abd8ce8f6 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -478,3 +478,24 @@ def DecimalArray__my_sum(self):
     s = pd.Series(DecimalArray(data))
     result = s.groupby(np.array([0, 0, 0, 1, 1])).agg(lambda x: x.values.my_sum())
     tm.assert_series_equal(result, expected, check_names=False)
+
+
+def test_indexing_no_materialize(monkeypatch):
+    # See https://github.com/pandas-dev/pandas/issues/29708
+    # Ensure that indexing operations do not materialize (convert to a numpy
+    # array) the ExtensionArray unnecessarily
+
+    def DecimalArray__array__(self, dtype=None):
+        raise Exception("tried to convert a DecimalArray to a numpy array")
+
+    monkeypatch.setattr(DecimalArray, "__array__", DecimalArray__array__, raising=False)
+
+    data = make_data()
+    s = pd.Series(DecimalArray(data))
+    df = pd.DataFrame({"a": s, "b": range(len(s))})
+
+    # ensure the following operations do not raise an error
+    s[s > 0.5]
+    df[s > 0.5]
+    s.at[0]
+    df.at[0, "a"]
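The new test_indexing_no_materialize leans on the fact that numpy coerces unknown objects by calling their `__array__` hook; patching that hook to raise turns any accidental materialization into a hard failure. A minimal sketch of the same trick outside pytest (the `Wrapped` class is invented for illustration):

    import numpy as np


    class Wrapped:
        """Toy array-like; np.asarray() coerces it via __array__."""

        def __init__(self, values):
            self._values = list(values)

        def __array__(self, dtype=None):
            raise RuntimeError("tried to convert Wrapped to a numpy array")


    w = Wrapped([1, 2, 3])
    try:
        np.asarray(w)  # any accidental coercion now fails loudly
    except RuntimeError as exc:
        print(exc)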
diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
index 90e795bd5c52f..e8c60870e2a85 100644
--- a/pandas/tests/io/excel/test_openpyxl.py
+++ b/pandas/tests/io/excel/test_openpyxl.py
@@ -1,11 +1,8 @@
-from distutils.version import LooseVersion
 import os
 
 import numpy as np
 import pytest
 
-from pandas.compat import PY37, is_platform_mac
-
 import pandas as pd
 from pandas import DataFrame
 import pandas.util.testing as tm
@@ -16,8 +13,6 @@
 
 pytestmark = pytest.mark.parametrize("ext", [".xlsx"])
 
-openpyxl_gt301 = LooseVersion(openpyxl.__version__) > LooseVersion("3.0.1")
-
 
 def test_to_excel_styleconverter(ext):
     from openpyxl import styles
@@ -86,9 +81,6 @@ def test_write_cells_merge_styled(ext):
         assert xcell_a2.font == openpyxl_sty_merged
 
 
-@pytest.mark.xfail(
-    openpyxl_gt301 and PY37 and is_platform_mac(), reason="broken change in openpyxl"
-)
 @pytest.mark.parametrize(
     "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
 )
@@ -115,9 +107,6 @@ def test_write_append_mode(ext, mode, expected):
         assert wb2.worksheets[index]["A1"].value == cell_value
 
 
-@pytest.mark.xfail(
-    openpyxl_gt301 and PY37 and is_platform_mac(), reason="broken change in openpyxl"
-)
 def test_to_excel_with_openpyxl_engine(ext, tmpdir):
     # GH 29854
     # TODO: Fix this once newer version of openpyxl fixes the bug
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 3e687d185df84..a98c93c250070 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -504,6 +504,7 @@ def test_empty_dataframe(self, pa):
         df = pd.DataFrame()
         check_round_trip(df, pa)
 
+    @pytest.mark.skip(reason="broken test")
     @td.skip_if_no("pyarrow", min_version="0.15.0")
     def test_additional_extension_arrays(self, pa):
         # test additional ExtensionArrays that are supported through the
diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
index a29f910261b58..f9229e8066be4 100644
--- a/pandas/tests/resample/test_datetime_index.py
+++ b/pandas/tests/resample/test_datetime_index.py
@@ -146,9 +146,7 @@ def test_resample_basic_grouper(series):
 def test_resample_string_kwargs(series, keyword, value):
     # see gh-19303
     # Check that wrong keyword argument strings raise an error
-    msg = "Unsupported value {value} for `{keyword}`".format(
-        value=value, keyword=keyword
-    )
+    msg = f"Unsupported value {value} for `{keyword}`"
     with pytest.raises(ValueError, match=msg):
         series.resample("5min", **({keyword: value}))
diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py
index 574182ae99c5c..4c27d48cff6fd 100644
--- a/pandas/tests/resample/test_time_grouper.py
+++ b/pandas/tests/resample/test_time_grouper.py
@@ -89,7 +89,7 @@ def test_fails_on_no_datetime_index(name, func):
 
     msg = (
         "Only valid with DatetimeIndex, TimedeltaIndex "
-        "or PeriodIndex, but got an instance of '{}'".format(name)
+        f"or PeriodIndex, but got an instance of '{name}'"
     )
     with pytest.raises(TypeError, match=msg):
         df.groupby(Grouper(freq="D"))
diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py
index 79608f4fb3cde..e709db980b721 100644
--- a/pandas/tests/scalar/test_nat.py
+++ b/pandas/tests/scalar/test_nat.py
@@ -141,7 +141,7 @@ def test_round_nat(klass, method, freq):
 )
 def test_nat_methods_raise(method):
     # see gh-9513, gh-17329
-    msg = "NaTType does not support {method}".format(method=method)
+    msg = f"NaTType does not support {method}"
     with pytest.raises(ValueError, match=msg):
         getattr(NaT, method)()
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index 0dc64651e8d58..065be966efa49 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -411,26 +411,6 @@ def test_astype_empty_constructor_equality(self, dtype):
         as_type_empty = Series([]).astype(dtype)
         tm.assert_series_equal(init_empty, as_type_empty)
 
-    @pytest.mark.filterwarnings("ignore::FutureWarning")
-    def test_complex(self):
-        # see gh-4819: complex access for ndarray compat
-        a = np.arange(5, dtype=np.float64)
-        b = Series(a + 4j * a)
-
-        tm.assert_numpy_array_equal(a, np.real(b))
-        tm.assert_numpy_array_equal(4 * a, np.imag(b))
-
-        b.real = np.arange(5) + 5
-        tm.assert_numpy_array_equal(a + 5, np.real(b))
-        tm.assert_numpy_array_equal(4 * a, np.imag(b))
-
-    def test_real_imag_deprecated(self):
-        # GH 18262
-        s = pd.Series([1])
-        with tm.assert_produces_warning(FutureWarning):
-            s.imag
-            s.real
-
     def test_arg_for_errors_in_astype(self):
         # see gh-14878
         s = Series([1, 2, 3])
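The deleted test_complex and test_real_imag_deprecated covered the ndarray-compat `Series.real`/`Series.imag` accessors that this release removes. The same values remain reachable through the underlying numpy array; a short sketch of the replacement idiom (variable names are illustrative):

    import numpy as np
    import pandas as pd

    a = np.arange(5, dtype=np.float64)
    s = pd.Series(a + 4j * a)

    # Instead of the removed s.real / s.imag, go through the ndarray:
    real = s.to_numpy().real      # array([0., 1., 2., 3., 4.])
    imag = np.imag(s.to_numpy())  # array([ 0.,  4.,  8., 12., 16.])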
diff --git a/pandas/tests/series/test_internals.py b/pandas/tests/series/test_internals.py
index 187c5d90407ce..efcb500a0b79f 100644
--- a/pandas/tests/series/test_internals.py
+++ b/pandas/tests/series/test_internals.py
@@ -242,10 +242,3 @@ def test_hasnans_unchached_for_series():
     ser.iloc[-1] = np.nan
     assert ser.hasnans is True
     assert Series.hasnans.__doc__ == pd.Index.hasnans.__doc__
-
-
-def test_put_deprecated():
-    # GH 18262
-    s = pd.Series([1])
-    with tm.assert_produces_warning(FutureWarning):
-        s.put(0, 0)
diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py
index 531c511e8c02d..fa2142444ed92 100644
--- a/pandas/tests/test_errors.py
+++ b/pandas/tests/test_errors.py
@@ -39,22 +39,6 @@ def test_catch_oob():
         pass
 
 
-def test_error_rename():
-    # see gh-12665
-    from pandas.errors import ParserError
-    from pandas.io.common import CParserError
-
-    try:
-        raise CParserError()
-    except ParserError:
-        pass
-
-    try:
-        raise ParserError()
-    except CParserError:
-        pass
-
-
 class Foo:
     @classmethod
     def classmethod(cls):
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
index 0e2f8ee6543e1..cf52e286a47a5 100644
--- a/pandas/tests/test_strings.py
+++ b/pandas/tests/test_strings.py
@@ -2966,23 +2966,21 @@ def test_partition_with_name(self):
         assert res.nlevels == 1
         tm.assert_index_equal(res, exp)
 
-    def test_partition_deprecation(self):
+    def test_partition_sep_kwarg(self):
         # GH 22676; depr kwarg "pat" in favor of "sep"
         values = Series(["a_b_c", "c_d_e", np.nan, "f_g_h"])
 
         # str.partition
         # using sep -> no warning
         expected = values.str.partition(sep="_")
-        with tm.assert_produces_warning(FutureWarning):
-            result = values.str.partition(pat="_")
-            tm.assert_frame_equal(result, expected)
+        result = values.str.partition("_")
+        tm.assert_frame_equal(result, expected)
 
         # str.rpartition
         # using sep -> no warning
         expected = values.str.rpartition(sep="_")
-        with tm.assert_produces_warning(FutureWarning):
-            result = values.str.rpartition(pat="_")
-            tm.assert_frame_equal(result, expected)
+        result = values.str.rpartition("_")
+        tm.assert_frame_equal(result, expected)
 
     def test_pipe_failures(self):
         # #2119
diff --git a/pandas/tests/tseries/offsets/common.py b/pandas/tests/tseries/offsets/common.py
index fbf4454109ec0..a097636bbf0b4 100644
--- a/pandas/tests/tseries/offsets/common.py
+++ b/pandas/tests/tseries/offsets/common.py
@@ -13,18 +13,14 @@ def assert_offset_equal(offset, base, expected):
         assert actual_apply == expected
     except AssertionError:
         raise AssertionError(
-            "\nExpected: {expected}\nActual: {actual}\nFor Offset: {offset})"
-            "\nAt Date: {base}".format(
-                expected=expected, actual=actual, offset=offset, base=base
-            )
+            f"\nExpected: {expected}\nActual: {actual}\nFor Offset: {offset})"
+            f"\nAt Date: {base}"
         )
 
 
 def assert_onOffset(offset, date, expected):
     actual = offset.onOffset(date)
     assert actual == expected, (
-        "\nExpected: {expected}\nActual: {actual}\nFor Offset: {offset})"
-        "\nAt Date: {date}".format(
-            expected=expected, actual=actual, offset=offset, date=date
-        )
+        f"\nExpected: {expected}\nActual: {actual}\nFor Offset: {offset})"
+        f"\nAt Date: {date}"
     )
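With the deprecated "pat" keyword path gone from test_partition_sep_kwarg above, the separator for str.partition/str.rpartition is passed positionally or as sep=. A quick usage sketch mirroring the test data:

    import numpy as np
    import pandas as pd

    values = pd.Series(["a_b_c", "c_d_e", np.nan, "f_g_h"])

    left = values.str.partition("_")        # 3 columns: head, sep, rest
    right = values.str.rpartition(sep="_")  # splits on the last "_" instead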
diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py
index 9162e6a415b34..26b13b42b1af6 100644
--- a/pandas/tseries/frequencies.py
+++ b/pandas/tseries/frequencies.py
@@ -245,7 +245,7 @@ def infer_freq(index, warn: bool = True) -> Optional[str]:
         ):
             raise TypeError(
                 "cannot infer freq from a non-convertible dtype "
-                "on a Series of {dtype}".format(dtype=index.dtype)
+                f"on a Series of {index.dtype}"
             )
         index = values
 
@@ -263,8 +263,7 @@ def infer_freq(index, warn: bool = True) -> Optional[str]:
     if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex):
         if isinstance(index, (pd.Int64Index, pd.Float64Index)):
             raise TypeError(
-                "cannot infer freq from a non-convertible index "
-                "type {type}".format(type=type(index))
+                f"cannot infer freq from a non-convertible index type {type(index)}"
             )
         index = index.values
 
@@ -396,7 +395,7 @@ def _infer_daily_rule(self) -> Optional[str]:
         if annual_rule:
             nyears = self.ydiffs[0]
             month = MONTH_ALIASES[self.rep_stamp.month]
-            alias = "{prefix}-{month}".format(prefix=annual_rule, month=month)
+            alias = f"{annual_rule}-{month}"
             return _maybe_add_count(alias, nyears)
 
         quarterly_rule = self._get_quarterly_rule()
@@ -404,7 +403,7 @@ def _infer_daily_rule(self) -> Optional[str]:
             nquarters = self.mdiffs[0] / 3
             mod_dict = {0: 12, 2: 11, 1: 10}
             month = MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]]
-            alias = "{prefix}-{month}".format(prefix=quarterly_rule, month=month)
+            alias = f"{quarterly_rule}-{month}"
             return _maybe_add_count(alias, nquarters)
 
         monthly_rule = self._get_monthly_rule()
@@ -416,7 +415,7 @@ def _infer_daily_rule(self) -> Optional[str]:
             if days % 7 == 0:
                 # Weekly
                 day = int_to_weekday[self.rep_stamp.weekday()]
-                return _maybe_add_count("W-{day}".format(day=day), days / 7)
+                return _maybe_add_count(f"W-{day}", days / 7)
             else:
                 return _maybe_add_count("D", days)
 
@@ -490,7 +489,7 @@ def _get_wom_rule(self) -> Optional[str]:
         week = week_of_months[0] + 1
         wd = int_to_weekday[weekdays[0]]
 
-        return "WOM-{week}{weekday}".format(week=week, weekday=wd)
+        return f"WOM-{week}{wd}"
 
 
 class _TimedeltaFrequencyInferer(_FrequencyInferer):
@@ -500,7 +499,7 @@ def _infer_daily_rule(self):
             if days % 7 == 0:
                 # Weekly
                 wd = int_to_weekday[self.rep_stamp.weekday()]
-                alias = "W-{weekday}".format(weekday=wd)
+                alias = f"W-{wd}"
                 return _maybe_add_count(alias, days / 7)
             else:
                 return _maybe_add_count("D", days)
 
@@ -514,6 +513,6 @@ def _maybe_add_count(base: str, count: float) -> str:
     if count != 1:
         assert count == int(count)
         count = int(count)
-        return "{count}{base}".format(count=count, base=base)
+        return f"{count}{base}"
     else:
         return base
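The f-strings above assemble frequency aliases such as "W-SUN", "WOM-3FRI", or, via _maybe_add_count, counted forms like "2W-SUN". A small round-trip sketch of the inference path these functions serve; the expected outputs assume the alias rules shown above:

    import pandas as pd

    weekly = pd.date_range("2020-01-05", periods=6, freq="W-SUN")
    print(pd.infer_freq(weekly))  # "W-SUN"

    biweekly = pd.date_range("2020-01-05", periods=6, freq="2W-SUN")
    print(pd.infer_freq(biweekly))  # "2W-SUN", count prepended by _maybe_add_count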