diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index 765c1b8bff62e..0000000000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,39 +0,0 @@
----
-
-name: Bug Report
-about: Create a bug report to help us improve pandas
-title: "BUG:"
-labels: "Bug, Needs Triage"
-
----
-
-- [ ] I have checked that this issue has not already been reported.
-
-- [ ] I have confirmed this bug exists on the latest version of pandas.
-
-- [ ] (optional) I have confirmed this bug exists on the master branch of pandas.
-
----
-
-**Note**: Please read [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) detailing how to provide the necessary information for us to reproduce your bug.
-
-#### Code Sample, a copy-pastable example
-
-```python
-# Your code here
-
-```
-
-#### Problem description
-
-[this should explain **why** the current behaviour is a problem and why the expected output is a better solution]
-
-#### Expected Output
-
-#### Output of ``pd.show_versions()``
-
-
-
-[paste the output of ``pd.show_versions()`` here leaving a blank line after the details tag]
-
-
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml
new file mode 100644
index 0000000000000..0f50eb47607cd
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.yaml
@@ -0,0 +1,67 @@
+name: Bug Report
+description: Report incorrect behavior in the pandas library
+title: "BUG: "
+labels: [Bug, Needs Triage]
+
+body:
+ - type: checkboxes
+ id: checks
+ attributes:
+ options:
+ - label: >
+ I have checked that this issue has not already been reported.
+ required: true
+ - label: >
+ I have confirmed this bug exists on the
+ [latest version](https://pandas.pydata.org/docs/whatsnew/index.html) of pandas.
+ required: true
+ - label: >
+ I have confirmed this bug exists on the master branch of pandas.
+ - type: textarea
+ id: example
+ attributes:
+ label: Reproducible Example
+ description: >
+ Please follow [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) on how to
+ provide a minimal, copy-pastable example.
+ placeholder: >
+ import pandas as pd
+
+ df = pd.DataFrame(range(5))
+
+ ...
+ render: python
+ validations:
+ required: true
+ - type: textarea
+ id: problem
+ attributes:
+ label: Issue Description
+ description: >
+ Please provide a description of the issue shown in the reproducible example.
+ validations:
+ required: true
+ - type: textarea
+ id: expected-behavior
+ attributes:
+ label: Expected Behavior
+ description: >
+ Please describe or show a code example of the expected behavior.
+ validations:
+ required: true
+ - type: textarea
+ id: version
+ attributes:
+ label: Installed Versions
+ description: >
+ Please paste the output of ``pd.show_versions()``
+ value: >
+
+
+
+ Replace this line with the output of pd.show_versions()
+
+
+
+ validations:
+ required: true
diff --git a/.github/ISSUE_TEMPLATE/installation_issue.yaml b/.github/ISSUE_TEMPLATE/installation_issue.yaml
new file mode 100644
index 0000000000000..d5db0d1c83a41
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/installation_issue.yaml
@@ -0,0 +1,65 @@
+name: Installation Issue
+description: Report issues installing the pandas library on your system
+title: "BUILD: "
+labels: [Build, Needs Triage]
+
+body:
+ - type: checkboxes
+ id: checks
+ attributes:
+ options:
+ - label: >
+ I have read the [installation guide](https://pandas.pydata.org/pandas-docs/stable/getting_started/install.html#installing-pandas).
+ required: true
+ - type: input
+ id: platform
+ attributes:
+ label: Platform
+ description: >
+ Please provide the output of ``import platform; print(platform.platform())``
+ validations:
+ required: true
+ - type: dropdown
+ id: method
+ attributes:
+ label: Installation Method
+ description: >
+ Please provide how you tried to install pandas from a clean environment.
+ options:
+ - pip install
+ - conda install
+ - apt-get install
+ - Built from source
+ - Other
+ validations:
+ required: true
+ - type: input
+ id: pandas
+ attributes:
+ label: pandas Version
+ description: >
+ Please provide the version of pandas you are trying to install.
+ validations:
+ required: true
+ - type: input
+ id: python
+ attributes:
+ label: Python Version
+ description: >
+ Please provide the installed version of Python.
+ validations:
+ required: true
+ - type: textarea
+ id: logs
+ attributes:
+ label: Installation Logs
+ description: >
+ If possible, please copy and paste the installation logs when attempting to install pandas.
+ value: >
+
+
+
+ Replace this line with the installation logs.
+
+
+
diff --git a/.github/ISSUE_TEMPLATE/performance_issue.yaml b/.github/ISSUE_TEMPLATE/performance_issue.yaml
new file mode 100644
index 0000000000000..2dcfc94f4a604
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/performance_issue.yaml
@@ -0,0 +1,52 @@
+name: Performance Issue
+description: Report slow performance or memory issues when running pandas code
+title: "PERF: "
+labels: [Performance, Needs Triage]
+
+body:
+ - type: checkboxes
+ id: checks
+ attributes:
+ options:
+ - label: >
+ I have checked that this issue has not already been reported.
+ required: true
+ - label: >
+ I have confirmed this issue exists on the
+ [latest version](https://pandas.pydata.org/docs/whatsnew/index.html) of pandas.
+ required: true
+ - label: >
+ I have confirmed this issue exists on the master branch of pandas.
+ - type: textarea
+ id: example
+ attributes:
+ label: Reproducible Example
+ description: >
+ Please provide a minimal, copy-pastable example that quantifies
+ [slow runtime](https://docs.python.org/3/library/timeit.html) or
+ [memory](https://pypi.org/project/memory-profiler/) issues.
+ validations:
+ required: true
+ - type: textarea
+ id: version
+ attributes:
+ label: Installed Versions
+ description: >
+ Please paste the output of ``pd.show_versions()``
+ value: >
+
+
+
+ Replace this line with the output of pd.show_versions()
+
+
+
+ validations:
+ required: true
+ - type: textarea
+ id: prior-performance
+ attributes:
+ label: Prior Performance
+ description: >
+ If applicable, please provide the prior version of pandas and output
+ of the same reproducible example where the performance issue did not exist.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 7fb5a6ddf2024..42017db8a05b1 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,4 +1,4 @@
- [ ] closes #xxxx
- [ ] tests added / passed
-- [ ] Ensure all linting tests pass, see [here](https://pandas.pydata.org/pandas-docs/dev/development/contributing.html#code-standards) for how to run them
+- [ ] Ensure all linting tests pass, see [here](https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#pre-commit) for how to run them
- [ ] whatsnew entry
diff --git a/.github/workflows/asv-bot.yml b/.github/workflows/asv-bot.yml
new file mode 100644
index 0000000000000..c2a49dd96c1c1
--- /dev/null
+++ b/.github/workflows/asv-bot.yml
@@ -0,0 +1,81 @@
+name: "ASV Bot"
+
+on:
+ issue_comment: # Pull requests are issues
+ types:
+ - created
+
+env:
+ ENV_FILE: environment.yml
+ COMMENT: ${{github.event.comment.body}}
+
+jobs:
+ autotune:
+ name: "Run benchmarks"
+ # TODO: Support more benchmarking options later, against different branches, against self, etc
+ if: startsWith(github.event.comment.body, '@github-actions benchmark')
+ runs-on: ubuntu-latest
+ defaults:
+ run:
+ shell: bash -l {0}
+
+ concurrency:
+ # Set concurrency to prevent abuse(full runs are ~5.5 hours !!!)
+ # each user can only run one concurrent benchmark bot at a time
+ # We don't cancel in progress jobs, but if you want to benchmark multiple PRs, you're gonna have
+ # to wait
+ group: ${{ github.actor }}-asv
+ cancel-in-progress: false
+
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+ with:
+ fetch-depth: 0
+
+ - name: Cache conda
+ uses: actions/cache@v2
+ with:
+ path: ~/conda_pkgs_dir
+ key: ${{ runner.os }}-conda-${{ hashFiles('${{ env.ENV_FILE }}') }}
+
+ # Although asv sets up its own env, deps are still needed
+ # during discovery process
+ - uses: conda-incubator/setup-miniconda@v2
+ with:
+ activate-environment: pandas-dev
+ channel-priority: strict
+ environment-file: ${{ env.ENV_FILE }}
+ use-only-tar-bz2: true
+
+ - name: Run benchmarks
+ id: bench
+ continue-on-error: true # This is a fake failure, asv will exit code 1 for regressions
+ run: |
+ # extracting the regex, see https://stackoverflow.com/a/36798723
+ REGEX=$(echo "$COMMENT" | sed -n "s/^.*-b\s*\(\S*\).*$/\1/p")
+ cd asv_bench
+ asv check -E existing
+ git remote add upstream https://github.com/pandas-dev/pandas.git
+ git fetch upstream
+ asv machine --yes
+ asv continuous -f 1.1 -b $REGEX upstream/master HEAD
+ echo 'BENCH_OUTPUT<<EOF' >> $GITHUB_ENV
+ asv compare -f 1.1 upstream/master HEAD >> $GITHUB_ENV
+ echo 'EOF' >> $GITHUB_ENV
+ echo "REGEX=$REGEX" >> $GITHUB_ENV
+
+ - uses: actions/github-script@v4
+ env:
+ BENCH_OUTPUT: ${{env.BENCH_OUTPUT}}
+ REGEX: ${{env.REGEX}}
+ with:
+ script: |
+ const ENV_VARS = process.env
+ const run_url = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`
+ github.issues.createComment({
+ issue_number: context.issue.number,
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ body: '\nBenchmarks completed. View runner logs here.' + run_url + '\nRegex used: '+ 'regex ' + ENV_VARS["REGEX"] + '\n' + ENV_VARS["BENCH_OUTPUT"]
+ })
diff --git a/.github/workflows/autoupdate-pre-commit-config.yml b/.github/workflows/autoupdate-pre-commit-config.yml
index 801e063f72726..3696cba8cf2e6 100644
--- a/.github/workflows/autoupdate-pre-commit-config.yml
+++ b/.github/workflows/autoupdate-pre-commit-config.yml
@@ -2,7 +2,7 @@ name: "Update pre-commit config"
on:
schedule:
- - cron: "0 7 * * 1" # At 07:00 on each Monday.
+ - cron: "0 7 1 * *" # At 07:00 on 1st of every month.
workflow_dispatch:
jobs:
diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py
index c45d5a0814544..55f6be848aa13 100644
--- a/asv_bench/benchmarks/dtypes.py
+++ b/asv_bench/benchmarks/dtypes.py
@@ -50,15 +50,26 @@ def time_pandas_dtype_invalid(self, dtype):
class SelectDtypes:
- params = [
- tm.ALL_INT_NUMPY_DTYPES
- + tm.ALL_INT_EA_DTYPES
- + tm.FLOAT_NUMPY_DTYPES
- + tm.COMPLEX_DTYPES
- + tm.DATETIME64_DTYPES
- + tm.TIMEDELTA64_DTYPES
- + tm.BOOL_DTYPES
- ]
+ try:
+ params = [
+ tm.ALL_INT_NUMPY_DTYPES
+ + tm.ALL_INT_EA_DTYPES
+ + tm.FLOAT_NUMPY_DTYPES
+ + tm.COMPLEX_DTYPES
+ + tm.DATETIME64_DTYPES
+ + tm.TIMEDELTA64_DTYPES
+ + tm.BOOL_DTYPES
+ ]
+ except AttributeError:
+ params = [
+ tm.ALL_INT_DTYPES
+ + tm.ALL_EA_INT_DTYPES
+ + tm.FLOAT_DTYPES
+ + tm.COMPLEX_DTYPES
+ + tm.DATETIME64_DTYPES
+ + tm.TIMEDELTA64_DTYPES
+ + tm.BOOL_DTYPES
+ ]
param_names = ["dtype"]
def setup(self, dtype):
diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
index 75ef8a276da5e..58f2a73d82842 100644
--- a/asv_bench/benchmarks/indexing.py
+++ b/asv_bench/benchmarks/indexing.py
@@ -399,12 +399,14 @@ class ChainIndexing:
def setup(self, mode):
self.N = 1000000
+ self.df = DataFrame({"A": np.arange(self.N), "B": "foo"})
def time_chained_indexing(self, mode):
+ df = self.df
+ N = self.N
with warnings.catch_warnings(record=True):
with option_context("mode.chained_assignment", mode):
- df = DataFrame({"A": np.arange(self.N), "B": "foo"})
- df2 = df[df.A > self.N // 2]
+ df2 = df[df.A > N // 2]
df2["C"] = 1.0
diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py
index 0aa924dabd469..4cbaa184791b8 100644
--- a/asv_bench/benchmarks/inference.py
+++ b/asv_bench/benchmarks/inference.py
@@ -115,19 +115,27 @@ def time_maybe_convert_objects(self):
class ToDatetimeFromIntsFloats:
def setup(self):
self.ts_sec = Series(range(1521080307, 1521685107), dtype="int64")
+ self.ts_sec_uint = Series(range(1521080307, 1521685107), dtype="uint64")
self.ts_sec_float = self.ts_sec.astype("float64")
self.ts_nanosec = 1_000_000 * self.ts_sec
+ self.ts_nanosec_uint = 1_000_000 * self.ts_sec_uint
self.ts_nanosec_float = self.ts_nanosec.astype("float64")
- # speed of int64 and float64 paths should be comparable
+ # speed of int64, uint64 and float64 paths should be comparable
def time_nanosec_int64(self):
to_datetime(self.ts_nanosec, unit="ns")
+ def time_nanosec_uint64(self):
+ to_datetime(self.ts_nanosec_uint, unit="ns")
+
def time_nanosec_float64(self):
to_datetime(self.ts_nanosec_float, unit="ns")
+ def time_sec_uint64(self):
+ to_datetime(self.ts_sec_uint, unit="s")
+
def time_sec_int64(self):
to_datetime(self.ts_sec, unit="s")
diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
index 35e5818cd3b2b..c8c1a962e6861 100644
--- a/asv_bench/benchmarks/sparse.py
+++ b/asv_bench/benchmarks/sparse.py
@@ -67,16 +67,28 @@ def time_sparse_series_from_coo(self):
class ToCoo:
- def setup(self):
+ params = [True, False]
+ param_names = ["sort_labels"]
+
+ def setup(self, sort_labels):
s = Series([np.nan] * 10000)
s[0] = 3.0
s[100] = -1.0
s[999] = 12.1
- s.index = MultiIndex.from_product([range(10)] * 4)
- self.ss = s.astype("Sparse")
- def time_sparse_series_to_coo(self):
- self.ss.sparse.to_coo(row_levels=[0, 1], column_levels=[2, 3], sort_labels=True)
+ s_mult_lvl = s.set_axis(MultiIndex.from_product([range(10)] * 4))
+ self.ss_mult_lvl = s_mult_lvl.astype("Sparse")
+
+ s_two_lvl = s.set_axis(MultiIndex.from_product([range(100)] * 2))
+ self.ss_two_lvl = s_two_lvl.astype("Sparse")
+
+ def time_sparse_series_to_coo(self, sort_labels):
+ self.ss_mult_lvl.sparse.to_coo(
+ row_levels=[0, 1], column_levels=[2, 3], sort_labels=sort_labels
+ )
+
+ def time_sparse_series_to_coo_single_level(self, sort_labels):
+ self.ss_two_lvl.sparse.to_coo(sort_labels=sort_labels)
class Arithmetic:
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 8df048ce65582..0096b3337e19a 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -461,7 +461,6 @@
# eg pandas.Series.str and pandas.Series.dt (see GH9322)
import sphinx # isort:skip
-from sphinx.util import rpartition # isort:skip
from sphinx.ext.autodoc import ( # isort:skip
AttributeDocumenter,
Documenter,
@@ -521,8 +520,8 @@ def resolve_name(self, modname, parents, path, base):
# HACK: this is added in comparison to ClassLevelDocumenter
# mod_cls still exists of class.accessor, so an extra
# rpartition is needed
- modname, accessor = rpartition(mod_cls, ".")
- modname, cls = rpartition(modname, ".")
+ modname, _, accessor = mod_cls.rpartition(".")
+ modname, _, cls = modname.rpartition(".")
parents = [cls, accessor]
# if the module name is still missing, get it like above
if not modname:
diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst
index f4a09e0daa750..9b3d50069b077 100644
--- a/doc/source/development/contributing.rst
+++ b/doc/source/development/contributing.rst
@@ -331,7 +331,12 @@ can comment::
@github-actions pre-commit
-on that pull request. This will trigger a workflow which will autofix formatting errors.
+on that pull request. This will trigger a workflow which will autofix formatting
+errors.
+
+To automatically fix formatting errors on each commit you make, you can
+set up pre-commit yourself. First, create a Python :ref:`environment
+<contributing_environment>` and then set up :ref:`pre-commit <contributing.pre-commit>`.
Delete your merged branch (optional)
------------------------------------
diff --git a/doc/source/development/contributing_environment.rst b/doc/source/development/contributing_environment.rst
index 1f9b34c7a784e..4c3c12eb9da92 100644
--- a/doc/source/development/contributing_environment.rst
+++ b/doc/source/development/contributing_environment.rst
@@ -133,7 +133,6 @@ compiler installation instructions.
Let us know if you have any difficulties by opening an issue or reaching out on `Gitter `_.
-
Creating a Python environment
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst
index e58779c090d8f..78caf360519b6 100644
--- a/doc/source/ecosystem.rst
+++ b/doc/source/ecosystem.rst
@@ -575,3 +575,17 @@ Library Accessor Classes Description
.. _composeml: https://github.com/alteryx/compose
.. _datatest: https://datatest.readthedocs.io/
.. _woodwork: https://github.com/alteryx/woodwork
+
+Development tools
+----------------------------
+
+`pandas-stubs <https://github.com/VirtusLab/pandas-stubs>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+While pandas repository is partially typed, the package itself doesn't expose this information for external use.
+Install pandas-stubs to enable basic type coverage of pandas API.
+
+Learn more by reading through these issues `14468 <https://github.com/pandas-dev/pandas/issues/14468>`_,
+`26766 <https://github.com/pandas-dev/pandas/issues/26766>`_, `28142 <https://github.com/pandas-dev/pandas/issues/28142>`_.
+
+See installation and usage instructions on the `github page <https://github.com/VirtusLab/pandas-stubs>`__.
diff --git a/doc/source/getting_started/comparison/includes/nth_word.rst b/doc/source/getting_started/comparison/includes/nth_word.rst
index 7af0285005d5b..20e2ec47a8c9d 100644
--- a/doc/source/getting_started/comparison/includes/nth_word.rst
+++ b/doc/source/getting_started/comparison/includes/nth_word.rst
@@ -5,5 +5,5 @@ word by index. Note there are more powerful approaches should you need them.
firstlast = pd.DataFrame({"String": ["John Smith", "Jane Cook"]})
firstlast["First_Name"] = firstlast["String"].str.split(" ", expand=True)[0]
- firstlast["Last_Name"] = firstlast["String"].str.rsplit(" ", expand=True)[0]
+ firstlast["Last_Name"] = firstlast["String"].str.rsplit(" ", expand=True)[1]
firstlast
diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
index 3ff3b2bb53fda..a60dab549e66d 100644
--- a/doc/source/reference/series.rst
+++ b/doc/source/reference/series.rst
@@ -427,6 +427,8 @@ strings and apply several methods to it. These can be accessed like
Series.str.normalize
Series.str.pad
Series.str.partition
+ Series.str.removeprefix
+ Series.str.removesuffix
Series.str.repeat
Series.str.replace
Series.str.rfind
diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index 535b503e4372c..fd89e4e896178 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -851,12 +851,12 @@ values **not** in the categories, similarly to how you can reindex **any** panda
Int64Index and RangeIndex
~~~~~~~~~~~~~~~~~~~~~~~~~
-.. note::
-
+.. deprecated:: 1.4.0
In pandas 2.0, :class:`NumericIndex` will become the default index type for numeric types
instead of ``Int64Index``, ``Float64Index`` and ``UInt64Index`` and those index types
- will be removed. See :ref:`here ` for more.
- ``RangeIndex`` however, will not be removed, as it represents an optimized version of an integer index.
+ are therefore deprecated and will be removed in a future version.
+ See :ref:`here ` for more.
+ ``RangeIndex`` will not be removed, as it represents an optimized version of an integer index.
:class:`Int64Index` is a fundamental basic index in pandas. This is an immutable array
implementing an ordered, sliceable set.
@@ -869,12 +869,12 @@ implementing an ordered, sliceable set.
Float64Index
~~~~~~~~~~~~
-.. note::
-
- In pandas 2.0, :class:`NumericIndex` will become the default index type for numeric types
+.. deprecated:: 1.4.0
+ :class:`NumericIndex` will become the default index type for numeric types in the future
instead of ``Int64Index``, ``Float64Index`` and ``UInt64Index`` and those index types
- will be removed. See :ref:`here ` for more.
- ``RangeIndex`` however, will not be removed, as it represents an optimized version of an integer index.
+ are therefore deprecated and will be removed in a future version of Pandas.
+ See :ref:`here ` for more.
+ ``RangeIndex`` will not be removed as it represents an optimized version of an integer index.
By default a :class:`Float64Index` will be automatically created when passing floating, or mixed-integer-floating values in index creation.
This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the
@@ -981,9 +981,9 @@ NumericIndex
.. note::
In pandas 2.0, :class:`NumericIndex` will become the default index type for numeric types
- instead of ``Int64Index``, ``Float64Index`` and ``UInt64Index`` and those index types
- will be removed.
- ``RangeIndex`` however, will not be removed, as it represents an optimized version of an integer index.
+ instead of :class:`Int64Index`, :class:`Float64Index` and :class:`UInt64Index` and those index types
+ are therefore deprecated and will be removed in a future version.
+ :class:`RangeIndex` will not be removed as it represents an optimized version of an integer index.
:class:`NumericIndex` is an index type that can hold data of any numpy int/uint/float dtype. For example:
@@ -998,7 +998,7 @@ NumericIndex
``UInt64Index`` except that it can hold any numpy int, uint or float dtype.
Until Pandas 2.0, you will have to call ``NumericIndex`` explicitly in order to use it, like in the example above.
-In Pandas 2.0, ``NumericIndex`` will become the default pandas numeric index type and will automatically be used where appropriate.
+In the future, ``NumericIndex`` will become the default pandas numeric index type and will automatically be used where appropriate.
Please notice that ``NumericIndex`` *can not* hold Pandas numeric dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.).
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
index 1f1556123db17..4c7b13bcf989f 100644
--- a/doc/source/user_guide/io.rst
+++ b/doc/source/user_guide/io.rst
@@ -2502,14 +2502,16 @@ Read a URL with no options:
.. ipython:: python
- url = (
- "https://raw.githubusercontent.com/pandas-dev/pandas/master/"
- "pandas/tests/io/data/html/spam.html"
- )
+ url = "https://www.fdic.gov/resources/resolutions/bank-failures/failed-bank-list"
dfs = pd.read_html(url)
dfs
-Read in the content of the "banklist.html" file and pass it to ``read_html``
+.. note::
+
+ The data from the above URL changes every Monday so the resulting data above
+ and the data below may be slightly different.
+
+Read in the content of the file from the above URL and pass it to ``read_html``
as a string:
.. ipython:: python
diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst
index 62a347acdaa34..1193dff4361b4 100644
--- a/doc/source/user_guide/options.rst
+++ b/doc/source/user_guide/options.rst
@@ -138,7 +138,7 @@ More information can be found in the `IPython documentation
import pandas as pd
pd.set_option("display.max_rows", 999)
- pd.set_option("precision", 5)
+ pd.set_option("display.precision", 5)
.. _options.frequently_used:
@@ -253,9 +253,9 @@ This is only a suggestion.
.. ipython:: python
df = pd.DataFrame(np.random.randn(5, 5))
- pd.set_option("precision", 7)
+ pd.set_option("display.precision", 7)
df
- pd.set_option("precision", 4)
+ pd.set_option("display.precision", 4)
df
``display.chop_threshold`` sets at what level pandas rounds to zero when
@@ -487,8 +487,27 @@ styler.sparse.index True "Sparsify" MultiIndex displ
elements in outer levels within groups).
styler.sparse.columns True "Sparsify" MultiIndex display for columns
in Styler output.
+styler.render.repr html Standard output format for Styler rendered in Jupyter Notebook.
+ Should be one of "html" or "latex".
styler.render.max_elements 262144 Maximum number of datapoints that Styler will render
trimming either rows, columns or both to fit.
+styler.render.encoding utf-8 Default encoding for output HTML or LaTeX files.
+styler.format.formatter None Object to specify formatting functions to ``Styler.format``.
+styler.format.na_rep None String representation for missing data.
+styler.format.precision 6 Precision to display floating point and complex numbers.
+styler.format.decimal . String representation for decimal point separator for floating
+ point and complex numbers.
+styler.format.thousands None String representation for thousands separator for
+ integers, and floating point and complex numbers.
+styler.format.escape None Whether to escape "html" or "latex" special
+ characters in the display representation.
+styler.html.mathjax True If set to False will render specific CSS classes to
+ table attributes that will prevent Mathjax from rendering
+ in Jupyter Notebook.
+styler.latex.multicol_align r Alignment of headers in a merged column due to sparsification. Can be in {"r", "c", "l"}.
+styler.latex.multirow_align c Alignment of index labels in a merged row due to sparsification. Can be in {"c", "t", "b"}.
+styler.latex.environment None If given will replace the default ``\\begin{table}`` environment. If "longtable" is specified
+ this will render with a specific "longtable" template with longtable features.
======================================= ============ ==================================
diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
index db9485f3f2348..d350351075cb6 100644
--- a/doc/source/user_guide/text.rst
+++ b/doc/source/user_guide/text.rst
@@ -335,6 +335,19 @@ regular expression object will raise a ``ValueError``.
---------------------------------------------------------------------------
ValueError: case and flags cannot be set when pat is a compiled regex
+``removeprefix`` and ``removesuffix`` have the same effect as ``str.removeprefix`` and ``str.removesuffix`` added in `Python 3.9
+<https://docs.python.org/3/whatsnew/3.9.html>`__:
+
+.. versionadded:: 1.4.0
+
+.. ipython:: python
+
+ s = pd.Series(["str_foo", "str_bar", "no_prefix"])
+ s.str.removeprefix("str_")
+
+ s = pd.Series(["foo_str", "bar_str", "no_suffix"])
+ s.str.removesuffix("_str")
+
.. _text.concatenate:
Concatenation
@@ -742,6 +755,8 @@ Method summary
:meth:`~Series.str.get_dummies`;Split strings on the delimiter returning DataFrame of dummy variables
:meth:`~Series.str.contains`;Return boolean array if each string contains pattern/regex
:meth:`~Series.str.replace`;Replace occurrences of pattern/regex/string with some other string or the return value of a callable given the occurrence
+ :meth:`~Series.str.removeprefix`;Remove prefix from string, i.e. only remove if string starts with prefix.
+ :meth:`~Series.str.removesuffix`;Remove suffix from string, i.e. only remove if string ends with suffix.
:meth:`~Series.str.repeat`;Duplicate values (``s.str.repeat(3)`` equivalent to ``x * 3``)
:meth:`~Series.str.pad`;"Add whitespace to left, right, or both sides of strings"
:meth:`~Series.str.center`;Equivalent to ``str.center``
diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
index 733995cc718dd..8d9821e53e30c 100644
--- a/doc/source/whatsnew/v0.20.0.rst
+++ b/doc/source/whatsnew/v0.20.0.rst
@@ -248,11 +248,12 @@ or purely non-negative, integers. Previously, handling these integers would
result in improper rounding or data-type casting, leading to incorrect results.
Notably, a new numerical index, ``UInt64Index``, has been created (:issue:`14937`)
-.. ipython:: python
+.. code-block:: ipython
- idx = pd.UInt64Index([1, 2, 3])
- df = pd.DataFrame({'A': ['a', 'b', 'c']}, index=idx)
- df.index
+ In [1]: idx = pd.UInt64Index([1, 2, 3])
+ In [2]: df = pd.DataFrame({'A': ['a', 'b', 'c']}, index=idx)
+ In [3]: df.index
+ Out[3]: UInt64Index([1, 2, 3], dtype='uint64')
- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`, :issue:`14982`)
- Bug in ``Series.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14721`)
diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst
index f4caea9d363eb..be84c562b3c32 100644
--- a/doc/source/whatsnew/v0.23.0.rst
+++ b/doc/source/whatsnew/v0.23.0.rst
@@ -861,21 +861,21 @@ Previous behavior:
Current behavior:
-.. ipython:: python
+.. code-block:: ipython
- index = pd.Int64Index([-1, 0, 1])
+ In [12]: index = pd.Int64Index([-1, 0, 1])
# division by zero gives -infinity where negative,
# +infinity where positive, and NaN for 0 / 0
- index / 0
+ In [13]: index / 0
# The result of division by zero should not depend on
# whether the zero is int or float
- index / 0.0
+ In [14]: index / 0.0
- index = pd.UInt64Index([0, 1])
- index / np.array([0, 0], dtype=np.uint64)
+ In [15]: index = pd.UInt64Index([0, 1])
+ In [16]: index / np.array([0, 0], dtype=np.uint64)
- pd.RangeIndex(1, 5) / 0
+ In [17]: pd.RangeIndex(1, 5) / 0
.. _whatsnew_0230.api_breaking.extract:
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 89c003f34f0cc..9cbfa49cc8c5c 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -473,10 +473,12 @@ considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`).
*New behavior*:
-.. ipython:: python
+.. code-block:: python
- pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3]))
- pd.Index([], dtype=object).union(pd.Index([1, 2, 3]))
+ In [3]: pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3]))
+ Out[3]: Index([1991-09-05, 1991-09-06, 1, 2, 3], dtype='object')
+ In [4]: pd.Index([], dtype=object).union(pd.Index([1, 2, 3]))
+ Out[4]: Index([1, 2, 3], dtype='object')
Note that integer- and floating-dtype indexes are considered "compatible". The integer
values are coerced to floating point, which may result in loss of precision. See
diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst
index 7a9549affef00..e3c6268547dd2 100644
--- a/doc/source/whatsnew/v1.3.2.rst
+++ b/doc/source/whatsnew/v1.3.2.rst
@@ -26,6 +26,7 @@ Fixed regressions
- Fixed regression in :func:`concat` where ``copy=False`` was not honored in ``axis=1`` Series concatenation (:issue:`42501`)
- Regression in :meth:`Series.nlargest` and :meth:`Series.nsmallest` with nullable integer or float dtype (:issue:`42816`)
- Fixed regression in :meth:`Series.quantile` with :class:`Int64Dtype` (:issue:`42626`)
+- Fixed regression in :meth:`Series.groupby` and :meth:`DataFrame.groupby` where supplying the ``by`` argument with a Series named with a tuple would incorrectly raise (:issue:`42731`)
.. ---------------------------------------------------------------------------
diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst
index 9aac0a9ad9681..5ffc1a20b382f 100644
--- a/doc/source/whatsnew/v1.3.3.rst
+++ b/doc/source/whatsnew/v1.3.3.rst
@@ -17,7 +17,25 @@ Fixed regressions
- Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`)
- Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`)
- Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`)
+- Fixed regression in :meth:`.GroupBy.apply` where ``nan`` values were dropped even with ``dropna=False`` (:issue:`43205`)
+- Fixed regression in :meth:`.GroupBy.quantile` which was failing with ``pandas.NA`` (:issue:`42849`)
+- Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`)
- Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`)
+- Fixed regression in :meth:`read_parquet` where the ``fastparquet`` engine would not work properly with fastparquet 0.7.0 (:issue:`43075`)
+- Fixed regression in :meth:`DataFrame.loc.__setitem__` raising ``ValueError`` when setting array as cell value (:issue:`43422`)
+- Fixed regression in :func:`is_list_like` where objects with ``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`)
+- Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`)
+- Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`)
+
+.. ---------------------------------------------------------------------------
+
+.. _whatsnew_133.performance:
+
+Performance improvements
+~~~~~~~~~~~~~~~~~~~~~~~~
+- Performance improvement for :meth:`DataFrame.__setitem__` when the key or value is not a :class:`DataFrame`, or key is not list-like (:issue:`43274`)
+-
+-
.. ---------------------------------------------------------------------------
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 450ecc85c725b..7107e3eecb2f1 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -29,8 +29,8 @@ It is now possible to create an index of any numpy int/uint/float dtype using th
pd.NumericIndex([1, 2, 3], dtype="uint32")
pd.NumericIndex([1, 2, 3], dtype="float32")
-In order to maintain backwards compatibility, calls to the base :class:`Index` will in
-pandas 1.x. return :class:`Int64Index`, :class:`UInt64Index` and :class:`Float64Index`.
+In order to maintain backwards compatibility, calls to the base :class:`Index` will currently
+return :class:`Int64Index`, :class:`UInt64Index` and :class:`Float64Index`, where relevant.
For example, the code below returns an ``Int64Index`` with dtype ``int64``:
.. code-block:: ipython
@@ -38,11 +38,12 @@ For example, the code below returns an ``Int64Index`` with dtype ``int64``:
In [1]: pd.Index([1, 2, 3], dtype="int8")
Int64Index([1, 2, 3], dtype='int64')
-For the duration of Pandas 1.x, in order to maintain backwards compatibility, all
-operations that until now have returned :class:`Int64Index`, :class:`UInt64Index` and
-:class:`Float64Index` will continue to so. This means, that in order to use
-``NumericIndex``, you will have to call ``NumericIndex`` explicitly. For example the below series
-will have an ``Int64Index``:
+but will in a future version return a :class:`NumericIndex` with dtype ``int8``.
+
+More generally, currently, all operations that until now have
+returned :class:`Int64Index`, :class:`UInt64Index` and :class:`Float64Index` will
+continue to do so. This means that in order to use ``NumericIndex`` in the current version, you
+will have to call ``NumericIndex`` explicitly. For example the below series will have an ``Int64Index``:
.. code-block:: ipython
@@ -50,7 +51,7 @@ will have an ``Int64Index``:
In [3]: ser.index
Int64Index([1, 2, 3], dtype='int64')
-Instead if you want to use a ``NumericIndex``, you should do:
+Instead, if you want to use a ``NumericIndex``, you should do:
.. ipython:: python
@@ -58,10 +59,11 @@ Instead if you want to use a ``NumericIndex``, you should do:
ser = pd.Series([1, 2, 3], index=idx)
ser.index
-In Pandas 2.0, :class:`NumericIndex` will become the default numeric index type and
-``Int64Index``, ``UInt64Index`` and ``Float64Index`` will be removed.
+In a future version of Pandas, :class:`NumericIndex` will become the default numeric index type and
+``Int64Index``, ``UInt64Index`` and ``Float64Index`` are therefore deprecated and will
+be removed in the future; see :ref:`here <whatsnew_140.deprecations.int64_uint64_float64index>` for more.
-See :ref:`here ` for more.
+See :ref:`here ` for more about :class:`NumericIndex`.
.. _whatsnew_140.enhancements.styler:
@@ -73,11 +75,17 @@ Styler
- Styling of indexing has been added, with :meth:`.Styler.apply_index` and :meth:`.Styler.applymap_index`. These mirror the signature of the methods already used to style data values, and work with both HTML and LaTeX format (:issue:`41893`).
- :meth:`.Styler.bar` introduces additional arguments to control alignment and display (:issue:`26070`, :issue:`36419`), and it also validates the input arguments ``width`` and ``height`` (:issue:`42511`).
- :meth:`.Styler.to_latex` introduces keyword argument ``environment``, which also allows a specific "longtable" entry through a separate jinja2 template (:issue:`41866`).
- - :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index`` and ``sparse_columns`` (:issue:`41946`)
- - Keyword argument ``level`` is added to :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` for optionally controlling hidden levels in a MultiIndex (:issue:`25475`)
+ - :meth:`.Styler.to_html` introduces keyword arguments ``sparse_index``, ``sparse_columns``, ``bold_headers``, ``caption`` (:issue:`41946`, :issue:`43149`).
+ - Keyword arguments ``level`` and ``names`` added to :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` for additional control of visibility of MultiIndexes and index names (:issue:`25475`, :issue:`43404`, :issue:`43346`)
+ - Global options have been extended to configure default ``Styler`` properties, including formatting, encoding, mathjax and LaTeX options (:issue:`41395`)
+ - Naive sparsification is now possible for LaTeX without the multirow package (:issue:`43369`)
+
+Formerly Styler relied on ``display.html.use_mathjax``, which has now been replaced by ``styler.html.mathjax``.
There are also bug fixes and deprecations listed below.
+Validation is now performed on the ``caption`` argument (:issue:`43368`)
+
.. _whatsnew_140.enhancements.pyarrow_csv_engine:
Multithreaded CSV reading with a new CSV Engine based on pyarrow
@@ -95,7 +103,9 @@ Other enhancements
- :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)
- :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview ` for performance and functional benefits (:issue:`42273`)
- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`)
--
+- :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`)
+- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` no longer ignore the imaginary component of complex numbers (:issue:`17927`)
+- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)
.. ---------------------------------------------------------------------------
@@ -221,6 +231,41 @@ Other API changes
Deprecations
~~~~~~~~~~~~
+
+.. _whatsnew_140.deprecations.int64_uint64_float64index:
+
+Deprecated Int64Index, UInt64Index & Float64Index
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+:class:`Int64Index`, :class:`UInt64Index` and :class:`Float64Index` have been deprecated
+in favor of the new :class:`NumericIndex` and will be removed in Pandas 2.0 (:issue:`43028`).
+
+Currently, in order to maintain backward compatibility, calls to
+:class:`Index` will continue to return :class:`Int64Index`, :class:`UInt64Index` and :class:`Float64Index`
+when given numeric data, but in the future, a :class:`NumericIndex` will be returned.
+
+*Current behavior*:
+
+.. code-block:: ipython
+
+ In [1]: pd.Index([1, 2, 3], dtype="int32")
+ Out [1]: Int64Index([1, 2, 3], dtype='int64')
+ In [2]: pd.Index([1, 2, 3], dtype="uint64")
+ Out [2]: UInt64Index([1, 2, 3], dtype='uint64')
+
+*Future behavior*:
+
+.. code-block:: ipython
+
+ In [3]: pd.Index([1, 2, 3], dtype="int32")
+ Out [3]: NumericIndex([1, 2, 3], dtype='int32')
+ In [4]: pd.Index([1, 2, 3], dtype="uint64")
+ Out [4]: NumericIndex([1, 2, 3], dtype='uint64')
+
+
+.. _whatsnew_140.deprecations.other:
+
+Other Deprecations
+^^^^^^^^^^^^^^^^^^
- Deprecated :meth:`Index.is_type_compatible` (:issue:`42113`)
- Deprecated ``method`` argument in :meth:`Index.get_loc`, use ``index.get_indexer([label], method=...)`` instead (:issue:`42269`)
- Deprecated treating integer keys in :meth:`Series.__setitem__` as positional when the index is a :class:`Float64Index` not containing the key, a :class:`IntervalIndex` with no entries containing the key, or a :class:`MultiIndex` with leading :class:`Float64Index` level not containing the key (:issue:`33469`)
@@ -231,6 +276,7 @@ Deprecations
- Deprecated dropping of nuisance columns in :class:`Rolling`, :class:`Expanding`, and :class:`EWM` aggregations (:issue:`42738`)
- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`)
- Deprecated :meth:`.Styler.render` in favour of :meth:`.Styler.to_html` (:issue:`42140`)
+- Deprecated passing in a string column label into ``times`` in :meth:`DataFrame.ewm` (:issue:`43265`)
.. ---------------------------------------------------------------------------
@@ -245,7 +291,9 @@ Performance improvements
- Performance improvement in :meth:`DataFrame.corr` for ``method=pearson`` on data without missing values (:issue:`40956`)
- Performance improvement in some :meth:`GroupBy.apply` operations (:issue:`42992`)
- Performance improvement in :func:`read_stata` (:issue:`43059`)
--
+- Performance improvement in :meth:`to_datetime` with ``uint`` dtypes (:issue:`42606`)
+- Performance improvement in :meth:`Series.sparse.to_coo` (:issue:`42880`)
+- Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:`43370`)
.. ---------------------------------------------------------------------------
@@ -276,7 +324,7 @@ Timedelta
Timezones
^^^^^^^^^
--
+- Bug in :meth:`Series.dt.tz_convert` resetting index in a :class:`Series` with :class:`CategoricalIndex` (:issue:`43080`)
-
Numeric
@@ -313,7 +361,8 @@ Indexing
- Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.nan`` (:issue:`35392`)
- Bug in :meth:`DataFrame.query` did not handle the degree sign in a backticked column name, such as \`Temp(°C)\`, used in an expression to query a dataframe (:issue:`42826`)
- Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`)
--
+- Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`)
+
Missing
^^^^^^^
@@ -325,6 +374,7 @@ MultiIndex
- Bug in :meth:`MultiIndex.get_loc` where the first level is a :class:`DatetimeIndex` and a string key is passed (:issue:`42465`)
- Bug in :meth:`MultiIndex.reindex` when passing a ``level`` that corresponds to an ``ExtensionDtype`` level (:issue:`42043`)
- Bug in :meth:`MultiIndex.get_loc` raising ``TypeError`` instead of ``KeyError`` on nested tuple (:issue:`42440`)
+- Bug in :meth:`MultiIndex.putmask` where the other value was also a :class:`MultiIndex` (:issue:`43212`)
-
I/O
@@ -332,8 +382,11 @@ I/O
- Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`)
- Bug in :func:`json_normalize` where ``errors=ignore`` could fail to ignore missing values of ``meta`` when ``record_path`` has a length greater than one (:issue:`41876`)
- Bug in :func:`read_csv` with multi-header input and arguments referencing column names as tuples (:issue:`42446`)
+- Bug in :func:`read_fwf`, where difference in lengths of ``colspecs`` and ``names`` was not raising ``ValueError`` (:issue:`40830`)
- Bug in :func:`Series.to_json` and :func:`DataFrame.to_json` where some attributes were skipped when serialising plain Python objects to JSON (:issue:`42768`, :issue:`33043`)
--
+- Bug where column headers were dropped when constructing a :class:`DataFrame` from a sqlalchemy ``Row`` object (:issue:`40682`)
+- Bug in unpickling a :class:`Index` with object dtype incorrectly inferring numeric dtypes (:issue:`43188`)
+- Bug in :func:`read_csv` where reading multi-header input with unequal lengths incorrectly raising uncontrolled ``IndexError`` (:issue:`43102`)
Period
^^^^^^
@@ -349,19 +402,24 @@ Groupby/resample/rolling
^^^^^^^^^^^^^^^^^^^^^^^^
- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`)
- Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
+- Bug in :meth:`GroupBy.max` and :meth:`GroupBy.min` with nullable integer dtypes losing precision (:issue:`41743`)
- Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`)
- Bug in :meth:`GroupBy.shift` that would return the grouping columns if ``fill_value`` was not None (:issue:`41556`)
- Bug in :meth:`SeriesGroupBy.nlargest` and :meth:`SeriesGroupBy.nsmallest` would have an inconsistent index when the input Series was sorted and ``n`` was greater than or equal to all group sizes (:issue:`15272`, :issue:`16345`, :issue:`29129`)
- Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`)
- Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`)
- Bug in :meth:`Resampler.aggregate` did not allow the use of Named Aggregation (:issue:`32803`)
--
+- Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`)
+- Bug in :meth:`DataFrame.rolling.corr` when the :class:`DataFrame` columns was a :class:`MultiIndex` (:issue:`21157`)
+- Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would subsequently return incorrect results (:issue:`43355`)
Reshaping
^^^^^^^^^
- Improved error message when creating a :class:`DataFrame` column from a multi-dimensional :class:`numpy.ndarray` (:issue:`42463`)
- :func:`concat` creating :class:`MultiIndex` with duplicate level entries when concatenating a :class:`DataFrame` with duplicates in :class:`Index` and multiple keys (:issue:`42651`)
- Bug in :meth:`pandas.cut` on :class:`Series` with duplicate indices (:issue:`42185`) and non-exact :meth:`pandas.CategoricalIndex` (:issue:`42425`)
+- Bug in :meth:`DataFrame.append` failing to retain dtypes when appended columns do not match (:issue:`43392`)
+- Bug in :func:`concat` of ``bool`` and ``boolean`` dtypes resulting in ``object`` dtype instead of ``boolean`` dtype (:issue:`42800`)
-
Sparse
@@ -380,7 +438,9 @@ Styler
- Bug in :meth:`.Styler.to_html` where the ``Styler`` object was updated if the ``to_html`` method was called with some args (:issue:`43034`)
- Bug in :meth:`.Styler.copy` where ``uuid`` was not previously copied (:issue:`40675`)
- Bug in :meth:`Styler.apply` where functions which returned Series objects were not correctly handled in terms of aligning their index labels (:issue:`13657`, :issue:`42014`)
--
+- Bug when rendering an empty DataFrame with a named index (:issue:`43305`).
+- Bug when rendering a single level MultiIndex (:issue:`43383`).
+- Bug when combining non-sparse rendering and :meth:`.Styler.hide_columns` (:issue:`43464`)
Other
^^^^^
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 68ca20338e99b..294b092e33c58 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -70,10 +70,7 @@
# indexes
Index,
CategoricalIndex,
- Int64Index,
- UInt64Index,
RangeIndex,
- Float64Index,
NumericIndex,
MultiIndex,
IntervalIndex,
@@ -186,10 +183,35 @@
# GH 27101
+__deprecated_num_index_names = ["Float64Index", "Int64Index", "UInt64Index"]
+
+
+def __dir__():
+ # GH43028
+ # Int64Index etc. are deprecated, but we still want them to be available in the dir.
+ # Remove in Pandas 2.0, when we remove Int64Index etc. from the code base.
+ return list(globals().keys()) + __deprecated_num_index_names
+
+
def __getattr__(name):
import warnings
- if name == "datetime":
+ if name in __deprecated_num_index_names:
+ warnings.warn(
+ f"pandas.{name} is deprecated "
+ "and will be removed from pandas in a future version. "
+ "Use pandas.NumericIndex with the appropriate dtype instead.",
+ FutureWarning,
+ stacklevel=2,
+ )
+ from pandas.core.api import Float64Index, Int64Index, UInt64Index
+
+ return {
+ "Float64Index": Float64Index,
+ "Int64Index": Int64Index,
+ "UInt64Index": UInt64Index,
+ }[name]
+ elif name == "datetime":
warnings.warn(
"The pandas.datetime class is deprecated "
"and will be removed from pandas in a future version. "
diff --git a/pandas/_libs/algos.pyi b/pandas/_libs/algos.pyi
index 9da5534c51321..fdec60a84a708 100644
--- a/pandas/_libs/algos.pyi
+++ b/pandas/_libs/algos.pyi
@@ -3,6 +3,8 @@ from typing import Any
import numpy as np
+from pandas._typing import npt
+
class Infinity:
"""
Provide a positive Infinity comparison method for ranking.
@@ -30,7 +32,7 @@ class NegInfinity:
def unique_deltas(
arr: np.ndarray, # const int64_t[:]
) -> np.ndarray: ... # np.ndarray[np.int64, ndim=1]
-def is_lexsorted(list_of_arrays: list[np.ndarray]) -> bool: ...
+def is_lexsorted(list_of_arrays: list[npt.NDArray[np.int64]]) -> bool: ...
def groupsort_indexer(
index: np.ndarray, # const int64_t[:]
ngroups: int,
@@ -146,18 +148,20 @@ def diff_2d(
axis: int,
datetimelike: bool = ...,
) -> None: ...
-def ensure_platform_int(arr: object) -> np.ndarray: ...
-def ensure_object(arr: object) -> np.ndarray: ...
-def ensure_float64(arr: object, copy=True) -> np.ndarray: ...
-def ensure_float32(arr: object, copy=True) -> np.ndarray: ...
-def ensure_int8(arr: object, copy=True) -> np.ndarray: ...
-def ensure_int16(arr: object, copy=True) -> np.ndarray: ...
-def ensure_int32(arr: object, copy=True) -> np.ndarray: ...
-def ensure_int64(arr: object, copy=True) -> np.ndarray: ...
-def ensure_uint8(arr: object, copy=True) -> np.ndarray: ...
-def ensure_uint16(arr: object, copy=True) -> np.ndarray: ...
-def ensure_uint32(arr: object, copy=True) -> np.ndarray: ...
-def ensure_uint64(arr: object, copy=True) -> np.ndarray: ...
+def ensure_platform_int(arr: object) -> npt.NDArray[np.intp]: ...
+def ensure_object(arr: object) -> npt.NDArray[np.object_]: ...
+def ensure_complex64(arr: object, copy=True) -> npt.NDArray[np.complex64]: ...
+def ensure_complex128(arr: object, copy=True) -> npt.NDArray[np.complex128]: ...
+def ensure_float64(arr: object, copy=True) -> npt.NDArray[np.float64]: ...
+def ensure_float32(arr: object, copy=True) -> npt.NDArray[np.float32]: ...
+def ensure_int8(arr: object, copy=True) -> npt.NDArray[np.int8]: ...
+def ensure_int16(arr: object, copy=True) -> npt.NDArray[np.int16]: ...
+def ensure_int32(arr: object, copy=True) -> npt.NDArray[np.int32]: ...
+def ensure_int64(arr: object, copy=True) -> npt.NDArray[np.int64]: ...
+def ensure_uint8(arr: object, copy=True) -> npt.NDArray[np.uint8]: ...
+def ensure_uint16(arr: object, copy=True) -> npt.NDArray[np.uint16]: ...
+def ensure_uint32(arr: object, copy=True) -> npt.NDArray[np.uint32]: ...
+def ensure_uint64(arr: object, copy=True) -> npt.NDArray[np.uint64]: ...
def take_1d_int8_int8(
values: np.ndarray, indexer: np.ndarray, out: np.ndarray, fill_value=...
) -> None: ...
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
index 2353c66f3378f..99929c36c0929 100644
--- a/pandas/_libs/algos.pyx
+++ b/pandas/_libs/algos.pyx
@@ -15,6 +15,8 @@ import numpy as np
cimport numpy as cnp
from numpy cimport (
+ NPY_COMPLEX64,
+ NPY_COMPLEX128,
NPY_FLOAT32,
NPY_FLOAT64,
NPY_INT8,
@@ -122,7 +124,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr):
Parameters
----------
- arr : ndarray[in64_t]
+ arr : ndarray[int64_t]
Returns
-------
@@ -516,97 +518,6 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarr
return result
-# ----------------------------------------------------------------------
-# Kendall correlation
-# Wikipedia article: https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def nancorr_kendall(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1) -> ndarray:
- """
- Perform kendall correlation on a 2d array
-
- Parameters
- ----------
- mat : np.ndarray[float64_t, ndim=2]
- Array to compute kendall correlation on
- minp : int, default 1
- Minimum number of observations required per pair of columns
- to have a valid result.
-
- Returns
- -------
- numpy.ndarray[float64_t, ndim=2]
- Correlation matrix
- """
- cdef:
- Py_ssize_t i, j, k, xi, yi, N, K
- ndarray[float64_t, ndim=2] result
- ndarray[float64_t, ndim=2] ranked_mat
- ndarray[uint8_t, ndim=2] mask
- float64_t currj
- ndarray[uint8_t, ndim=1] valid
- ndarray[int64_t] sorted_idxs
- ndarray[float64_t, ndim=1] col
- int64_t n_concordant
- int64_t total_concordant = 0
- int64_t total_discordant = 0
- float64_t kendall_tau
- int64_t n_obs
-
- N, K = (