Merge remote-tracking branch 'upstream/master' into pr43154

pandas-dev · Sep 9, 2021 · e141123 · e141123
2 parents 08b2429 + 4a2e467
commit e141123
Show file tree

Hide file tree

Showing 215 changed files with 4,260 additions and 1,433 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml
@@ -0,0 +1,67 @@
+name: Bug Report
+description: Report incorrect behavior in the pandas library
+title: "BUG: "
+labels: [Bug, Needs Triage]
+
+body:
+  - type: checkboxes
+    id: checks
+    attributes:
+      options:
+        - label: >
+            I have checked that this issue has not already been reported.
+          required: true
+        - label: >
+            I have confirmed this bug exists on the
+            [latest version](https://pandas.pydata.org/docs/whatsnew/index.html) of pandas.
+          required: true
+        - label: >
+            I have confirmed this bug exists on the master branch of pandas.
+  - type: textarea
+    id: example
+    attributes:
+      label: Reproducible Example
+      description: >
+        Please follow [this guide](https://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports) on how to
+        provide a minimal, copy-pastable example.
+      placeholder: >
+        import pandas as pd
+
+        df = pd.DataFrame(range(5))
+
+        ...
+      render: python
+    validations:
+      required: true
+  - type: textarea
+    id: problem
+    attributes:
+      label: Issue Description
+      description: >
+        Please provide a description of the issue shown in the reproducible example.
+    validations:
+      required: true
+  - type: textarea
+    id: expected-behavior
+    attributes:
+      label: Expected Behavior
+      description: >
+        Please describe or show a code example of the expected behavior.
+    validations:
+      required: true
+  - type: textarea
+    id: version
+    attributes:
+      label: Installed Versions
+      description: >
+        Please paste the output of ``pd.show_versions()``
+      value: >
+        <details>
+
+
+        Replace this line with the output of pd.show_versions()
+
+
+        </details>
+    validations:
+      required: true
diff --git a/.github/ISSUE_TEMPLATE/installation_issue.yaml b/.github/ISSUE_TEMPLATE/installation_issue.yaml
@@ -0,0 +1,65 @@
+name: Installation Issue
+description: Report issues installing the pandas library on your system
+title: "BUILD: "
+labels: [Build, Needs Triage]
+
+body:
+  - type: checkboxes
+    id: checks
+    attributes:
+      options:
+        - label: >
+            I have read the [installation guide](https://pandas.pydata.org/pandas-docs/stable/getting_started/install.html#installing-pandas).
+          required: true
+  - type: input
+    id: platform
+    attributes:
+      label: Platform
+      description: >
+        Please provide the output of ``import platform; print(platform.platform())``
+    validations:
+      required: true
+  - type: dropdown
+    id: method
+    attributes:
+      label: Installation Method
+      description: >
+        Please provide how you tried to install pandas from a clean environment.
+      options:
+        - pip install
+        - conda install
+        - apt-get install
+        - Built from source
+        - Other
+    validations:
+      required: true
+  - type: input
+    id: pandas
+    attributes:
+      label: pandas Version
+      description: >
+        Please provide the version of pandas you are trying to install.
+    validations:
+      required: true
+  - type: input
+    id: python
+    attributes:
+      label: Python Version
+      description: >
+        Please provide the installed version of Python.
+    validations:
+      required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Installation Logs
+      description: >
+        If possible, please copy and paste the installation logs when attempting to install pandas.
+      value: >
+        <details>
+
+
+        Replace this line with the installation logs.
+
+
+        </details>
diff --git a/.github/ISSUE_TEMPLATE/performance_issue.yaml b/.github/ISSUE_TEMPLATE/performance_issue.yaml
@@ -0,0 +1,52 @@
+name: Performance Issue
+description: Report slow performance or memory issues when running pandas code
+title: "PERF: "
+labels: [Performance, Needs Triage]
+
+body:
+  - type: checkboxes
+    id: checks
+    attributes:
+      options:
+        - label: >
+            I have checked that this issue has not already been reported.
+          required: true
+        - label: >
+            I have confirmed this issue exists on the
+            [latest version](https://pandas.pydata.org/docs/whatsnew/index.html) of pandas.
+          required: true
+        - label: >
+            I have confirmed this issue exists on the master branch of pandas.
+  - type: textarea
+    id: example
+    attributes:
+      label: Reproducible Example
+      description: >
+        Please provide a minimal, copy-pastable example that quantifies
+        [slow runtime](https://docs.python.org/3/library/timeit.html) or
+        [memory](https://pypi.org/project/memory-profiler/) issues.
+    validations:
+      required: true
+  - type: textarea
+    id: version
+    attributes:
+      label: Installed Versions
+      description: >
+        Please paste the output of ``pd.show_versions()``
+      value: >
+        <details>
+
+
+        Replace this line with the output of pd.show_versions()
+
+
+        </details>
+    validations:
+      required: true
+  - type: textarea
+    id: prior-performance
+    attributes:
+      label: Prior Performance
+      description: >
+        If applicable, please provide the prior version of pandas and output
+        of the same reproducible example where the performance issue did not exist.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,4 +1,4 @@
 - [ ] closes #xxxx
 - [ ] tests added / passed
-- [ ] Ensure all linting tests pass, see [here](https://pandas.pydata.org/pandas-docs/dev/development/contributing.html#code-standards) for how to run them
+- [ ] Ensure all linting tests pass, see [here](https://pandas.pydata.org/pandas-docs/dev/development/contributing_codebase.html#pre-commit) for how to run them
 - [ ] whatsnew entry
diff --git a/.github/workflows/asv-bot.yml b/.github/workflows/asv-bot.yml
@@ -0,0 +1,81 @@
+name: "ASV Bot"
+
+on:
+  issue_comment: # Pull requests are issues
+    types:
+      - created
+
+env:
+  ENV_FILE: environment.yml
+  COMMENT: ${{github.event.comment.body}}
+
+jobs:
+  autotune:
+    name: "Run benchmarks"
+    # TODO: Support more benchmarking options later, against different branches, against self, etc
+    if: startsWith(github.event.comment.body, '@github-actions benchmark')
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    concurrency:
+      # Set concurrency to prevent abuse(full runs are ~5.5 hours !!!)
+      # each user can only run one concurrent benchmark bot at a time
+      # We don't cancel in progress jobs, but if you want to benchmark multiple PRs, you're gonna have
+      # to wait
+      group: ${{ github.actor }}-asv
+      cancel-in-progress: false
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - name: Cache conda
+        uses: actions/cache@v2
+        with:
+          path: ~/conda_pkgs_dir
+          key: ${{ runner.os }}-conda-${{ hashFiles('${{ env.ENV_FILE }}') }}
+
+        # Although asv sets up its own env, deps are still needed
+        # during discovery process
+      - uses: conda-incubator/setup-miniconda@v2
+        with:
+          activate-environment: pandas-dev
+          channel-priority: strict
+          environment-file: ${{ env.ENV_FILE }}
+          use-only-tar-bz2: true
+
+      - name: Run benchmarks
+        id: bench
+        continue-on-error: true # This is a fake failure, asv will exit code 1 for regressions
+        run: |
+          # extracting the regex, see https://stackoverflow.com/a/36798723
+          REGEX=$(echo "$COMMENT" | sed -n "s/^.*-b\s*\(\S*\).*$/\1/p")
+          cd asv_bench
+          asv check -E existing
+          git remote add upstream https://github.com/pandas-dev/pandas.git
+          git fetch upstream
+          asv machine --yes
+          asv continuous -f 1.1 -b $REGEX upstream/master HEAD
+          echo 'BENCH_OUTPUT<<EOF' >> $GITHUB_ENV
+          asv compare -f 1.1 upstream/master HEAD >> $GITHUB_ENV
+          echo 'EOF' >> $GITHUB_ENV
+          echo "REGEX=$REGEX" >> $GITHUB_ENV
+
+      - uses: actions/github-script@v4
+        env:
+          BENCH_OUTPUT: ${{env.BENCH_OUTPUT}}
+          REGEX: ${{env.REGEX}}
+        with:
+          script: |
+            const ENV_VARS = process.env
+            const run_url = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`
+            github.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: '\nBenchmarks completed. View runner logs here.' + run_url + '\nRegex used: '+ 'regex ' + ENV_VARS["REGEX"] + '\n' + ENV_VARS["BENCH_OUTPUT"]
+            })
diff --git a/.github/workflows/autoupdate-pre-commit-config.yml b/.github/workflows/autoupdate-pre-commit-config.yml
@@ -2,7 +2,7 @@ name: "Update pre-commit config"
 
 on:
   schedule:
-    - cron: "0 7 * * 1" # At 07:00 on each Monday.
+    - cron: "0 7 1 * *" # At 07:00 on 1st of every month.
   workflow_dispatch:
 
 jobs:

diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py
@@ -50,15 +50,26 @@ def time_pandas_dtype_invalid(self, dtype):
 
 class SelectDtypes:
 
-    params = [
-        tm.ALL_INT_NUMPY_DTYPES
-        + tm.ALL_INT_EA_DTYPES
-        + tm.FLOAT_NUMPY_DTYPES
-        + tm.COMPLEX_DTYPES
-        + tm.DATETIME64_DTYPES
-        + tm.TIMEDELTA64_DTYPES
-        + tm.BOOL_DTYPES
-    ]
+    try:
+        params = [
+            tm.ALL_INT_NUMPY_DTYPES
+            + tm.ALL_INT_EA_DTYPES
+            + tm.FLOAT_NUMPY_DTYPES
+            + tm.COMPLEX_DTYPES
+            + tm.DATETIME64_DTYPES
+            + tm.TIMEDELTA64_DTYPES
+            + tm.BOOL_DTYPES
+        ]
+    except AttributeError:
+        params = [
+            tm.ALL_INT_DTYPES
+            + tm.ALL_EA_INT_DTYPES
+            + tm.FLOAT_DTYPES
+            + tm.COMPLEX_DTYPES
+            + tm.DATETIME64_DTYPES
+            + tm.TIMEDELTA64_DTYPES
+            + tm.BOOL_DTYPES
+        ]
     param_names = ["dtype"]
 
     def setup(self, dtype):

diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py
@@ -399,12 +399,14 @@ class ChainIndexing:
 
     def setup(self, mode):
         self.N = 1000000
+        self.df = DataFrame({"A": np.arange(self.N), "B": "foo"})
 
     def time_chained_indexing(self, mode):
+        df = self.df
+        N = self.N
         with warnings.catch_warnings(record=True):
             with option_context("mode.chained_assignment", mode):
-                df = DataFrame({"A": np.arange(self.N), "B": "foo"})
-                df2 = df[df.A > self.N // 2]
+                df2 = df[df.A > N // 2]
                 df2["C"] = 1.0