Merge branch 'main' into remove-tests

pandas-dev · Mar 20, 2024 · 23952aa · 23952aa
2 parents e53ad08 + 710720e
commit 23952aa
Show file tree

Hide file tree

Showing 58 changed files with 2,064 additions and 2,037 deletions.
diff --git a/.github/actions/run-tests/action.yml b/.github/actions/run-tests/action.yml
@@ -1,16 +1,9 @@
 name: Run tests and report results
-inputs:
-  preload:
-    description: Preload arguments for sanitizer
-    required: false
-  asan_options:
-    description: Arguments for Address Sanitizer (ASAN)
-    required: false
 runs:
   using: composite
   steps:
     - name: Test
-      run: ${{ inputs.asan_options }} ${{ inputs.preload }} ci/run_tests.sh
+      run: ci/run_tests.sh
       shell: bash -el {0}
 
     - name: Publish test results

diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -68,14 +68,6 @@ jobs:
           - name: "Pyarrow Nightly"
             env_file: actions-311-pyarrownightly.yaml
             pattern: "not slow and not network and not single_cpu"
-          - name: "ASAN / UBSAN"
-            env_file: actions-311-sanitizers.yaml
-            pattern: "not slow and not network and not single_cpu and not skip_ubsan"
-            asan_options: "ASAN_OPTIONS=detect_leaks=0"
-            preload: LD_PRELOAD=$(gcc -print-file-name=libasan.so)
-            meson_args: --config-settings=setup-args="-Db_sanitize=address,undefined"
-            cflags_adds: -fno-sanitize-recover=all
-            pytest_workers: -1  # disable pytest-xdist as it swallows stderr from ASAN
       fail-fast: false
     name: ${{ matrix.name || format('ubuntu-latest {0}', matrix.env_file) }}
     env:
@@ -161,18 +153,12 @@ jobs:
     - name: Test (not single_cpu)
       uses: ./.github/actions/run-tests
       if: ${{ matrix.name != 'Pypy' }}
-      with:
-        preload: ${{ matrix.preload }}
-        asan_options: ${{ matrix.asan_options }}
       env:
         # Set pattern to not single_cpu if not already set
         PATTERN: ${{ env.PATTERN == '' && 'not single_cpu' || matrix.pattern }}
 
     - name: Test (single_cpu)
       uses: ./.github/actions/run-tests
-      with:
-        preload: ${{ matrix.preload }}
-        asan_options: ${{ matrix.asan_options }}
       env:
         PATTERN: 'single_cpu'
         PYTEST_WORKERS: 0

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -78,7 +78,7 @@ repos:
     hooks:
     -   id: pylint
         stages: [manual]
-        args: [--load-plugins=pylint.extensions.redefined_loop_name]
+        args: [--load-plugins=pylint.extensions.redefined_loop_name, --fail-on=I0021]
     -   id: pylint
         alias: redefined-outer-name
         name: Redefining name from outer scope

diff --git a/Dockerfile b/Dockerfile
@@ -9,6 +9,6 @@ RUN apt-get install -y build-essential
 RUN apt-get install -y libhdf5-dev libgles2-mesa-dev
 
 RUN python -m pip install --upgrade pip
-RUN python -m pip install \
-    -r https://raw.githubusercontent.com/pandas-dev/pandas/main/requirements-dev.txt
+COPY requirements-dev.txt /tmp
+RUN python -m pip install -r /tmp/requirements-dev.txt
 CMD ["/bin/bash"]
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
diff --git a/ci/deps/actions-311-sanitizers.yaml b/ci/deps/actions-311-sanitizers.yaml
diff --git a/doc/source/_static/css/getting_started.css b/doc/source/_static/css/getting_started.css
@@ -248,6 +248,7 @@ ul.task-bullet > li > p:first-child {
 }
 
 .tutorial-card .card-header {
+  --bs-card-cap-color: var(--pst-color-text-base);
   cursor: pointer;
   background-color: var(--pst-color-surface);
   border: 1px solid var(--pst-color-border)
@@ -269,7 +270,7 @@ ul.task-bullet > li > p:first-child {
 
 
 .tutorial-card .gs-badge-link a {
-  color: var(--pst-color-text-base);
+  color: var(--pst-color-primary-text);
   text-decoration: none;
 }
 

diff --git a/doc/source/user_guide/missing_data.rst b/doc/source/user_guide/missing_data.rst
@@ -88,7 +88,7 @@ To detect these missing value, use the :func:`isna` or :func:`notna` methods.
 
 .. warning::
 
-   Experimental: the behaviour of :class:`NA`` can still change without warning.
+   Experimental: the behaviour of :class:`NA` can still change without warning.
 
 Starting from pandas 1.0, an experimental :class:`NA` value (singleton) is
 available to represent scalar missing values. The goal of :class:`NA` is to provide a
@@ -105,7 +105,7 @@ dtype, it will use :class:`NA`:
     s[2]
     s[2] is pd.NA
 
-Currently, pandas does not yet use those data types using :class:`NA` by default
+Currently, pandas does not use those data types using :class:`NA` by default in
 a :class:`DataFrame` or :class:`Series`, so you need to specify
 the dtype explicitly. An easy way to convert to those dtypes is explained in the
 :ref:`conversion section <missing_data.NA.conversion>`.
@@ -253,8 +253,8 @@ Conversion
 ^^^^^^^^^^
 
 If you have a :class:`DataFrame` or :class:`Series` using ``np.nan``,
-:meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes`
-in :class:`DataFrame` that can convert data to use the data types that use :class:`NA`
+:meth:`DataFrame.convert_dtypes` and :meth:`Series.convert_dtypes`, respectively,
+will convert your data to use the nullable data types supporting :class:`NA`,
 such as :class:`Int64Dtype` or :class:`ArrowDtype`. This is especially helpful after reading
 in data sets from IO methods where data types were inferred.
 

diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
@@ -797,8 +797,6 @@ There are several time/date properties that one can access from ``Timestamp`` or
     timetz,"Returns datetime.time as local time with timezone information"
     dayofyear,"The ordinal day of year"
     day_of_year,"The ordinal day of year"
-    weekofyear,"The week ordinal of the year"
-    week,"The week ordinal of the year"
     dayofweek,"The number of the day of the week with Monday=0, Sunday=6"
     day_of_week,"The number of the day of the week with Monday=0, Sunday=6"
     weekday,"The number of the day of the week with Monday=0, Sunday=6"
@@ -812,6 +810,10 @@ There are several time/date properties that one can access from ``Timestamp`` or
     is_year_end,"Logical indicating if last day of year (defined by frequency)"
     is_leap_year,"Logical indicating if the date belongs to a leap year"
 
+.. note::
+
+   You can use ``DatetimeIndex.isocalendar().week`` to access week of year date information.
+
 Furthermore, if you have a ``Series`` with datetimelike values, then you can
 access these properties via the ``.dt`` accessor, as detailed in the section
 on :ref:`.dt accessors<basics.dt_accessors>`.

diff --git a/doc/source/whatsnew/v2.2.2.rst b/doc/source/whatsnew/v2.2.2.rst
@@ -14,14 +14,16 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when a column's type was a pandas nullable one with missing values (:issue:`56702`)
+- :meth:`DataFrame.__dataframe__` was producing incorrect data buffers when a column's type was a pyarrow nullable one with missing values (:issue:`57664`)
 -
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_222.bug_fixes:
 
 Bug fixes
 ~~~~~~~~~
--
+- :meth:`DataFrame.__dataframe__` was showing bytemask instead of bitmask for ``'string[pyarrow]'`` validity buffer (:issue:`57762`)
+- :meth:`DataFrame.__dataframe__` was showing non-null validity buffer (instead of ``None``) for ``'string[pyarrow]'`` without missing values (:issue:`57761`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_222.other:

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -298,6 +298,7 @@ Bug fixes
 - Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
 - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
 - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
+- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
 
 Categorical
 ^^^^^^^^^^^
@@ -307,6 +308,7 @@ Categorical
 Datetimelike
 ^^^^^^^^^^^^
 - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
+- Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
 -
 
 Timedelta
@@ -357,6 +359,7 @@ MultiIndex
 I/O
 ^^^
 - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
+- Now all ``Mapping``\ s are pretty printed correctly. Previously, only literal ``dict``\ s were. (:issue:`57915`)
 -
 -
 

diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -180,6 +180,8 @@ def is_lexsorted(list_of_arrays: list) -> bool:
     n = len(list_of_arrays[0])
 
     cdef int64_t **vecs = <int64_t**>malloc(nlevels * sizeof(int64_t*))
+    if vecs is NULL:
+        raise MemoryError()
     for i in range(nlevels):
         arr = list_of_arrays[i]
         assert arr.dtype.name == "int64"

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -81,6 +81,8 @@ cdef float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) noexcept n
             return NaN
 
         tmp = <float64_t*>malloc((n - na_count) * sizeof(float64_t))
+        if tmp is NULL:
+            raise MemoryError()
 
         j = 0
         for i in range(n):
@@ -118,6 +120,8 @@ cdef float64_t median_linear(float64_t* a, int n) noexcept nogil:
             return NaN
 
         tmp = <float64_t*>malloc((n - na_count) * sizeof(float64_t))
+        if tmp is NULL:
+            raise MemoryError()
 
         j = 0
         for i in range(n):

diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx
@@ -68,7 +68,11 @@ def hash_object_array(
 
     # create an array of bytes
     vecs = <char **>malloc(n * sizeof(char *))
+    if vecs is NULL:
+        raise MemoryError()
     lens = <uint64_t*>malloc(n * sizeof(uint64_t))
+    if lens is NULL:
+        raise MemoryError()
 
     for i in range(n):
         val = arr[i]

diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd
@@ -174,7 +174,7 @@ cdef class StringHashTable(HashTable):
 
 cdef struct Int64VectorData:
     int64_t *data
-    Py_ssize_t n, m
+    Py_ssize_t size, capacity
 
 cdef class Vector:
     cdef bint external_view_exists