From 6f4f3756012f56efb26bf22a8b8548199edf0cf9 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 6 May 2024 21:21:21 +0200 Subject: [PATCH] Fix pandas internals changes (#78) --- .github/workflows/ci.yml | 2 +- partd/pandas.py | 57 +++++++++++++++++++++++----------------- pyproject.toml | 9 +++---- 3 files changed, 38 insertions(+), 30 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d58bce..db8cdce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout source diff --git a/partd/pandas.py b/partd/pandas.py index 5a25c26..880558c 100644 --- a/partd/pandas.py +++ b/partd/pandas.py @@ -1,38 +1,40 @@ from functools import partial import pickle -import numpy as np import pandas as pd -from pandas.core.internals import create_block_manager_from_blocks, make_block +from packaging.version import Version + +PANDAS_GE_210 = Version(pd.__version__).release >= (2, 1, 0) +PANDAS_GE_300 = Version(pd.__version__).major >= 3 + +if PANDAS_GE_300: + from pandas.api.internals import create_dataframe_from_blocks + create_block_manager_from_blocks = None + make_block = None +else: + create_dataframe_from_blocks = None + try: + from pandas.core.internals.managers import create_block_manager_from_blocks + except ImportError: + from pandas.core.internals import create_block_manager_from_blocks + + from pandas.core.internals import make_block from . import numpy as pnp from .core import Interface from .encode import Encode from .utils import extend, framesplit, frame +from pandas.api.types import is_extension_array_dtype +from pandas.api.extensions import ExtensionArray -try: - # pandas >= 0.24.0 - from pandas.api.types import is_extension_array_dtype -except ImportError: - def is_extension_array_dtype(dtype): - return False - -try: - # Some `ExtensionArray`s can have a `.dtype` which is not a `ExtensionDtype` - # (e.g. they can be backed by a NumPy dtype). For these cases we check - # whether the instance is a `ExtensionArray`. - # https://github.com/dask/partd/issues/48 - from pandas.api.extensions import ExtensionArray - def is_extension_array(x): - return isinstance(x, ExtensionArray) -except ImportError: - def is_extension_array(x): - return False +def is_extension_array(x): + return isinstance(x, ExtensionArray) dumps = partial(pickle.dumps, protocol=pickle.HIGHEST_PROTOCOL) + class PandasColumns(Interface): def __init__(self, partd=None): self.partd = pnp.Numpy(partd) @@ -148,7 +150,7 @@ def block_to_header_bytes(block): return header, bytes -def block_from_header_bytes(header, bytes): +def block_from_header_bytes(header, bytes, create_block: bool): placement, dtype, shape, (extension_type, extension_values) = header if extension_type == "other": @@ -164,7 +166,9 @@ def block_from_header_bytes(header, bytes): tz_info = extension_values[0] values = pd.DatetimeIndex(values).tz_localize('utc').tz_convert( tz_info) - return make_block(values, placement=placement) + if create_block: + return make_block(values, placement=placement) + return values, placement def serialize(df): @@ -193,9 +197,14 @@ def deserialize(bytes): bytes = frames[1:] axes = [index_from_header_bytes(headers[0], bytes[0]), index_from_header_bytes(headers[1], bytes[1])] - blocks = [block_from_header_bytes(h, b) + blocks = [block_from_header_bytes(h, b, create_block=not PANDAS_GE_300) for (h, b) in zip(headers[2:], bytes[2:])] - return pd.DataFrame(create_block_manager_from_blocks(blocks, axes)) + if PANDAS_GE_300: + return pd.api.internals.create_dataframe_from_blocks(blocks, axes[1], axes[0]) + elif PANDAS_GE_210: + return pd.DataFrame._from_mgr(create_block_manager_from_blocks(blocks, axes), axes=axes) + else: + return pd.DataFrame(create_block_manager_from_blocks(blocks, axes)) def join(dfs): diff --git a/pyproject.toml b/pyproject.toml index 051a159..1eca1aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,15 +10,14 @@ license = {text = "BSD"} keywords = [] classifiers = [ "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", ] readme = "README.rst" urls = {Homepage = "http://github.com/dask/partd/"} -requires-python = ">=3.7" +requires-python = ">=3.9" dynamic = ["version"] dependencies = [ "locket", @@ -27,8 +26,8 @@ dependencies = [ [project.optional-dependencies] complete = [ - "numpy >= 1.9.0", - "pandas >=0.19.0", + "numpy >= 1.20.0", + "pandas >=1.3", "pyzmq", "blosc", ]