From 6c05ae0e4c5e84cae38b1d5bfefb429ab6a38f00 Mon Sep 17 00:00:00 2001
From: Alexey Kudinkin
Date: Fri, 1 Nov 2024 20:12:08 -0700
Subject: [PATCH 1/2] Make `DelegatingBlockBuilder` use `ArrowBlockBuilder` by default

Signed-off-by: Alexey Kudinkin
---
 python/ray/data/_internal/delegating_block_builder.py | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/python/ray/data/_internal/delegating_block_builder.py b/python/ray/data/_internal/delegating_block_builder.py
index ab33c1aa4b3a..59ee059cee64 100644
--- a/python/ray/data/_internal/delegating_block_builder.py
+++ b/python/ray/data/_internal/delegating_block_builder.py
@@ -23,17 +23,8 @@ def _inferred_block_type(self) -> Optional[BlockType]:
     def add(self, item: Mapping[str, Any]) -> None:
         assert isinstance(item, collections.abc.Mapping), item
 
-        import pyarrow
-
         if self._builder is None:
-            try:
-                check = ArrowBlockBuilder()
-                check.add(item)
-                check.build()
-                self._builder = ArrowBlockBuilder()
-            except (TypeError, pyarrow.lib.ArrowInvalid, ArrowConversionError):
-                # Can also handle nested Python objects, which Arrow cannot.
-                self._builder = PandasBlockBuilder()
+            self._builder = ArrowBlockBuilder()
 
         self._builder.add(item)
 

From 02f72d557e8428b6e0f55b1e032205d0994e75d9 Mon Sep 17 00:00:00 2001
From: Alexey Kudinkin
Date: Fri, 1 Nov 2024 20:12:29 -0700
Subject: [PATCH 2/2] `lint`

Signed-off-by: Alexey Kudinkin
---
 python/ray/data/_internal/delegating_block_builder.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/ray/data/_internal/delegating_block_builder.py b/python/ray/data/_internal/delegating_block_builder.py
index 59ee059cee64..4655a8e24148 100644
--- a/python/ray/data/_internal/delegating_block_builder.py
+++ b/python/ray/data/_internal/delegating_block_builder.py
@@ -1,10 +1,8 @@
 import collections
 from typing import Any, Mapping, Optional
 
-from ray.air.util.tensor_extensions.arrow import ArrowConversionError
 from ray.data._internal.arrow_block import ArrowBlockBuilder
 from ray.data._internal.block_builder import BlockBuilder
-from ray.data._internal.pandas_block import PandasBlockBuilder
 from ray.data.block import Block, BlockAccessor, BlockType, DataBatch
 
 