Skip to content

Commit

Permalink
apacheGH-40092: [Python] Support Binary/StringView conversion to numpy/pandas
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Feb 15, 2024
1 parent a03d957 commit 16b993a
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 3 deletions.
21 changes: 18 additions & 3 deletions python/pyarrow/src/arrow/python/arrow_to_pandas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ struct WrapBytes<LargeStringType> {
}
};

// WrapBytes specialization for StringViewType: the value's bytes are handed to
// PyUnicode_FromStringAndSize, so each element is wrapped as a Python str.
template <>
struct WrapBytes<StringViewType> {
  // `data` is the start of the buffer and `length` its size as passed to the
  // CPython call. The call's result is returned unchanged, so checking for a
  // failed (null) result is left to the caller.
  static inline PyObject* Wrap(const char* data, int64_t length) {
    PyObject* py_str = PyUnicode_FromStringAndSize(data, length);
    return py_str;
  }
};

template <>
struct WrapBytes<BinaryType> {
static inline PyObject* Wrap(const char* data, int64_t length) {
Expand All @@ -147,6 +154,13 @@ struct WrapBytes<LargeBinaryType> {
}
};

// WrapBytes specialization for BinaryViewType: the value's bytes are handed to
// PyBytes_FromStringAndSize, so each element is wrapped as a Python bytes
// object.
template <>
struct WrapBytes<BinaryViewType> {
  // `data` is the start of the buffer and `length` its size as passed to the
  // CPython call. The call's result is returned unchanged, so checking for a
  // failed (null) result is left to the caller.
  static inline PyObject* Wrap(const char* data, int64_t length) {
    PyObject* py_bytes = PyBytes_FromStringAndSize(data, length);
    return py_bytes;
  }
};

template <>
struct WrapBytes<FixedSizeBinaryType> {
static inline PyObject* Wrap(const char* data, int64_t length) {
Expand Down Expand Up @@ -1154,7 +1168,7 @@ struct ObjectWriterVisitor {
}

template <typename Type>
enable_if_t<is_base_binary_type<Type>::value || is_fixed_size_binary_type<Type>::value,
enable_if_t<is_base_binary_type<Type>::value || is_binary_view_like_type<Type>::value || is_fixed_size_binary_type<Type>::value,
Status>
Visit(const Type& type) {
auto WrapValue = [](const std::string_view& view, PyObject** out) {
Expand Down Expand Up @@ -1355,8 +1369,7 @@ struct ObjectWriterVisitor {
std::is_same<ExtensionType, Type>::value ||
(std::is_base_of<IntervalType, Type>::value &&
!std::is_same<MonthDayNanoIntervalType, Type>::value) ||
std::is_base_of<UnionType, Type>::value ||
std::is_base_of<BinaryViewType, Type>::value,
std::is_base_of<UnionType, Type>::value,
Status>
Visit(const Type& type) {
return Status::NotImplemented("No implemented conversion to object dtype: ",
Expand Down Expand Up @@ -2086,8 +2099,10 @@ static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions&
break;
case Type::STRING: // fall through
case Type::LARGE_STRING: // fall through
case Type::STRING_VIEW: // fall through
case Type::BINARY: // fall through
case Type::LARGE_BINARY:
case Type::BINARY_VIEW:
case Type::NA: // fall through
case Type::FIXED_SIZE_BINARY: // fall through
case Type::STRUCT: // fall through
Expand Down
14 changes: 14 additions & 0 deletions python/pyarrow/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1760,6 +1760,20 @@ def test_large_string(self):
_check_pandas_roundtrip(
df, schema=pa.schema([('a', pa.large_string())]))

def test_binary_view(self):
    """Run the series and frame round-trip helpers with pa.binary_view().

    The sample data mixes a multi-byte value, an empty value, a
    single-byte value and a missing entry.
    """
    values = [b'123', b'', b'a', None]
    series = pd.Series(values)
    _check_series_roundtrip(series, type_=pa.binary_view())

    # Same data again, this time as column 'a' of a DataFrame with an
    # explicit schema.
    frame = pd.DataFrame({'a': series})
    expected_schema = pa.schema([('a', pa.binary_view())])
    _check_pandas_roundtrip(frame, schema=expected_schema)

def test_string_view(self):
    """Run the series and frame round-trip helpers with pa.string_view().

    The sample data mixes a multi-character string, an empty string, a
    single-character string and a missing entry.
    """
    values = ['123', '', 'a', None]
    series = pd.Series(values)
    _check_series_roundtrip(series, type_=pa.string_view())

    # Same data again, this time as column 'a' of a DataFrame with an
    # explicit schema.
    frame = pd.DataFrame({'a': series})
    expected_schema = pa.schema([('a', pa.string_view())])
    _check_pandas_roundtrip(frame, schema=expected_schema)

def test_table_empty_str(self):
values = ['', '', '', '', '']
df = pd.DataFrame({'strings': values})
Expand Down

0 comments on commit 16b993a

Please sign in to comment.