Skip to content

Commit

Permalink
fix(rust, python): implement ser/de for BinaryChunked (pola-rs#6543)
Browse files Browse the repository at this point in the history
  • Loading branch information
papparapa authored and cojmeister committed Jan 30, 2023
1 parent 7999a13 commit b9ab0f4
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 1 deletion.
5 changes: 4 additions & 1 deletion polars/polars-core/src/serde/chunked_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ impl_serialize!(Utf8Chunked);
impl_serialize!(BooleanChunked);
impl_serialize!(ListChunked);

// Generate the `Serialize` impl for `BinaryChunked` with the same macro used
// for the chunked-array types above. Only compiled when the `dtype-binary`
// feature is enabled.
#[cfg(feature = "dtype-binary")]
impl_serialize!(BinaryChunked);

#[cfg(feature = "dtype-categorical")]
impl Serialize for CategoricalChunked {
fn serialize<S>(
Expand All @@ -140,7 +143,7 @@ impl Serialize for CategoricalChunked {
}
}

#[cfg(feature = "dtype-categorical")]
#[cfg(feature = "dtype-struct")]
impl Serialize for StructChunked {
fn serialize<S>(
&self,
Expand Down
21 changes: 21 additions & 0 deletions polars/polars-core/src/serde/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ enum DeDataType<'a> {
Float32,
Float64,
Utf8,
Binary,
Date,
Datetime(TimeUnit, Option<TimeZone>),
Duration(TimeUnit),
Expand Down Expand Up @@ -49,6 +50,8 @@ impl From<&DataType> for DeDataType<'_> {
DataType::Boolean => DeDataType::Boolean,
DataType::Null => DeDataType::Null,
DataType::List(_) => DeDataType::List,
#[cfg(feature = "dtype-binary")]
DataType::Binary => DeDataType::Binary,
#[cfg(feature = "object")]
DataType::Object(s) => DeDataType::Object(s),
#[cfg(feature = "dtype-struct")]
Expand Down Expand Up @@ -128,6 +131,24 @@ mod test {
let out = serde_json::from_reader::<_, DataFrame>(json.as_bytes()).unwrap(); // uses `DeserializeOwned`
assert!(df.frame_equal_missing(&out));
}

// Round-trip check for binary columns: a one-column frame of variable-length
// byte vectors is encoded with bincode, decoded again from a byte-slice
// reader, and compared against the original with `frame_equal_missing`.
#[test]
#[cfg(feature = "dtype-binary")]
fn test_serde_binary_series_owned_bincode() {
    // Rows deliberately differ in length (3, 4 and 2 bytes).
    let rows = [
        vec![1u8, 2u8, 3u8],
        vec![4u8, 5u8, 6u8, 7u8],
        vec![8u8, 9u8],
    ];
    let binary_col = Series::new("foo", &rows);
    let df = DataFrame::new(vec![binary_col]).unwrap();

    let encoded = bincode::serialize(&df).unwrap();
    let out: DataFrame = bincode::deserialize_from(encoded.as_slice()).unwrap();
    assert!(df.frame_equal_missing(&out));
}

#[test]
#[cfg(feature = "dtype-struct")]
fn test_serde_struct_series_owned_json() {
Expand Down
10 changes: 10 additions & 0 deletions polars/polars-core/src/serde/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ impl Serialize for Series {
ca.serialize(serializer)
} else {
match self.dtype() {
#[cfg(feature = "dtype-binary")]
DataType::Binary => {
let ca = self.binary().unwrap();
ca.serialize(serializer)
}
#[cfg(feature = "dtype-struct")]
DataType::Struct(_) => {
let ca = self.struct_().unwrap();
Expand Down Expand Up @@ -201,6 +206,11 @@ impl<'de> Deserialize<'de> for Series {
let values: Vec<Option<Series>> = map.next_value()?;
Ok(Series::new(&name, values))
}
#[cfg(feature = "dtype-binary")]
DeDataType::Binary => {
let values: Vec<Option<Cow<[u8]>>> = map.next_value()?;
Ok(Series::new(&name, values))
}
#[cfg(feature = "dtype-struct")]
DeDataType::Struct => {
let values: Vec<Series> = map.next_value()?;
Expand Down
20 changes: 20 additions & 0 deletions py-polars/tests/unit/test_serde.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,23 @@ def test_serde_duration() -> None:
def test_serde_expression_5461() -> None:
    """An expression's ``meta`` must be unchanged after a pickle round trip.

    NOTE(review): the ``5461`` suffix presumably refers to an issue number;
    confirm against the tracker.
    """
    expr = pl.col("a").sqrt() / pl.col("b").alias("c")
    restored = pickle.loads(pickle.dumps(expr))
    assert restored.meta == expr.meta


def test_serde_binary() -> None:
    """A series of raw byte strings must be equal to itself after pickling."""
    # Payloads mix non-ASCII bytes, an embedded NUL, a backslash and a
    # trailing space.
    payloads = [
        b"\xba\x9b\xca\xd3y\xcb\xc9#",
        b"9\x04\xab\xe2\x11\xf3\x85",
        b"\xb8\xcb\xc9^\\\xa9-\x94\xe0H\x9d ",
        b"S\xbc:\xcb\xf0\xf5r\xfe\x18\xfeH",
        b",\xf5)y\x00\xe5\xf7",
        b"\xfd\xf6\xf1\xc2X\x0cn\xb9#",
        b"\x06\xef\xa6\xa2\xb7",
        b"@\xff\x95\xda\xff\xd2\x18",
    ]
    data = pl.Series("binary_data", payloads)
    round_tripped = pickle.loads(pickle.dumps(data))
    assert_series_equal(data, round_tripped)

0 comments on commit b9ab0f4

Please sign in to comment.