Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework error messages for deserializers #228

Merged
merged 10 commits into from
Sep 10, 2024
215 changes: 136 additions & 79 deletions serde_arrow/src/internal/deserialization/array_deserializer.rs

Large diffs are not rendered by default.

78 changes: 50 additions & 28 deletions serde_arrow/src/internal/deserialization/binary_deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,30 @@ use serde::de::{SeqAccess, Visitor};

use crate::internal::{
arrow::BytesArrayView,
error::{fail, Error, Result},
utils::{Mut, Offset},
error::{fail, set_default, try_, Context, ContextSupport, Error, Result},
utils::{Mut, NamedType, Offset},
};

use super::{simple_deserializer::SimpleDeserializer, utils::bitset_is_set};

/// Deserializer over a byte-array view, generic over the offset type `O`.
pub struct BinaryDeserializer<'a, O: Offset> {
/// Path of the field; reported under the "field" key when annotating errors.
pub path: String,
/// Array view being read; its `offsets` and optional `validity` are consulted
/// when peeking at the next item.
pub view: BytesArrayView<'a, O>,
/// Read cursor. `next.0` is the index of the next item; `next.1` is presumably
/// a position inside that item (its use is not visible here - TODO confirm).
pub next: (usize, usize),
}

impl<'a, O: Offset> BinaryDeserializer<'a, O> {
pub fn new(view: BytesArrayView<'a, O>) -> Self {
Self { view, next: (0, 0) }
pub fn new(path: String, view: BytesArrayView<'a, O>) -> Self {
Self {
path,
view,
next: (0, 0),
}
}

pub fn peek_next(&self) -> Result<bool> {
if self.next.0 + 1 >= self.view.offsets.len() {
fail!("Exhausted ListDeserializer")
fail!("Exhausted deserializer")
}
if let Some(validity) = &self.view.validity {
bitset_is_set(validity, self.next.0)
Expand All @@ -36,7 +41,7 @@ impl<'a, O: Offset> BinaryDeserializer<'a, O> {
pub fn peek_next_slice_range(&self) -> Result<(usize, usize)> {
let (item, _) = self.next;
if item + 1 >= self.view.offsets.len() {
fail!("called next_slices on exhausted BinaryDeserializer");
fail!("Exhausted deserializer");
}
let end = self.view.offsets[item + 1].try_into_usize()?;
let start = self.view.offsets[item].try_into_usize()?;
Expand All @@ -51,39 +56,56 @@ impl<'a, O: Offset> BinaryDeserializer<'a, O> {
}
}

impl<'a, O: Offset> SimpleDeserializer<'a> for BinaryDeserializer<'a, O> {
fn name() -> &'static str {
"BinaryDeserializer"
// Error-context annotations for `BinaryDeserializer`.
impl<'a, O: Offset + NamedType> Context for BinaryDeserializer<'a, O> {
/// Registers the field path under "field" and a data type name under "data_type".
///
/// The data type name is chosen from the offset type: `O::NAME == "i32"` gives
/// "Binary", `"i64"` gives "LargeBinary", and anything else gives "<unknown>".
/// Both values go through `set_default`, which presumably leaves an already
/// present key untouched (helper not visible here - confirm).
fn annotate(&self, annotations: &mut std::collections::BTreeMap<String, String>) {
set_default(annotations, "field", &self.path);
set_default(
annotations,
"data_type",
match O::NAME {
"i32" => "Binary",
"i64" => "LargeBinary",
_ => "<unknown>",
},
);
}
}

impl<'a, O: Offset + NamedType> SimpleDeserializer<'a> for BinaryDeserializer<'a, O> {
fn deserialize_any<V: Visitor<'a>>(&mut self, visitor: V) -> Result<V::Value> {
if self.peek_next()? {
self.deserialize_bytes(visitor)
} else {
self.consume_next();
visitor.visit_none()
}
try_(|| {
if self.peek_next().ctx(self)? {
self.deserialize_bytes(visitor).ctx(self)
} else {
self.consume_next();
visitor.visit_none::<Error>().ctx(self)
}
})
.ctx(self)
}

fn deserialize_option<V: Visitor<'a>>(&mut self, visitor: V) -> Result<V::Value> {
if self.peek_next()? {
visitor.visit_some(Mut(self))
} else {
self.consume_next();
visitor.visit_none()
}
try_(|| {
if self.peek_next().ctx(self)? {
visitor.visit_some(Mut(self)).ctx(self)
} else {
self.consume_next();
visitor.visit_none::<Error>().ctx(self)
}
})
.ctx(self)
}

fn deserialize_seq<V: Visitor<'a>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_seq(self)
try_(|| visitor.visit_seq(&mut *self)).ctx(self)
}

fn deserialize_bytes<V: Visitor<'a>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_borrowed_bytes(self.next_slice()?)
try_(|| visitor.visit_borrowed_bytes::<Error>(self.next_slice()?)).ctx(self)
}

fn deserialize_byte_buf<V: Visitor<'a>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_borrowed_bytes(self.next_slice()?)
try_(|| visitor.visit_borrowed_bytes::<Error>(self.next_slice()?)).ctx(self)
}
}

Expand Down Expand Up @@ -111,11 +133,11 @@ impl<'de, O: Offset> SeqAccess<'de> for BinaryDeserializer<'de, O> {

struct U8Deserializer(u8);

impl<'de> SimpleDeserializer<'de> for U8Deserializer {
fn name() -> &'static str {
"U8Deserializer"
}
// `U8Deserializer` carries no path or type information of its own.
impl Context for U8Deserializer {
/// Intentionally adds no annotations; the map is left as passed in.
fn annotate(&self, _: &mut std::collections::BTreeMap<String, String>) {}
}

impl<'de> SimpleDeserializer<'de> for U8Deserializer {
fn deserialize_u8<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_u8(self.0)
}
Expand Down
72 changes: 43 additions & 29 deletions serde_arrow/src/internal/deserialization/bool_deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,30 @@ use serde::de::Visitor;

use crate::internal::{
arrow::BooleanArrayView,
error::{fail, Result},
error::{fail, set_default, try_, Context, ContextSupport, Error, Result},
utils::Mut,
};

use super::{simple_deserializer::SimpleDeserializer, utils::bitset_is_set};

/// Deserializer over a boolean array view.
pub struct BoolDeserializer<'a> {
/// Path of the field; reported under the "field" key when annotating errors.
pub path: String,
/// Array view being read; its `len` bounds the cursor and its optional
/// `validity` bitset is checked per item.
pub view: BooleanArrayView<'a>,
/// Index of the next item to read; reading fails once it reaches `view.len`.
pub next: usize,
}

impl<'a> BoolDeserializer<'a> {
pub fn new(view: BooleanArrayView<'a>) -> Self {
Self { view, next: 0 }
pub fn new(path: String, view: BooleanArrayView<'a>) -> Self {
Self {
path,
view,
next: 0,
}
}

fn next(&mut self) -> Result<Option<bool>> {
if self.next >= self.view.len {
fail!("Exhausted BoolDeserializer");
fail!("Exhausted deserializer");
}
if let Some(validty) = &self.view.validity {
if !bitset_is_set(validty, self.next)? {
Expand All @@ -44,7 +49,7 @@ impl<'a> BoolDeserializer<'a> {

fn peek_next(&self) -> Result<bool> {
if self.next >= self.view.len {
fail!("Exhausted BoolDeserializer");
fail!("Exhausted deserializer");
} else if let Some(validity) = &self.view.validity {
bitset_is_set(validity, self.next)
} else {
Expand All @@ -57,62 +62,71 @@ impl<'a> BoolDeserializer<'a> {
}
}

impl<'de> SimpleDeserializer<'de> for BoolDeserializer<'de> {
fn name() -> &'static str {
"BoolDeserializer"
// Error-context annotations for `BoolDeserializer`.
impl<'de> Context for BoolDeserializer<'de> {
/// Registers the field path under "field" and the fixed type name "Boolean"
/// under "data_type", both via `set_default` (presumably a no-op for keys that
/// are already present - helper not visible here, confirm).
fn annotate(&self, annotations: &mut std::collections::BTreeMap<String, String>) {
set_default(annotations, "field", &self.path);
set_default(annotations, "data_type", "Boolean");
}
}

impl<'de> SimpleDeserializer<'de> for BoolDeserializer<'de> {
fn deserialize_any<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
if self.peek_next()? {
self.deserialize_bool(visitor)
} else {
self.consume_next();
visitor.visit_none()
}
try_(|| {
if self.peek_next()? {
self.deserialize_bool(visitor)
} else {
self.consume_next();
visitor.visit_none::<Error>()
}
})
.ctx(self)
}

fn deserialize_option<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
if self.peek_next()? {
visitor.visit_some(Mut(self))
} else {
self.consume_next();
visitor.visit_none()
}
try_(|| {
if self.peek_next()? {
visitor.visit_some(Mut(self))
} else {
self.consume_next();
visitor.visit_none::<Error>()
}
})
.ctx(self)
}

fn deserialize_bool<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_bool(self.next_required()?)
try_(|| visitor.visit_bool::<Error>(self.next_required()?)).ctx(self)
}

fn deserialize_u8<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_u8(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_u8::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_u16<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_u16(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_u16::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_u32<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_u32(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_u32::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_u64<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_u64(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_u64::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_i8<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_i8(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_i8::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_i16<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_i16(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_i16::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_i32<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_i32(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_i32::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_i64<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_i64(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_i64::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}
}
68 changes: 45 additions & 23 deletions serde_arrow/src/internal/deserialization/date32_deserializer.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
use chrono::{Duration, NaiveDate, NaiveDateTime};
use serde::de::Visitor;

use crate::internal::{arrow::BitsWithOffset, error::Result, utils::Mut};
use crate::internal::{
arrow::BitsWithOffset,
error::{set_default, try_, Context, ContextSupport, Error, Result},
utils::Mut,
};

use super::{simple_deserializer::SimpleDeserializer, utils::ArrayBufferIterator};

pub struct Date32Deserializer<'a>(ArrayBufferIterator<'a, i32>);
/// Deserializer for `i32` date values backed by an `ArrayBufferIterator`.
pub struct Date32Deserializer<'a> {
/// Path of the field; reported under the "field" key when annotating errors.
path: String,
/// Iterator over the `i32` buffer and its optional validity bits.
array: ArrayBufferIterator<'a, i32>,
}

impl<'a> Date32Deserializer<'a> {
pub fn new(buffer: &'a [i32], validity: Option<BitsWithOffset<'a>>) -> Self {
Self(ArrayBufferIterator::new(buffer, validity))
pub fn new(path: String, buffer: &'a [i32], validity: Option<BitsWithOffset<'a>>) -> Self {
Self {
path,
array: ArrayBufferIterator::new(buffer, validity),
}
}

pub fn get_string_repr(&self, ts: i32) -> Result<String> {
Expand All @@ -21,39 +31,51 @@ impl<'a> Date32Deserializer<'a> {
}
}

impl<'de> SimpleDeserializer<'de> for Date32Deserializer<'de> {
fn name() -> &'static str {
"Date32Deserializer"
// Error-context annotations for `Date32Deserializer`.
impl<'de> Context for Date32Deserializer<'de> {
/// Registers the field path under "field" and the fixed type name "Date32"
/// under "data_type", both via `set_default` (presumably a no-op for keys that
/// are already present - helper not visible here, confirm).
fn annotate(&self, annotations: &mut std::collections::BTreeMap<String, String>) {
set_default(annotations, "field", &self.path);
set_default(annotations, "data_type", "Date32");
}
}

impl<'de> SimpleDeserializer<'de> for Date32Deserializer<'de> {
fn deserialize_any<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
if self.0.peek_next()? {
self.deserialize_i32(visitor)
} else {
self.0.consume_next();
visitor.visit_none()
}
try_(|| {
if self.array.peek_next()? {
self.deserialize_i32(visitor)
} else {
self.array.consume_next();
visitor.visit_none()
}
})
.ctx(self)
}

fn deserialize_option<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
if self.0.peek_next()? {
visitor.visit_some(Mut(self))
} else {
self.0.consume_next();
visitor.visit_none()
}
try_(|| {
if self.array.peek_next()? {
visitor.visit_some(Mut(self))
} else {
self.array.consume_next();
visitor.visit_none::<Error>()
}
})
.ctx(self)
}

fn deserialize_i32<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_i32(self.0.next_required()?)
try_(|| visitor.visit_i32(self.array.next_required()?)).ctx(self)
}

fn deserialize_str<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
self.deserialize_string(visitor)
try_(|| self.deserialize_string(visitor)).ctx(self)
}

fn deserialize_string<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
let ts = self.0.next_required()?;
visitor.visit_string(self.get_string_repr(ts)?)
try_(|| {
let ts = self.array.next_required()?;
visitor.visit_string(self.get_string_repr(ts)?)
})
.ctx(self)
}
}
Loading