Skip to content

Commit

Permalink
Merge pull request #228 from chmp/feature/37-error-messages-deserializer
Browse files Browse the repository at this point in the history
Rework error messages for deserializers
  • Loading branch information
chmp authored Sep 10, 2024
2 parents 16866ac + 8f8e717 commit 095ed90
Show file tree
Hide file tree
Showing 49 changed files with 1,535 additions and 968 deletions.
215 changes: 136 additions & 79 deletions serde_arrow/src/internal/deserialization/array_deserializer.rs

Large diffs are not rendered by default.

78 changes: 50 additions & 28 deletions serde_arrow/src/internal/deserialization/binary_deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,30 @@ use serde::de::{SeqAccess, Visitor};

use crate::internal::{
arrow::BytesArrayView,
error::{fail, Error, Result},
utils::{Mut, Offset},
error::{fail, set_default, try_, Context, ContextSupport, Error, Result},
utils::{Mut, NamedType, Offset},
};

use super::{simple_deserializer::SimpleDeserializer, utils::bitset_is_set};

/// Deserializer over a bytes array view, generic over the offset type `O`.
pub struct BinaryDeserializer<'a, O: Offset> {
/// String reported under the `field` key when errors are annotated.
pub path: String,
/// View of the underlying array; its offsets and optional validity are read
/// while iterating.
pub view: BytesArrayView<'a, O>,
/// Cursor into the view. The first component is the index of the next item
/// and is checked against the length of the offsets.
// NOTE(review): the second component is presumably a position inside the
// current item used for sequence access - confirm against the `SeqAccess` impl.
pub next: (usize, usize),
}

impl<'a, O: Offset> BinaryDeserializer<'a, O> {
pub fn new(view: BytesArrayView<'a, O>) -> Self {
Self { view, next: (0, 0) }
pub fn new(path: String, view: BytesArrayView<'a, O>) -> Self {
Self {
path,
view,
next: (0, 0),
}
}

pub fn peek_next(&self) -> Result<bool> {
if self.next.0 + 1 >= self.view.offsets.len() {
fail!("Exhausted ListDeserializer")
fail!("Exhausted deserializer")
}
if let Some(validity) = &self.view.validity {
bitset_is_set(validity, self.next.0)
Expand All @@ -36,7 +41,7 @@ impl<'a, O: Offset> BinaryDeserializer<'a, O> {
pub fn peek_next_slice_range(&self) -> Result<(usize, usize)> {
let (item, _) = self.next;
if item + 1 >= self.view.offsets.len() {
fail!("called next_slices on exhausted BinaryDeserializer");
fail!("Exhausted deserializer");
}
let end = self.view.offsets[item + 1].try_into_usize()?;
let start = self.view.offsets[item].try_into_usize()?;
Expand All @@ -51,39 +56,56 @@ impl<'a, O: Offset> BinaryDeserializer<'a, O> {
}
}

impl<'a, O: Offset> SimpleDeserializer<'a> for BinaryDeserializer<'a, O> {
fn name() -> &'static str {
"BinaryDeserializer"
// Error-context provider for `BinaryDeserializer`.
impl<'a, O: Offset + NamedType> Context for BinaryDeserializer<'a, O> {
/// Records the `field` and `data_type` annotations through `set_default`.
///
/// `field` is taken from `self.path`. `data_type` is derived from the name of
/// the offset type: `"i32"` is reported as `Binary`, `"i64"` as `LargeBinary`
/// and any other name as `<unknown>`.
// NOTE(review): `set_default` presumably keeps a value that is already present
// for the key - confirm in the error module.
fn annotate(&self, annotations: &mut std::collections::BTreeMap<String, String>) {
set_default(annotations, "field", &self.path);
set_default(
annotations,
"data_type",
match O::NAME {
"i32" => "Binary",
"i64" => "LargeBinary",
_ => "<unknown>",
},
);
}
}

impl<'a, O: Offset + NamedType> SimpleDeserializer<'a> for BinaryDeserializer<'a, O> {
fn deserialize_any<V: Visitor<'a>>(&mut self, visitor: V) -> Result<V::Value> {
if self.peek_next()? {
self.deserialize_bytes(visitor)
} else {
self.consume_next();
visitor.visit_none()
}
try_(|| {
if self.peek_next().ctx(self)? {
self.deserialize_bytes(visitor).ctx(self)
} else {
self.consume_next();
visitor.visit_none::<Error>().ctx(self)
}
})
.ctx(self)
}

fn deserialize_option<V: Visitor<'a>>(&mut self, visitor: V) -> Result<V::Value> {
if self.peek_next()? {
visitor.visit_some(Mut(self))
} else {
self.consume_next();
visitor.visit_none()
}
try_(|| {
if self.peek_next().ctx(self)? {
visitor.visit_some(Mut(self)).ctx(self)
} else {
self.consume_next();
visitor.visit_none::<Error>().ctx(self)
}
})
.ctx(self)
}

fn deserialize_seq<V: Visitor<'a>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_seq(self)
try_(|| visitor.visit_seq(&mut *self)).ctx(self)
}

fn deserialize_bytes<V: Visitor<'a>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_borrowed_bytes(self.next_slice()?)
try_(|| visitor.visit_borrowed_bytes::<Error>(self.next_slice()?)).ctx(self)
}

fn deserialize_byte_buf<V: Visitor<'a>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_borrowed_bytes(self.next_slice()?)
try_(|| visitor.visit_borrowed_bytes::<Error>(self.next_slice()?)).ctx(self)
}
}

Expand Down Expand Up @@ -111,11 +133,11 @@ impl<'de, O: Offset> SeqAccess<'de> for BinaryDeserializer<'de, O> {

/// Deserializer wrapping a single `u8`; its `deserialize_u8` hands the wrapped
/// byte to the visitor.
struct U8Deserializer(u8);

impl<'de> SimpleDeserializer<'de> for U8Deserializer {
fn name() -> &'static str {
"U8Deserializer"
}
// `U8Deserializer` contributes no annotations of its own.
impl Context for U8Deserializer {
/// Intentionally empty: the annotation map is left unchanged.
fn annotate(&self, _: &mut std::collections::BTreeMap<String, String>) {}
}

impl<'de> SimpleDeserializer<'de> for U8Deserializer {
fn deserialize_u8<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_u8(self.0)
}
Expand Down
72 changes: 43 additions & 29 deletions serde_arrow/src/internal/deserialization/bool_deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,30 @@ use serde::de::Visitor;

use crate::internal::{
arrow::BooleanArrayView,
error::{fail, Result},
error::{fail, set_default, try_, Context, ContextSupport, Error, Result},
utils::Mut,
};

use super::{simple_deserializer::SimpleDeserializer, utils::bitset_is_set};

/// Deserializer over a boolean array view.
pub struct BoolDeserializer<'a> {
/// String reported under the `field` key when errors are annotated.
pub path: String,
/// View of the underlying array; its length and optional validity are read
/// while iterating.
pub view: BooleanArrayView<'a>,
/// Index of the next item; reaching `view.len` means the deserializer is
/// exhausted.
pub next: usize,
}

impl<'a> BoolDeserializer<'a> {
pub fn new(view: BooleanArrayView<'a>) -> Self {
Self { view, next: 0 }
pub fn new(path: String, view: BooleanArrayView<'a>) -> Self {
Self {
path,
view,
next: 0,
}
}

fn next(&mut self) -> Result<Option<bool>> {
if self.next >= self.view.len {
fail!("Exhausted BoolDeserializer");
fail!("Exhausted deserializer");
}
if let Some(validty) = &self.view.validity {
if !bitset_is_set(validty, self.next)? {
Expand All @@ -44,7 +49,7 @@ impl<'a> BoolDeserializer<'a> {

fn peek_next(&self) -> Result<bool> {
if self.next >= self.view.len {
fail!("Exhausted BoolDeserializer");
fail!("Exhausted deserializer");
} else if let Some(validity) = &self.view.validity {
bitset_is_set(validity, self.next)
} else {
Expand All @@ -57,62 +62,71 @@ impl<'a> BoolDeserializer<'a> {
}
}

impl<'de> SimpleDeserializer<'de> for BoolDeserializer<'de> {
fn name() -> &'static str {
"BoolDeserializer"
// Error-context provider for `BoolDeserializer`.
impl<'de> Context for BoolDeserializer<'de> {
/// Records `field` (from `self.path`) and `data_type` (always `Boolean`)
/// through `set_default`.
// NOTE(review): `set_default` presumably keeps a value that is already present
// for the key - confirm in the error module.
fn annotate(&self, annotations: &mut std::collections::BTreeMap<String, String>) {
set_default(annotations, "field", &self.path);
set_default(annotations, "data_type", "Boolean");
}
}

impl<'de> SimpleDeserializer<'de> for BoolDeserializer<'de> {
fn deserialize_any<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
if self.peek_next()? {
self.deserialize_bool(visitor)
} else {
self.consume_next();
visitor.visit_none()
}
try_(|| {
if self.peek_next()? {
self.deserialize_bool(visitor)
} else {
self.consume_next();
visitor.visit_none::<Error>()
}
})
.ctx(self)
}

fn deserialize_option<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
if self.peek_next()? {
visitor.visit_some(Mut(self))
} else {
self.consume_next();
visitor.visit_none()
}
try_(|| {
if self.peek_next()? {
visitor.visit_some(Mut(self))
} else {
self.consume_next();
visitor.visit_none::<Error>()
}
})
.ctx(self)
}

fn deserialize_bool<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_bool(self.next_required()?)
try_(|| visitor.visit_bool::<Error>(self.next_required()?)).ctx(self)
}

fn deserialize_u8<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_u8(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_u8::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_u16<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_u16(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_u16::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_u32<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_u32(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_u32::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_u64<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_u64(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_u64::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_i8<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_i8(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_i8::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_i16<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_i16(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_i16::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_i32<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_i32(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_i32::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}

fn deserialize_i64<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_i64(if self.next_required()? { 1 } else { 0 })
try_(|| visitor.visit_i64::<Error>(if self.next_required()? { 1 } else { 0 })).ctx(self)
}
}
68 changes: 45 additions & 23 deletions serde_arrow/src/internal/deserialization/date32_deserializer.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
use chrono::{Duration, NaiveDate, NaiveDateTime};
use serde::de::Visitor;

use crate::internal::{arrow::BitsWithOffset, error::Result, utils::Mut};
use crate::internal::{
arrow::BitsWithOffset,
error::{set_default, try_, Context, ContextSupport, Error, Result},
utils::Mut,
};

use super::{simple_deserializer::SimpleDeserializer, utils::ArrayBufferIterator};

pub struct Date32Deserializer<'a>(ArrayBufferIterator<'a, i32>);
/// Deserializer for `Date32` values stored as `i32`.
pub struct Date32Deserializer<'a> {
/// String reported under the `field` key when errors are annotated.
path: String,
/// Iterator over the `i32` buffer and its optional validity, as built by `new`.
array: ArrayBufferIterator<'a, i32>,
}

impl<'a> Date32Deserializer<'a> {
pub fn new(buffer: &'a [i32], validity: Option<BitsWithOffset<'a>>) -> Self {
Self(ArrayBufferIterator::new(buffer, validity))
pub fn new(path: String, buffer: &'a [i32], validity: Option<BitsWithOffset<'a>>) -> Self {
Self {
path,
array: ArrayBufferIterator::new(buffer, validity),
}
}

pub fn get_string_repr(&self, ts: i32) -> Result<String> {
Expand All @@ -21,39 +31,51 @@ impl<'a> Date32Deserializer<'a> {
}
}

impl<'de> SimpleDeserializer<'de> for Date32Deserializer<'de> {
fn name() -> &'static str {
"Date32Deserializer"
// Error-context provider for `Date32Deserializer`.
impl<'de> Context for Date32Deserializer<'de> {
/// Records `field` (from `self.path`) and `data_type` (always `Date32`)
/// through `set_default`.
// NOTE(review): `set_default` presumably keeps a value that is already present
// for the key - confirm in the error module.
fn annotate(&self, annotations: &mut std::collections::BTreeMap<String, String>) {
set_default(annotations, "field", &self.path);
set_default(annotations, "data_type", "Date32");
}
}

impl<'de> SimpleDeserializer<'de> for Date32Deserializer<'de> {
fn deserialize_any<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
if self.0.peek_next()? {
self.deserialize_i32(visitor)
} else {
self.0.consume_next();
visitor.visit_none()
}
try_(|| {
if self.array.peek_next()? {
self.deserialize_i32(visitor)
} else {
self.array.consume_next();
visitor.visit_none()
}
})
.ctx(self)
}

fn deserialize_option<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
if self.0.peek_next()? {
visitor.visit_some(Mut(self))
} else {
self.0.consume_next();
visitor.visit_none()
}
try_(|| {
if self.array.peek_next()? {
visitor.visit_some(Mut(self))
} else {
self.array.consume_next();
visitor.visit_none::<Error>()
}
})
.ctx(self)
}

fn deserialize_i32<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
visitor.visit_i32(self.0.next_required()?)
try_(|| visitor.visit_i32(self.array.next_required()?)).ctx(self)
}

fn deserialize_str<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
self.deserialize_string(visitor)
try_(|| self.deserialize_string(visitor)).ctx(self)
}

fn deserialize_string<V: Visitor<'de>>(&mut self, visitor: V) -> Result<V::Value> {
let ts = self.0.next_required()?;
visitor.visit_string(self.get_string_repr(ts)?)
try_(|| {
let ts = self.array.next_required()?;
visitor.visit_string(self.get_string_repr(ts)?)
})
.ctx(self)
}
}
Loading

0 comments on commit 095ed90

Please sign in to comment.