diff --git a/serde_arrow/src/internal/deserialization/array_deserializer.rs b/serde_arrow/src/internal/deserialization/array_deserializer.rs index 9ba76024..c423d105 100644 --- a/serde_arrow/src/internal/deserialization/array_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/array_deserializer.rs @@ -3,9 +3,9 @@ use serde::de::{Deserialize, DeserializeSeed, VariantAccess, Visitor}; use crate::internal::{ arrow::{ArrayView, FieldMeta, PrimitiveArrayView, TimeUnit}, - error::{fail, Error, Result}, + error::{fail, Context, Error, Result}, schema::{Strategy, STRATEGY_KEY}, - utils::Mut, + utils::{ChildName, Mut}, }; use super::{ @@ -70,90 +70,128 @@ pub enum ArrayDeserializer<'a> { } impl<'a> ArrayDeserializer<'a> { - pub fn new(strategy: Option<&Strategy>, array: ArrayView<'a>) -> Result { + pub fn new(path: String, strategy: Option<&Strategy>, array: ArrayView<'a>) -> Result { use {ArrayDeserializer as D, ArrayView as V}; match array { - ArrayView::Null(_) => Ok(Self::Null(NullDeserializer {})), - V::Boolean(view) => Ok(D::Bool(BoolDeserializer::new(view))), - V::Int8(view) => Ok(D::I8(IntegerDeserializer::new(view))), - V::Int16(view) => Ok(D::I16(IntegerDeserializer::new(view))), - V::Int32(view) => Ok(D::I32(IntegerDeserializer::new(view))), - V::Int64(view) => Ok(D::I64(IntegerDeserializer::new(view))), - V::UInt8(view) => Ok(D::U8(IntegerDeserializer::new(view))), - V::UInt16(view) => Ok(D::U16(IntegerDeserializer::new(view))), - V::UInt32(view) => Ok(D::U32(IntegerDeserializer::new(view))), - V::UInt64(view) => Ok(D::U64(IntegerDeserializer::new(view))), - V::Float16(view) => Ok(D::F16(FloatDeserializer::new(view))), - V::Float32(view) => Ok(D::F32(FloatDeserializer::new(view))), - V::Float64(view) => Ok(D::F64(FloatDeserializer::new(view))), - V::Decimal128(view) => Ok(D::Decimal128(DecimalDeserializer::new(view))), + ArrayView::Null(_) => Ok(Self::Null(NullDeserializer::new(path))), + V::Boolean(view) => Ok(D::Bool(BoolDeserializer::new(path, 
view))), + V::Int8(view) => Ok(D::I8(IntegerDeserializer::new(path, view))), + V::Int16(view) => Ok(D::I16(IntegerDeserializer::new(path, view))), + V::Int32(view) => Ok(D::I32(IntegerDeserializer::new(path, view))), + V::Int64(view) => Ok(D::I64(IntegerDeserializer::new(path, view))), + V::UInt8(view) => Ok(D::U8(IntegerDeserializer::new(path, view))), + V::UInt16(view) => Ok(D::U16(IntegerDeserializer::new(path, view))), + V::UInt32(view) => Ok(D::U32(IntegerDeserializer::new(path, view))), + V::UInt64(view) => Ok(D::U64(IntegerDeserializer::new(path, view))), + V::Float16(view) => Ok(D::F16(FloatDeserializer::new(path, view))), + V::Float32(view) => Ok(D::F32(FloatDeserializer::new(path, view))), + V::Float64(view) => Ok(D::F64(FloatDeserializer::new(path, view))), + V::Decimal128(view) => Ok(D::Decimal128(DecimalDeserializer::new(path, view))), ArrayView::Date32(view) => Ok(Self::Date32(Date32Deserializer::new( + path, view.values, view.validity, ))), ArrayView::Date64(view) => Ok(Self::Date64(Date64Deserializer::new( + path, view.values, view.validity, TimeUnit::Millisecond, is_utc_date64(strategy)?, ))), - V::Time32(view) => Ok(D::Time32(TimeDeserializer::new(view))), - V::Time64(view) => Ok(D::Time64(TimeDeserializer::new(view))), + V::Time32(view) => Ok(D::Time32(TimeDeserializer::new(path, view))), + V::Time64(view) => Ok(D::Time64(TimeDeserializer::new(path, view))), ArrayView::Timestamp(view) => match strategy { Some(Strategy::NaiveStrAsDate64 | Strategy::UtcStrAsDate64) => { Ok(Self::Date64(Date64Deserializer::new( + path, view.values, view.validity, view.unit, is_utc_timestamp(view.timezone.as_deref())?, ))) } - Some(strategy) => fail!("invalid strategy {strategy} for timestamp field"), + Some(strategy) => { + fail!("Invalid strategy: {strategy} is not supported for timestamp field") + } None => Ok(Self::Date64(Date64Deserializer::new( + path, view.values, view.validity, view.unit, is_utc_timestamp(view.timezone.as_deref())?, ))), }, - 
V::Duration(view) => Ok(D::I64(IntegerDeserializer::new(PrimitiveArrayView { - values: view.values, - validity: view.validity, - }))), - V::Utf8(view) => Ok(D::Utf8(StringDeserializer::new(view))), - V::LargeUtf8(view) => Ok(D::LargeUtf8(StringDeserializer::new(view))), - V::Binary(view) => Ok(D::Binary(BinaryDeserializer::new(view))), - V::LargeBinary(view) => Ok(D::LargeBinary(BinaryDeserializer::new(view))), - V::FixedSizeBinary(view) => { - Ok(D::FixedSizeBinary(FixedSizeBinaryDeserializer::new(view)?)) - } - V::List(view) => Ok(D::List(ListDeserializer::new( - ArrayDeserializer::new(get_strategy(&view.meta)?.as_ref(), *view.element)?, - view.offsets, - view.validity, - )?)), - V::LargeList(view) => Ok(D::LargeList(ListDeserializer::new( - ArrayDeserializer::new(get_strategy(&view.meta)?.as_ref(), *view.element)?, - view.offsets, - view.validity, - )?)), - V::FixedSizeList(view) => Ok(D::FixedSizeList(FixedSizeListDeserializer::new( - ArrayDeserializer::new(get_strategy(&view.meta)?.as_ref(), *view.element)?, - view.validity, - view.n.try_into()?, - view.len, + V::Duration(view) => Ok(D::I64(IntegerDeserializer::new( + path, + PrimitiveArrayView { + values: view.values, + validity: view.validity, + }, ))), + V::Utf8(view) => Ok(D::Utf8(StringDeserializer::new(path, view))), + V::LargeUtf8(view) => Ok(D::LargeUtf8(StringDeserializer::new(path, view))), + V::Binary(view) => Ok(D::Binary(BinaryDeserializer::new(path, view))), + V::LargeBinary(view) => Ok(D::LargeBinary(BinaryDeserializer::new(path, view))), + V::FixedSizeBinary(view) => Ok(D::FixedSizeBinary(FixedSizeBinaryDeserializer::new( + path, view, + )?)), + V::List(view) => { + let child_path = format!("{path}.{child}", child = ChildName(&view.meta.name)); + Ok(D::List(ListDeserializer::new( + path, + ArrayDeserializer::new( + child_path, + get_strategy(&view.meta)?.as_ref(), + *view.element, + )?, + view.offsets, + view.validity, + )?)) + } + V::LargeList(view) => { + let child_path = 
format!("{path}.{child}", child = ChildName(&view.meta.name)); + Ok(D::LargeList(ListDeserializer::new( + path, + ArrayDeserializer::new( + child_path, + get_strategy(&view.meta)?.as_ref(), + *view.element, + )?, + view.offsets, + view.validity, + )?)) + } + V::FixedSizeList(view) => { + let child_path = format!("{path}.{child}", child = ChildName(&view.meta.name)); + Ok(D::FixedSizeList(FixedSizeListDeserializer::new( + path, + ArrayDeserializer::new( + child_path, + get_strategy(&view.meta)?.as_ref(), + *view.element, + )?, + view.validity, + view.n.try_into()?, + view.len, + ))) + } V::Struct(view) => { let mut fields = Vec::new(); for (field_view, field_meta) in view.fields { - let field_deserializer = - ArrayDeserializer::new(get_strategy(&field_meta)?.as_ref(), field_view)?; + let child_path = format!("{path}.{child}", child = ChildName(&field_meta.name)); + let field_deserializer = ArrayDeserializer::new( + child_path, + get_strategy(&field_meta)?.as_ref(), + field_view, + )?; let field_name = field_meta.name; fields.push((field_name, field_deserializer)); } Ok(D::Struct(StructDeserializer::new( + path, fields, view.validity, view.len, @@ -161,17 +199,28 @@ impl<'a> ArrayDeserializer<'a> { } V::Map(view) => { let ArrayView::Struct(entries_view) = *view.element else { - fail!("invalid entries field in map array"); + fail!("Invalid entries field in map array"); }; let Ok(entries_fields) = <[_; 2]>::try_from(entries_view.fields) else { - fail!("invalid entries field in map array") + fail!("Invalid entries field in map array") }; let [(keys_view, keys_meta), (values_view, values_meta)] = entries_fields; - let keys = ArrayDeserializer::new(get_strategy(&keys_meta)?.as_ref(), keys_view)?; - let values = - ArrayDeserializer::new(get_strategy(&values_meta)?.as_ref(), values_view)?; + let keys_path = format!("{path}.{child}", child = ChildName(&keys_meta.name)); + let keys = ArrayDeserializer::new( + keys_path, + get_strategy(&keys_meta)?.as_ref(), + keys_view, + )?; 
+ + let values_path = format!("{path}.{child}", child = ChildName(&values_meta.name)); + let values = ArrayDeserializer::new( + values_path, + get_strategy(&values_meta)?.as_ref(), + values_view, + )?; Ok(D::Map(MapDeserializer::new( + path, keys, values, view.offsets, @@ -180,68 +229,72 @@ impl<'a> ArrayDeserializer<'a> { } V::Dictionary(view) => match (*view.indices, *view.values) { (V::Int8(keys), V::Utf8(values)) => Ok(D::DictionaryI8I32( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::Int16(keys), V::Utf8(values)) => Ok(D::DictionaryI16I32( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::Int32(keys), V::Utf8(values)) => Ok(D::DictionaryI32I32( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::Int64(keys), V::Utf8(values)) => Ok(D::DictionaryI64I32( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::UInt8(keys), V::Utf8(values)) => Ok(Self::DictionaryU8I32( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::UInt16(keys), V::Utf8(values)) => Ok(D::DictionaryU16I32( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::UInt32(keys), V::Utf8(values)) => Ok(D::DictionaryU32I32( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::UInt64(keys), V::Utf8(values)) => Ok(D::DictionaryU64I32( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::Int8(keys), V::LargeUtf8(values)) => Ok(D::DictionaryI8I64( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::Int16(keys), V::LargeUtf8(values)) => Ok(D::DictionaryI16I64( - DictionaryDeserializer::new(keys, values)?, + 
DictionaryDeserializer::new(path, keys, values)?, )), (V::Int32(keys), V::LargeUtf8(values)) => Ok(D::DictionaryI32I64( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::Int64(keys), V::LargeUtf8(values)) => Ok(D::DictionaryI64I64( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::UInt8(keys), V::LargeUtf8(values)) => Ok(D::DictionaryU8I64( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::UInt16(keys), V::LargeUtf8(values)) => Ok(D::DictionaryU16I64( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::UInt32(keys), V::LargeUtf8(values)) => Ok(D::DictionaryU32I64( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), (V::UInt64(keys), V::LargeUtf8(values)) => Ok(D::DictionaryU64I64( - DictionaryDeserializer::new(keys, values)?, + DictionaryDeserializer::new(path, keys, values)?, )), - _ => fail!("unsupported dictionary array"), + _ => fail!("Unsupported dictionary array type"), }, ArrayView::DenseUnion(view) => { let mut fields = Vec::new(); for (idx, (type_id, field_view, field_meta)) in view.fields.into_iter().enumerate() { if usize::try_from(type_id) != Ok(idx) { - fail!("Only unions with consecutive type ids are currently supported in arrow2"); + fail!("Only unions with consecutive type ids are currently supported"); } - let field_deserializer = - ArrayDeserializer::new(get_strategy(&field_meta)?.as_ref(), field_view)?; + let child_path = format!("{path}.{child}", child = ChildName(&field_meta.name)); + let field_deserializer = ArrayDeserializer::new( + child_path, + get_strategy(&field_meta)?.as_ref(), + field_view, + )?; fields.push((field_meta.name, field_deserializer)) } - Ok(Self::Enum(EnumDeserializer::new(view.types, fields))) + Ok(Self::Enum(EnumDeserializer::new(path, view.types, 
fields))) } } } @@ -250,7 +303,7 @@ impl<'a> ArrayDeserializer<'a> { fn is_utc_timestamp(timezone: Option<&str>) -> Result { match timezone { Some(tz) if tz.to_lowercase() == "utc" => Ok(true), - Some(tz) => fail!("unsupported timezone {}", tz), + Some(tz) => fail!("Unsupported timezone: {} is not supported", tz), None => Ok(false), } } @@ -259,7 +312,9 @@ fn is_utc_date64(strategy: Option<&Strategy>) -> Result { match strategy { None | Some(Strategy::UtcStrAsDate64) => Ok(true), Some(Strategy::NaiveStrAsDate64) => Ok(false), - Some(strategy) => fail!("invalid strategy for date64 deserializer: {strategy}"), + Some(strategy) => { + fail!("Invalid strategy: {strategy} is not supported for date64 deserializer") + } } } @@ -322,11 +377,13 @@ macro_rules! dispatch { }; } -impl<'de> SimpleDeserializer<'de> for ArrayDeserializer<'de> { - fn name() -> &'static str { - "ArrayDeserializer" +impl<'de> Context for ArrayDeserializer<'de> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + dispatch!(self, ArrayDeserializer(deser) => deser.annotate(annotations)) } +} +impl<'de> SimpleDeserializer<'de> for ArrayDeserializer<'de> { fn deserialize_any>(&mut self, visitor: V) -> Result { dispatch!(self, ArrayDeserializer(deser) => deser.deserialize_any(visitor)) } diff --git a/serde_arrow/src/internal/deserialization/binary_deserializer.rs b/serde_arrow/src/internal/deserialization/binary_deserializer.rs index 7c9ff1df..29e77a7d 100644 --- a/serde_arrow/src/internal/deserialization/binary_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/binary_deserializer.rs @@ -2,25 +2,30 @@ use serde::de::{SeqAccess, Visitor}; use crate::internal::{ arrow::BytesArrayView, - error::{fail, Error, Result}, - utils::{Mut, Offset}, + error::{fail, set_default, try_, Context, ContextSupport, Error, Result}, + utils::{Mut, NamedType, Offset}, }; use super::{simple_deserializer::SimpleDeserializer, utils::bitset_is_set}; pub struct BinaryDeserializer<'a, O: Offset> { + 
pub path: String, pub view: BytesArrayView<'a, O>, pub next: (usize, usize), } impl<'a, O: Offset> BinaryDeserializer<'a, O> { - pub fn new(view: BytesArrayView<'a, O>) -> Self { - Self { view, next: (0, 0) } + pub fn new(path: String, view: BytesArrayView<'a, O>) -> Self { + Self { + path, + view, + next: (0, 0), + } } pub fn peek_next(&self) -> Result { if self.next.0 + 1 >= self.view.offsets.len() { - fail!("Exhausted ListDeserializer") + fail!("Exhausted deserializer") } if let Some(validity) = &self.view.validity { bitset_is_set(validity, self.next.0) @@ -36,7 +41,7 @@ impl<'a, O: Offset> BinaryDeserializer<'a, O> { pub fn peek_next_slice_range(&self) -> Result<(usize, usize)> { let (item, _) = self.next; if item + 1 >= self.view.offsets.len() { - fail!("called next_slices on exhausted BinaryDeserializer"); + fail!("Exhausted deserializer"); } let end = self.view.offsets[item + 1].try_into_usize()?; let start = self.view.offsets[item].try_into_usize()?; @@ -51,39 +56,56 @@ impl<'a, O: Offset> BinaryDeserializer<'a, O> { } } -impl<'a, O: Offset> SimpleDeserializer<'a> for BinaryDeserializer<'a, O> { - fn name() -> &'static str { - "BinaryDeserializer" +impl<'a, O: Offset + NamedType> Context for BinaryDeserializer<'a, O> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + match O::NAME { + "i32" => "Binary", + "i64" => "LargeBinary", + _ => "", + }, + ); } +} +impl<'a, O: Offset + NamedType> SimpleDeserializer<'a> for BinaryDeserializer<'a, O> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - self.deserialize_bytes(visitor) - } else { - self.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.peek_next().ctx(self)? 
{ + self.deserialize_bytes(visitor).ctx(self) + } else { + self.consume_next(); + visitor.visit_none::().ctx(self) + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.peek_next().ctx(self)? { + visitor.visit_some(Mut(self)).ctx(self) + } else { + self.consume_next(); + visitor.visit_none::().ctx(self) + } + }) + .ctx(self) } fn deserialize_seq>(&mut self, visitor: V) -> Result { - visitor.visit_seq(self) + try_(|| visitor.visit_seq(&mut *self)).ctx(self) } fn deserialize_bytes>(&mut self, visitor: V) -> Result { - visitor.visit_borrowed_bytes(self.next_slice()?) + try_(|| visitor.visit_borrowed_bytes::(self.next_slice()?)).ctx(self) } fn deserialize_byte_buf>(&mut self, visitor: V) -> Result { - visitor.visit_borrowed_bytes(self.next_slice()?) + try_(|| visitor.visit_borrowed_bytes::(self.next_slice()?)).ctx(self) } } @@ -111,11 +133,11 @@ impl<'de, O: Offset> SeqAccess<'de> for BinaryDeserializer<'de, O> { struct U8Deserializer(u8); -impl<'de> SimpleDeserializer<'de> for U8Deserializer { - fn name() -> &'static str { - "U8Deserializer" - } +impl Context for U8Deserializer { + fn annotate(&self, _: &mut std::collections::BTreeMap) {} +} +impl<'de> SimpleDeserializer<'de> for U8Deserializer { fn deserialize_u8>(&mut self, visitor: V) -> Result { visitor.visit_u8(self.0) } diff --git a/serde_arrow/src/internal/deserialization/bool_deserializer.rs b/serde_arrow/src/internal/deserialization/bool_deserializer.rs index 4e7fd87d..b91d3b8e 100644 --- a/serde_arrow/src/internal/deserialization/bool_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/bool_deserializer.rs @@ -2,25 +2,30 @@ use serde::de::Visitor; use crate::internal::{ arrow::BooleanArrayView, - error::{fail, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Error, Result}, utils::Mut, }; use 
super::{simple_deserializer::SimpleDeserializer, utils::bitset_is_set}; pub struct BoolDeserializer<'a> { + pub path: String, pub view: BooleanArrayView<'a>, pub next: usize, } impl<'a> BoolDeserializer<'a> { - pub fn new(view: BooleanArrayView<'a>) -> Self { - Self { view, next: 0 } + pub fn new(path: String, view: BooleanArrayView<'a>) -> Self { + Self { + path, + view, + next: 0, + } } fn next(&mut self) -> Result> { if self.next >= self.view.len { - fail!("Exhausted BoolDeserializer"); + fail!("Exhausted deserializer"); } if let Some(validty) = &self.view.validity { if !bitset_is_set(validty, self.next)? { @@ -44,7 +49,7 @@ impl<'a> BoolDeserializer<'a> { fn peek_next(&self) -> Result { if self.next >= self.view.len { - fail!("Exhausted BoolDeserializer"); + fail!("Exhausted deserializer"); } else if let Some(validity) = &self.view.validity { bitset_is_set(validity, self.next) } else { @@ -57,62 +62,71 @@ impl<'a> BoolDeserializer<'a> { } } -impl<'de> SimpleDeserializer<'de> for BoolDeserializer<'de> { - fn name() -> &'static str { - "BoolDeserializer" +impl<'de> Context for BoolDeserializer<'de> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Boolean"); } +} +impl<'de> SimpleDeserializer<'de> for BoolDeserializer<'de> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - self.deserialize_bool(visitor) - } else { - self.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.peek_next()? { + self.deserialize_bool(visitor) + } else { + self.consume_next(); + visitor.visit_none::() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.peek_next()? 
{ + visitor.visit_some(Mut(self)) + } else { + self.consume_next(); + visitor.visit_none::() + } + }) + .ctx(self) } fn deserialize_bool>(&mut self, visitor: V) -> Result { - visitor.visit_bool(self.next_required()?) + try_(|| visitor.visit_bool::(self.next_required()?)).ctx(self) } fn deserialize_u8>(&mut self, visitor: V) -> Result { - visitor.visit_u8(if self.next_required()? { 1 } else { 0 }) + try_(|| visitor.visit_u8::(if self.next_required()? { 1 } else { 0 })).ctx(self) } fn deserialize_u16>(&mut self, visitor: V) -> Result { - visitor.visit_u16(if self.next_required()? { 1 } else { 0 }) + try_(|| visitor.visit_u16::(if self.next_required()? { 1 } else { 0 })).ctx(self) } fn deserialize_u32>(&mut self, visitor: V) -> Result { - visitor.visit_u32(if self.next_required()? { 1 } else { 0 }) + try_(|| visitor.visit_u32::(if self.next_required()? { 1 } else { 0 })).ctx(self) } fn deserialize_u64>(&mut self, visitor: V) -> Result { - visitor.visit_u64(if self.next_required()? { 1 } else { 0 }) + try_(|| visitor.visit_u64::(if self.next_required()? { 1 } else { 0 })).ctx(self) } fn deserialize_i8>(&mut self, visitor: V) -> Result { - visitor.visit_i8(if self.next_required()? { 1 } else { 0 }) + try_(|| visitor.visit_i8::(if self.next_required()? { 1 } else { 0 })).ctx(self) } fn deserialize_i16>(&mut self, visitor: V) -> Result { - visitor.visit_i16(if self.next_required()? { 1 } else { 0 }) + try_(|| visitor.visit_i16::(if self.next_required()? { 1 } else { 0 })).ctx(self) } fn deserialize_i32>(&mut self, visitor: V) -> Result { - visitor.visit_i32(if self.next_required()? { 1 } else { 0 }) + try_(|| visitor.visit_i32::(if self.next_required()? { 1 } else { 0 })).ctx(self) } fn deserialize_i64>(&mut self, visitor: V) -> Result { - visitor.visit_i64(if self.next_required()? { 1 } else { 0 }) + try_(|| visitor.visit_i64::(if self.next_required()? 
{ 1 } else { 0 })).ctx(self) } } diff --git a/serde_arrow/src/internal/deserialization/date32_deserializer.rs b/serde_arrow/src/internal/deserialization/date32_deserializer.rs index fca2d52f..f37f0b00 100644 --- a/serde_arrow/src/internal/deserialization/date32_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/date32_deserializer.rs @@ -1,15 +1,25 @@ use chrono::{Duration, NaiveDate, NaiveDateTime}; use serde::de::Visitor; -use crate::internal::{arrow::BitsWithOffset, error::Result, utils::Mut}; +use crate::internal::{ + arrow::BitsWithOffset, + error::{set_default, try_, Context, ContextSupport, Error, Result}, + utils::Mut, +}; use super::{simple_deserializer::SimpleDeserializer, utils::ArrayBufferIterator}; -pub struct Date32Deserializer<'a>(ArrayBufferIterator<'a, i32>); +pub struct Date32Deserializer<'a> { + path: String, + array: ArrayBufferIterator<'a, i32>, +} impl<'a> Date32Deserializer<'a> { - pub fn new(buffer: &'a [i32], validity: Option>) -> Self { - Self(ArrayBufferIterator::new(buffer, validity)) + pub fn new(path: String, buffer: &'a [i32], validity: Option>) -> Self { + Self { + path, + array: ArrayBufferIterator::new(buffer, validity), + } } pub fn get_string_repr(&self, ts: i32) -> Result { @@ -21,39 +31,51 @@ impl<'a> Date32Deserializer<'a> { } } -impl<'de> SimpleDeserializer<'de> for Date32Deserializer<'de> { - fn name() -> &'static str { - "Date32Deserializer" +impl<'de> Context for Date32Deserializer<'de> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Date32"); } +} +impl<'de> SimpleDeserializer<'de> for Date32Deserializer<'de> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.0.peek_next()? { - self.deserialize_i32(visitor) - } else { - self.0.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.array.peek_next()? 
{ + self.deserialize_i32(visitor) + } else { + self.array.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.0.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.0.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.array.peek_next()? { + visitor.visit_some(Mut(self)) + } else { + self.array.consume_next(); + visitor.visit_none::() + } + }) + .ctx(self) } fn deserialize_i32>(&mut self, visitor: V) -> Result { - visitor.visit_i32(self.0.next_required()?) + try_(|| visitor.visit_i32(self.array.next_required()?)).ctx(self) } fn deserialize_str>(&mut self, visitor: V) -> Result { - self.deserialize_string(visitor) + try_(|| self.deserialize_string(visitor)).ctx(self) } fn deserialize_string>(&mut self, visitor: V) -> Result { - let ts = self.0.next_required()?; - visitor.visit_string(self.get_string_repr(ts)?) + try_(|| { + let ts = self.array.next_required()?; + visitor.visit_string(self.get_string_repr(ts)?) 
+ }) + .ctx(self) } } diff --git a/serde_arrow/src/internal/deserialization/date64_deserializer.rs b/serde_arrow/src/internal/deserialization/date64_deserializer.rs index d6dd2be2..b6bc2601 100644 --- a/serde_arrow/src/internal/deserialization/date64_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/date64_deserializer.rs @@ -3,26 +3,37 @@ use serde::de::Visitor; use crate::internal::{ arrow::{BitsWithOffset, TimeUnit}, - error::{fail, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Result}, utils::Mut, }; use super::{simple_deserializer::SimpleDeserializer, utils::ArrayBufferIterator}; -pub struct Date64Deserializer<'a>(ArrayBufferIterator<'a, i64>, TimeUnit, bool); +pub struct Date64Deserializer<'a> { + path: String, + array: ArrayBufferIterator<'a, i64>, + unit: TimeUnit, + is_utc: bool, +} impl<'a> Date64Deserializer<'a> { pub fn new( + path: String, buffer: &'a [i64], validity: Option>, unit: TimeUnit, is_utc: bool, ) -> Self { - Self(ArrayBufferIterator::new(buffer, validity), unit, is_utc) + Self { + path, + array: ArrayBufferIterator::new(buffer, validity), + unit, + is_utc, + } } pub fn get_string_repr(&self, ts: i64) -> Result { - let Some(date_time) = (match self.1 { + let Some(date_time) = (match self.unit { TimeUnit::Second => DateTime::from_timestamp(ts, 0), TimeUnit::Millisecond => DateTime::from_timestamp_millis(ts), TimeUnit::Microsecond => DateTime::from_timestamp_micros(ts), @@ -31,7 +42,7 @@ impl<'a> Date64Deserializer<'a> { fail!("Unsupported timestamp value: {ts}"); }; - if self.2 { + if self.is_utc { // NOTE: chrono documents that Debug, not Display, can be parsed Ok(format!("{:?}", date_time)) } else { @@ -41,39 +52,51 @@ impl<'a> Date64Deserializer<'a> { } } -impl<'de> SimpleDeserializer<'de> for Date64Deserializer<'de> { - fn name() -> &'static str { - "Date64Deserializer" +impl<'de> Context for Date64Deserializer<'de> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + 
set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Date64"); } +} +impl<'de> SimpleDeserializer<'de> for Date64Deserializer<'de> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.0.peek_next()? { - self.deserialize_i64(visitor) - } else { - self.0.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.array.peek_next()? { + self.deserialize_i64(visitor) + } else { + self.array.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.0.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.0.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.array.peek_next()? { + visitor.visit_some(Mut(self)) + } else { + self.array.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_i64>(&mut self, visitor: V) -> Result { - visitor.visit_i64(self.0.next_required()?) + try_(|| visitor.visit_i64(self.array.next_required()?)).ctx(self) } fn deserialize_str>(&mut self, visitor: V) -> Result { - self.deserialize_string(visitor) + try_(|| self.deserialize_string(visitor)).ctx(self) } fn deserialize_string>(&mut self, visitor: V) -> Result { - let ts = self.0.next_required()?; - visitor.visit_string(self.get_string_repr(ts)?) + try_(|| { + let ts = self.array.next_required()?; + visitor.visit_string(self.get_string_repr(ts)?) 
+ }) + .ctx(self) } } diff --git a/serde_arrow/src/internal/deserialization/decimal_deserializer.rs b/serde_arrow/src/internal/deserialization/decimal_deserializer.rs index 11b62a45..b58f5213 100644 --- a/serde_arrow/src/internal/deserialization/decimal_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/decimal_deserializer.rs @@ -2,54 +2,68 @@ use serde::de::Visitor; use crate::internal::{ arrow::DecimalArrayView, - error::Result, + error::{set_default, try_, Context, ContextSupport, Result}, utils::{decimal, Mut}, }; use super::{simple_deserializer::SimpleDeserializer, utils::ArrayBufferIterator}; pub struct DecimalDeserializer<'a> { + path: String, inner: ArrayBufferIterator<'a, i128>, scale: i8, } impl<'a> DecimalDeserializer<'a> { - pub fn new(view: DecimalArrayView<'a, i128>) -> Self { + pub fn new(path: String, view: DecimalArrayView<'a, i128>) -> Self { Self { + path, inner: ArrayBufferIterator::new(view.values, view.validity), scale: view.scale, } } } -impl<'de> SimpleDeserializer<'de> for DecimalDeserializer<'de> { - fn name() -> &'static str { - "DecimalDeserializer" +impl<'de> Context for DecimalDeserializer<'de> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Decimal128(..)"); } +} +impl<'de> SimpleDeserializer<'de> for DecimalDeserializer<'de> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.inner.peek_next()? { - self.deserialize_str(visitor) - } else { - self.inner.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.inner.peek_next()? { + self.deserialize_str(visitor) + } else { + self.inner.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.inner.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.inner.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.inner.peek_next()? 
{ + visitor.visit_some(Mut(self)) + } else { + self.inner.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_str>(&mut self, visitor: V) -> Result { - let val = self.inner.next_required()?; - let mut buffer = [0; decimal::BUFFER_SIZE_I128]; - let formatted = decimal::format_decimal(&mut buffer, val, self.scale); + try_(|| { + let val = self.inner.next_required()?; + let mut buffer = [0; decimal::BUFFER_SIZE_I128]; + let formatted = decimal::format_decimal(&mut buffer, val, self.scale); - visitor.visit_str(formatted) + visitor.visit_str(formatted) + }) + .ctx(self) } } diff --git a/serde_arrow/src/internal/deserialization/dictionary_deserializer.rs b/serde_arrow/src/internal/deserialization/dictionary_deserializer.rs index ef9bb397..84d86338 100644 --- a/serde_arrow/src/internal/deserialization/dictionary_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/dictionary_deserializer.rs @@ -2,7 +2,7 @@ use serde::de::Visitor; use crate::internal::{ arrow::{BytesArrayView, PrimitiveArrayView}, - error::{fail, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Result}, utils::{Mut, Offset}, }; @@ -12,18 +12,24 @@ use super::{ }; pub struct DictionaryDeserializer<'a, K: Integer, V: Offset> { + path: String, keys: ArrayBufferIterator<'a, K>, offsets: &'a [V], data: &'a [u8], } impl<'a, K: Integer, V: Offset> DictionaryDeserializer<'a, K, V> { - pub fn new(keys: PrimitiveArrayView<'a, K>, values: BytesArrayView<'a, V>) -> Result { + pub fn new( + path: String, + keys: PrimitiveArrayView<'a, K>, + values: BytesArrayView<'a, V>, + ) -> Result { if values.validity.is_some() { // TODO: check whether all values are defined? 
- fail!("dictionaries with nullable values are not supported"); + fail!("Null for non-nullable type: dictionaries do not support nullable values"); } Ok(Self { + path, keys: ArrayBufferIterator::new(keys.values, keys.validity), offsets: values.offsets, data: values.data, @@ -33,12 +39,12 @@ impl<'a, K: Integer, V: Offset> DictionaryDeserializer<'a, K, V> { pub fn next_str(&mut self) -> Result<&str> { let k: usize = self.keys.next_required()?.into_u64()?.try_into()?; let Some(start) = self.offsets.get(k) else { - fail!("invalid index"); + fail!("Invalid index"); }; let start = start.try_into_usize()?; let Some(end) = self.offsets.get(k + 1) else { - fail!("invalid index"); + fail!("Invalid index"); }; let end = end.try_into_usize()?; @@ -47,35 +53,44 @@ impl<'a, K: Integer, V: Offset> DictionaryDeserializer<'a, K, V> { } } -impl<'de, K: Integer, V: Offset> SimpleDeserializer<'de> for DictionaryDeserializer<'de, K, V> { - fn name() -> &'static str { - "DictionaryDeserializer" +impl<'de, K: Integer, V: Offset> Context for DictionaryDeserializer<'de, K, V> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Dictionary(..)"); } +} +impl<'de, K: Integer, V: Offset> SimpleDeserializer<'de> for DictionaryDeserializer<'de, K, V> { fn deserialize_any>(&mut self, visitor: VV) -> Result { - if self.keys.peek_next()? { - self.deserialize_str(visitor) - } else { - self.keys.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.keys.peek_next()? { + self.deserialize_str(visitor) + } else { + self.keys.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: VV) -> Result { - if self.keys.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.keys.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.keys.peek_next()? 
{ + visitor.visit_some(Mut(self)) + } else { + self.keys.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_str>(&mut self, visitor: VV) -> Result { - visitor.visit_str(self.next_str()?) + try_(|| visitor.visit_str(self.next_str()?)).ctx(self) } fn deserialize_string>(&mut self, visitor: VV) -> Result { - visitor.visit_string(self.next_str()?.to_owned()) + try_(|| visitor.visit_string(self.next_str()?.to_owned())).ctx(self) } fn deserialize_enum>( @@ -84,7 +99,10 @@ impl<'de, K: Integer, V: Offset> SimpleDeserializer<'de> for DictionaryDeseriali _: &'static [&'static str], visitor: VV, ) -> Result { - let variant = self.next_str()?; - visitor.visit_enum(EnumAccess(variant)) + try_(|| { + let variant = self.next_str()?; + visitor.visit_enum(EnumAccess(variant)) + }) + .ctx(self) } } diff --git a/serde_arrow/src/internal/deserialization/enum_deserializer.rs b/serde_arrow/src/internal/deserialization/enum_deserializer.rs index 8a322d89..6ad2fbfa 100644 --- a/serde_arrow/src/internal/deserialization/enum_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/enum_deserializer.rs @@ -1,21 +1,29 @@ +use std::collections::BTreeMap; + use serde::de::{DeserializeSeed, Deserializer, EnumAccess, Visitor}; use crate::internal::{ - error::{fail, Error, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Error, Result}, utils::Mut, }; use super::{array_deserializer::ArrayDeserializer, simple_deserializer::SimpleDeserializer}; pub struct EnumDeserializer<'a> { + pub path: String, pub type_ids: &'a [i8], pub variants: Vec<(String, ArrayDeserializer<'a>)>, pub next: usize, } impl<'a> EnumDeserializer<'a> { - pub fn new(type_ids: &'a [i8], variants: Vec<(String, ArrayDeserializer<'a>)>) -> Self { + pub fn new( + path: String, + type_ids: &'a [i8], + variants: Vec<(String, ArrayDeserializer<'a>)>, + ) -> Self { Self { + path, type_ids, variants, next: 0, @@ -23,18 +31,24 @@ impl<'a> EnumDeserializer<'a> { } } -impl<'de> 
SimpleDeserializer<'de> for EnumDeserializer<'de> { - fn name() -> &'static str { - "EnumDeserializer" +impl<'de> Context for EnumDeserializer<'de> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Union(..)"); } +} +impl<'de> SimpleDeserializer<'de> for EnumDeserializer<'de> { fn deserialize_enum>( &mut self, _: &'static str, _: &'static [&'static str], visitor: V, ) -> Result { - visitor.visit_enum(self) + let mut ctx = BTreeMap::new(); + self.annotate(&mut ctx); + + try_(|| visitor.visit_enum(self)).ctx(&ctx) } } @@ -44,7 +58,7 @@ impl<'a, 'de> EnumAccess<'de> for &'a mut EnumDeserializer<'de> { fn variant_seed>(self, seed: V) -> Result<(V::Value, Self::Variant)> { if self.next >= self.type_ids.len() { - fail!("Exhausted EnumDeserializer"); + fail!("Exhausted deserializer"); } let type_id = self.type_ids[self.next]; self.next += 1; @@ -65,7 +79,7 @@ struct VariantIdDeserializer<'a> { macro_rules! unimplemented { ($lifetime:lifetime, $name:ident $($tt:tt)*) => { fn $name>(self $($tt)*, _: V) -> Result { - fail!("{} is not implemented", stringify!($name)) + fail!("Unsupported: EnumDeserializer does not implement {}", stringify!($name)) } }; } diff --git a/serde_arrow/src/internal/deserialization/enums_as_string_impl.rs b/serde_arrow/src/internal/deserialization/enums_as_string_impl.rs index fd4cd6fd..fcfc91e5 100644 --- a/serde_arrow/src/internal/deserialization/enums_as_string_impl.rs +++ b/serde_arrow/src/internal/deserialization/enums_as_string_impl.rs @@ -17,7 +17,7 @@ impl<'a, 'de> serde::de::EnumAccess<'de> for EnumAccess<'a> { macro_rules! 
unimplemented { ($lifetime:lifetime, $name:ident $($tt:tt)*) => { fn $name>(self $($tt)*, _: V) -> Result { - fail!("{} is not implemented", stringify!($name)) + fail!("Unsupported: EnumDeserializer does not implement {}", stringify!($name)) } }; } @@ -78,15 +78,15 @@ impl<'de> serde::de::VariantAccess<'de> for UnitVariant { type Error = Error; fn newtype_variant_seed>(self, _: T) -> Result { - fail!("cannot deserialize enums with data from strings") + fail!("Unsupported: cannot deserialize enums with data from strings") } fn struct_variant>(self, _: &'static [&'static str], _: V) -> Result { - fail!("cannot deserialize enums with data from strings") + fail!("Unsupported: cannot deserialize enums with data from strings") } fn tuple_variant>(self, _: usize, _: V) -> Result { - fail!("cannot deserialize enums with data from strings") + fail!("Unsupported: cannot deserialize enums with data from strings") } fn unit_variant(self) -> Result<(), Self::Error> { diff --git a/serde_arrow/src/internal/deserialization/fixed_size_binary_deserializer.rs b/serde_arrow/src/internal/deserialization/fixed_size_binary_deserializer.rs index 757a6989..fe91c0d3 100644 --- a/serde_arrow/src/internal/deserialization/fixed_size_binary_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/fixed_size_binary_deserializer.rs @@ -2,20 +2,21 @@ use serde::de::{SeqAccess, Visitor}; use crate::internal::{ arrow::FixedSizeBinaryArrayView, - error::{fail, Error, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Error, Result}, utils::Mut, }; use super::{simple_deserializer::SimpleDeserializer, utils::bitset_is_set}; pub struct FixedSizeBinaryDeserializer<'a> { + pub path: String, pub view: FixedSizeBinaryArrayView<'a>, pub next: (usize, usize), pub shape: (usize, usize), } impl<'a> FixedSizeBinaryDeserializer<'a> { - pub fn new(view: FixedSizeBinaryArrayView<'a>) -> Result { + pub fn new(path: String, view: FixedSizeBinaryArrayView<'a>) -> Result { let n = 
usize::try_from(view.n)?; if view.data.len() % n != 0 { fail!( @@ -30,6 +31,7 @@ impl<'a> FixedSizeBinaryDeserializer<'a> { let shape = (view.data.len() / n, n); Ok(Self { + path, view, shape, next: (0, 0), @@ -38,7 +40,7 @@ impl<'a> FixedSizeBinaryDeserializer<'a> { pub fn peek_next(&self) -> Result { if self.next.0 >= self.shape.0 { - fail!("Exhausted ListDeserializer") + fail!("Exhausted deserializer") } if let Some(validity) = &self.view.validity { Ok(bitset_is_set(validity, self.next.0)?) @@ -54,7 +56,7 @@ impl<'a> FixedSizeBinaryDeserializer<'a> { pub fn next_slice(&mut self) -> Result<&'a [u8]> { let (item, _) = self.next; if item >= self.shape.0 { - fail!("called next_slices on exhausted BinaryDeserializer"); + fail!("Exhausted deserializer"); } self.next = (item + 1, 0); @@ -62,39 +64,48 @@ impl<'a> FixedSizeBinaryDeserializer<'a> { } } -impl<'a> SimpleDeserializer<'a> for FixedSizeBinaryDeserializer<'a> { - fn name() -> &'static str { - "FixedSizeBinaryDeserializer" +impl<'a> Context for FixedSizeBinaryDeserializer<'a> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "FixedSizeBinary(..)"); } +} +impl<'a> SimpleDeserializer<'a> for FixedSizeBinaryDeserializer<'a> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - self.deserialize_bytes(visitor) - } else { - self.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.peek_next()? { + self.deserialize_bytes(visitor) + } else { + self.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.peek_next()? 
{ + visitor.visit_some(Mut(self)) + } else { + self.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_seq>(&mut self, visitor: V) -> Result { - visitor.visit_seq(self) + try_(|| visitor.visit_seq(&mut *self)).ctx(self) } fn deserialize_bytes>(&mut self, visitor: V) -> Result { - visitor.visit_borrowed_bytes(self.next_slice()?) + try_(|| visitor.visit_borrowed_bytes(self.next_slice()?)).ctx(self) } fn deserialize_byte_buf>(&mut self, visitor: V) -> Result { - visitor.visit_borrowed_bytes(self.next_slice()?) + try_(|| visitor.visit_borrowed_bytes(self.next_slice()?)).ctx(self) } } @@ -122,11 +133,11 @@ impl<'de> SeqAccess<'de> for FixedSizeBinaryDeserializer<'de> { struct U8Deserializer(u8); -impl<'de> SimpleDeserializer<'de> for U8Deserializer { - fn name() -> &'static str { - "U8Deserializer" - } +impl Context for U8Deserializer { + fn annotate(&self, _: &mut std::collections::BTreeMap) {} +} +impl<'de> SimpleDeserializer<'de> for U8Deserializer { fn deserialize_u8>(&mut self, visitor: V) -> Result { visitor.visit_u8(self.0) } diff --git a/serde_arrow/src/internal/deserialization/fixed_size_list_deserializer.rs b/serde_arrow/src/internal/deserialization/fixed_size_list_deserializer.rs index e146b309..8be733fb 100644 --- a/serde_arrow/src/internal/deserialization/fixed_size_list_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/fixed_size_list_deserializer.rs @@ -2,7 +2,7 @@ use serde::de::{IgnoredAny, SeqAccess, Visitor}; use crate::internal::{ arrow::BitsWithOffset, - error::{fail, Error, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Error, Result}, utils::Mut, }; @@ -12,6 +12,7 @@ use super::{ }; pub struct FixedSizeListDeserializer<'a> { + pub path: String, pub item: Box>, pub validity: Option>, pub shape: (usize, usize), @@ -20,12 +21,14 @@ pub struct FixedSizeListDeserializer<'a> { impl<'a> FixedSizeListDeserializer<'a> { pub fn new( + path: String, item: ArrayDeserializer<'a>, validity: Option>, 
n: usize, len: usize, ) -> Self { Self { + path, item: Box::new(item), validity, shape: (len, n), @@ -35,7 +38,7 @@ impl<'a> FixedSizeListDeserializer<'a> { pub fn peek_next(&self) -> Result { if self.next.0 >= self.shape.0 { - fail!("Exhausted ListDeserializer") + fail!("Exhausted deserializer") } if let Some(validity) = &self.validity { Ok(bitset_is_set(validity, self.next.0)?) @@ -54,31 +57,40 @@ impl<'a> FixedSizeListDeserializer<'a> { } } -impl<'a> SimpleDeserializer<'a> for FixedSizeListDeserializer<'a> { - fn name() -> &'static str { - "ListDeserializer" +impl<'a> Context for FixedSizeListDeserializer<'a> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "FixedSizeList(..)"); } +} +impl<'a> SimpleDeserializer<'a> for FixedSizeListDeserializer<'a> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - self.deserialize_seq(visitor) - } else { - self.consume_next()?; - visitor.visit_none() - } + try_(|| { + if self.peek_next()? { + self.deserialize_seq(visitor) + } else { + self.consume_next()?; + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.consume_next()?; - visitor.visit_none() - } + try_(|| { + if self.peek_next()? 
{ + visitor.visit_some(Mut(&mut *self)) + } else { + self.consume_next()?; + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_seq>(&mut self, visitor: V) -> Result { - visitor.visit_seq(self) + try_(|| visitor.visit_seq(&mut *self)).ctx(self) } } diff --git a/serde_arrow/src/internal/deserialization/float_deserializer.rs b/serde_arrow/src/internal/deserialization/float_deserializer.rs index 36dba2a1..46db143e 100644 --- a/serde_arrow/src/internal/deserialization/float_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/float_deserializer.rs @@ -1,6 +1,10 @@ use serde::de::Visitor; -use crate::internal::{arrow::PrimitiveArrayView, error::Result, utils::Mut}; +use crate::internal::{ + arrow::PrimitiveArrayView, + error::{set_default, try_, Context, ContextSupport, Result}, + utils::{Mut, NamedType}, +}; use super::{simple_deserializer::SimpleDeserializer, utils::ArrayBufferIterator}; @@ -14,42 +18,66 @@ pub trait Float: Copy { fn into_f64(self) -> Result; } -pub struct FloatDeserializer<'a, F: Float>(ArrayBufferIterator<'a, F>); +pub struct FloatDeserializer<'a, F: Float> { + path: String, + array: ArrayBufferIterator<'a, F>, +} impl<'a, F: Float> FloatDeserializer<'a, F> { - pub fn new(view: PrimitiveArrayView<'a, F>) -> Self { - Self(ArrayBufferIterator::new(view.values, view.validity)) + pub fn new(path: String, view: PrimitiveArrayView<'a, F>) -> Self { + Self { + path, + array: ArrayBufferIterator::new(view.values, view.validity), + } } } -impl<'de, F: Float> SimpleDeserializer<'de> for FloatDeserializer<'de, F> { - fn name() -> &'static str { - "FloatDeserializer" +impl<'de, F: NamedType + Float> Context for FloatDeserializer<'de, F> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + match F::NAME { + "f16" => "Float16", + "f32" => "Float32", + "f64" => "Float64", + _ => "", + }, + ); } +} +impl<'de, F: NamedType + Float> 
SimpleDeserializer<'de> for FloatDeserializer<'de, F> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.0.peek_next()? { - F::deserialize_any(self, visitor) - } else { - self.0.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.array.peek_next()? { + F::deserialize_any(&mut *self, visitor) + } else { + self.array.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.0.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.0.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.array.peek_next()? { + visitor.visit_some(Mut(&mut *self)) + } else { + self.array.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_f32>(&mut self, visitor: V) -> Result { - visitor.visit_f32(self.0.next_required()?.into_f32()?) + try_(|| visitor.visit_f32(self.array.next_required()?.into_f32()?)).ctx(self) } fn deserialize_f64>(&mut self, visitor: V) -> Result { - visitor.visit_f64(self.0.next_required()?.into_f64()?) 
+ try_(|| visitor.visit_f64(self.array.next_required()?.into_f64()?)).ctx(self) } } diff --git a/serde_arrow/src/internal/deserialization/integer_deserializer.rs b/serde_arrow/src/internal/deserialization/integer_deserializer.rs index 4c5afe1a..5ef6c3ba 100644 --- a/serde_arrow/src/internal/deserialization/integer_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/integer_deserializer.rs @@ -1,6 +1,10 @@ use serde::de::Visitor; -use crate::internal::{arrow::PrimitiveArrayView, error::Result, utils::Mut}; +use crate::internal::{ + arrow::PrimitiveArrayView, + error::{set_default, try_, Context, ContextSupport, Result}, + utils::{Mut, NamedType}, +}; use super::{simple_deserializer::SimpleDeserializer, utils::ArrayBufferIterator}; @@ -23,74 +27,103 @@ pub trait Integer: Sized + Copy { fn into_u64(self) -> Result; } -pub struct IntegerDeserializer<'a, T: Integer>(ArrayBufferIterator<'a, T>); +pub struct IntegerDeserializer<'a, T: Integer> { + path: String, + array: ArrayBufferIterator<'a, T>, +} impl<'a, T: Integer> IntegerDeserializer<'a, T> { - pub fn new(view: PrimitiveArrayView<'a, T>) -> Self { - Self(ArrayBufferIterator::new(view.values, view.validity)) + pub fn new(path: String, view: PrimitiveArrayView<'a, T>) -> Self { + Self { + path, + array: ArrayBufferIterator::new(view.values, view.validity), + } } } -impl<'de, T: Integer> SimpleDeserializer<'de> for IntegerDeserializer<'de, T> { - fn name() -> &'static str { - "IntegerDeserializer" +impl<'de, T: NamedType + Integer> Context for IntegerDeserializer<'de, T> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + match T::NAME { + "i8" => "Int8", + "i16" => "Int16", + "i32" => "Int32", + "i64" => "Int64", + "u8" => "UInt8", + "u16" => "UInt16", + "u32" => "UInt32", + "u64" => "UInt64", + _ => "", + }, + ); } +} +impl<'de, T: NamedType + Integer> SimpleDeserializer<'de> for 
IntegerDeserializer<'de, T> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.0.peek_next()? { - T::deserialize_any(self, visitor) - } else { - self.0.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.array.peek_next()? { + T::deserialize_any(&mut *self, visitor) + } else { + self.array.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.0.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.0.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.array.peek_next()? { + visitor.visit_some(Mut(&mut *self)) + } else { + self.array.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_bool>(&mut self, visitor: V) -> Result { - visitor.visit_bool(self.0.next_required()?.into_bool()?) + try_(|| visitor.visit_bool(self.array.next_required()?.into_bool()?)).ctx(self) } fn deserialize_char>(&mut self, visitor: V) -> Result { - visitor.visit_char(self.0.next_required()?.into_u32()?.try_into()?) + try_(|| visitor.visit_char(self.array.next_required()?.into_u32()?.try_into()?)).ctx(self) } fn deserialize_u8>(&mut self, visitor: V) -> Result { - visitor.visit_u8(self.0.next_required()?.into_u8()?) + try_(|| visitor.visit_u8(self.array.next_required()?.into_u8()?)).ctx(self) } fn deserialize_u16>(&mut self, visitor: V) -> Result { - visitor.visit_u16(self.0.next_required()?.into_u16()?) + try_(|| visitor.visit_u16(self.array.next_required()?.into_u16()?)).ctx(self) } fn deserialize_u32>(&mut self, visitor: V) -> Result { - visitor.visit_u32(self.0.next_required()?.into_u32()?) + try_(|| visitor.visit_u32(self.array.next_required()?.into_u32()?)).ctx(self) } fn deserialize_u64>(&mut self, visitor: V) -> Result { - visitor.visit_u64(self.0.next_required()?.into_u64()?) 
+ try_(|| visitor.visit_u64(self.array.next_required()?.into_u64()?)).ctx(self) } fn deserialize_i8>(&mut self, visitor: V) -> Result { - visitor.visit_i8(self.0.next_required()?.into_i8()?) + try_(|| visitor.visit_i8(self.array.next_required()?.into_i8()?)).ctx(self) } fn deserialize_i16>(&mut self, visitor: V) -> Result { - visitor.visit_i16(self.0.next_required()?.into_i16()?) + try_(|| visitor.visit_i16(self.array.next_required()?.into_i16()?)).ctx(self) } fn deserialize_i32>(&mut self, visitor: V) -> Result { - visitor.visit_i32(self.0.next_required()?.into_i32()?) + try_(|| visitor.visit_i32(self.array.next_required()?.into_i32()?)).ctx(self) } fn deserialize_i64>(&mut self, visitor: V) -> Result { - visitor.visit_i64(self.0.next_required()?.into_i64()?) + try_(|| visitor.visit_i64(self.array.next_required()?.into_i64()?)).ctx(self) } } diff --git a/serde_arrow/src/internal/deserialization/list_deserializer.rs b/serde_arrow/src/internal/deserialization/list_deserializer.rs index ebf45562..c93a81c1 100644 --- a/serde_arrow/src/internal/deserialization/list_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/list_deserializer.rs @@ -2,8 +2,8 @@ use serde::de::{SeqAccess, Visitor}; use crate::internal::{ arrow::BitsWithOffset, - error::{fail, Error, Result}, - utils::{Mut, Offset}, + error::{fail, set_default, try_, Context, ContextSupport, Error, Result}, + utils::{Mut, NamedType, Offset}, }; use super::{ @@ -13,6 +13,7 @@ use super::{ }; pub struct ListDeserializer<'a, O: Offset> { + pub path: String, pub item: Box>, pub offsets: &'a [O], pub validity: Option>, @@ -21,6 +22,7 @@ pub struct ListDeserializer<'a, O: Offset> { impl<'a, O: Offset> ListDeserializer<'a, O> { pub fn new( + path: String, item: ArrayDeserializer<'a>, offsets: &'a [O], validity: Option>, @@ -28,6 +30,7 @@ impl<'a, O: Offset> ListDeserializer<'a, O> { check_supported_list_layout(validity, offsets)?; Ok(Self { + path, item: Box::new(item), offsets, validity, @@ -37,7 +40,7 @@ 
impl<'a, O: Offset> ListDeserializer<'a, O> { pub fn peek_next(&self) -> Result { if self.next.0 + 1 >= self.offsets.len() { - fail!("Exhausted ListDeserializer") + fail!("Exhausted deserializer") } if let Some(validity) = &self.validity { Ok(bitset_is_set(validity, self.next.0)?) @@ -51,43 +54,60 @@ impl<'a, O: Offset> ListDeserializer<'a, O> { } } -impl<'a, O: Offset> SimpleDeserializer<'a> for ListDeserializer<'a, O> { - fn name() -> &'static str { - "ListDeserializer" +impl<'a, O: NamedType + Offset> Context for ListDeserializer<'a, O> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + match O::NAME { + "i32" => "List(..)", + "i64" => "LargeList(..)", + _ => "", + }, + ); } +} +impl<'a, O: NamedType + Offset> SimpleDeserializer<'a> for ListDeserializer<'a, O> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - self.deserialize_seq(visitor) - } else { - self.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.peek_next()? { + self.deserialize_seq(visitor) + } else { + self.consume_next(); + visitor.visit_none::() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.peek_next()?
{ + visitor.visit_some(Mut(&mut *self)) + } else { + self.consume_next(); + visitor.visit_none::() + } + }) + .ctx(self) } fn deserialize_seq>(&mut self, visitor: V) -> Result { - visitor.visit_seq(self) + try_(|| visitor.visit_seq(&mut *self)).ctx(self) } fn deserialize_bytes>(&mut self, visitor: V) -> Result { - visitor.visit_seq(self) + try_(|| visitor.visit_seq(&mut *self)).ctx(self) } fn deserialize_byte_buf>(&mut self, visitor: V) -> Result { - visitor.visit_seq(self) + try_(|| visitor.visit_seq(&mut *self)).ctx(self) } } -impl<'de, O: Offset> SeqAccess<'de> for ListDeserializer<'de, O> { +impl<'de, O: NamedType + Offset> SeqAccess<'de> for ListDeserializer<'de, O> { type Error = Error; fn next_element_seed>( diff --git a/serde_arrow/src/internal/deserialization/map_deserializer.rs b/serde_arrow/src/internal/deserialization/map_deserializer.rs index fde9e0a0..e08fca98 100644 --- a/serde_arrow/src/internal/deserialization/map_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/map_deserializer.rs @@ -2,7 +2,7 @@ use serde::de::{DeserializeSeed, MapAccess, Visitor}; use crate::internal::{ arrow::BitsWithOffset, - error::{fail, Error, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Error, Result}, utils::Mut, }; @@ -13,6 +13,7 @@ use super::{ }; pub struct MapDeserializer<'a> { + path: String, key: Box>, value: Box>, offsets: &'a [i32], @@ -22,6 +23,7 @@ pub struct MapDeserializer<'a> { impl<'a> MapDeserializer<'a> { pub fn new( + path: String, key: ArrayDeserializer<'a>, value: ArrayDeserializer<'a>, offsets: &'a [i32], @@ -30,6 +32,7 @@ impl<'a> MapDeserializer<'a> { check_supported_list_layout(validity, offsets)?; Ok(Self { + path, key: Box::new(key), value: Box::new(value), offsets, @@ -40,7 +43,7 @@ impl<'a> MapDeserializer<'a> { pub fn peek_next(&self) -> Result { if self.next.0 + 1 >= self.offsets.len() { - fail!("Exhausted ListDeserializer") + fail!("Exhausted deserializer") } if let Some(validity) = &self.validity { 
Ok(bitset_is_set(validity, self.next.0)?) @@ -54,31 +57,40 @@ impl<'a> MapDeserializer<'a> { } } -impl<'de> SimpleDeserializer<'de> for MapDeserializer<'de> { - fn name() -> &'static str { - "MapDeserializer" +impl<'de> Context for MapDeserializer<'de> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Map(..)"); } +} +impl<'de> SimpleDeserializer<'de> for MapDeserializer<'de> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - self.deserialize_map(visitor) - } else { - self.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.peek_next()? { + self.deserialize_map(visitor) + } else { + self.consume_next(); + visitor.visit_none::() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.peek_next()? 
{ + visitor.visit_some(Mut(&mut *self)) + } else { + self.consume_next(); + visitor.visit_none::() + } + }) + .ctx(self) } fn deserialize_map>(&mut self, visitor: V) -> Result { - visitor.visit_map(self) + try_(|| visitor.visit_map(&mut *self)).ctx(self) } } @@ -91,7 +103,7 @@ impl<'de> MapAccess<'de> for MapDeserializer<'de> { ) -> Result, Self::Error> { let (item, entry) = self.next; if item + 1 >= self.offsets.len() { - fail!("Exhausted MapDeserializer"); + fail!(in self, "Exhausted deserializer"); } let start: usize = self.offsets[item].try_into()?; let end: usize = self.offsets[item + 1].try_into()?; diff --git a/serde_arrow/src/internal/deserialization/null_deserializer.rs b/serde_arrow/src/internal/deserialization/null_deserializer.rs index 2909c491..5d76ab77 100644 --- a/serde_arrow/src/internal/deserialization/null_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/null_deserializer.rs @@ -1,26 +1,37 @@ use serde::de::Visitor; -use crate::internal::error::Result; +use crate::internal::error::{set_default, Context, ContextSupport, Error, Result}; use super::simple_deserializer::SimpleDeserializer; -pub struct NullDeserializer; +pub struct NullDeserializer { + path: String, +} -impl<'de> SimpleDeserializer<'de> for NullDeserializer { - fn name() -> &'static str { - "NullDeserializer" +impl NullDeserializer { + pub fn new(path: String) -> Self { + Self { path } } +} +impl Context for NullDeserializer { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Null"); + } +} + +impl<'de> SimpleDeserializer<'de> for NullDeserializer { fn deserialize_any>(&mut self, visitor: V) -> Result { - visitor.visit_unit() + visitor.visit_unit::().ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - visitor.visit_none() + visitor.visit_none::().ctx(self) } fn deserialize_unit>(&mut self, visitor: V) -> Result { - visitor.visit_unit() + 
visitor.visit_unit::().ctx(self) } fn deserialize_unit_struct>( @@ -28,6 +39,6 @@ impl<'de> SimpleDeserializer<'de> for NullDeserializer { _: &'static str, visitor: V, ) -> Result { - visitor.visit_unit() + visitor.visit_unit::().ctx(self) } } diff --git a/serde_arrow/src/internal/deserialization/outer_sequence_deserializer.rs b/serde_arrow/src/internal/deserialization/outer_sequence_deserializer.rs index f4497ddc..24f0407f 100644 --- a/serde_arrow/src/internal/deserialization/outer_sequence_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/outer_sequence_deserializer.rs @@ -1,7 +1,7 @@ use serde::de::{SeqAccess, Visitor}; use crate::internal::{ - error::{Error, Result}, + error::{Context, Error, Result}, utils::Mut, }; @@ -19,18 +19,18 @@ pub struct OuterSequenceDeserializer<'a> { impl<'a> OuterSequenceDeserializer<'a> { pub fn new(fields: Vec<(String, ArrayDeserializer<'a>)>, len: usize) -> Self { Self { - item: StructDeserializer::new(fields, None, len), + item: StructDeserializer::new(String::from("$"), fields, None, len), next: 0, len, } } } -impl<'de> SimpleDeserializer<'de> for OuterSequenceDeserializer<'de> { - fn name() -> &'static str { - "OuterSequenceDeserializer" - } +impl<'de> Context for OuterSequenceDeserializer<'de> { + fn annotate(&self, _: &mut std::collections::BTreeMap) {} +} +impl<'de> SimpleDeserializer<'de> for OuterSequenceDeserializer<'de> { fn deserialize_newtype_struct>( &mut self, _: &'static str, diff --git a/serde_arrow/src/internal/deserialization/simple_deserializer.rs b/serde_arrow/src/internal/deserialization/simple_deserializer.rs index 7d2e50bc..4753bc86 100644 --- a/serde_arrow/src/internal/deserialization/simple_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/simple_deserializer.rs @@ -1,16 +1,14 @@ use serde::{de::Visitor, Deserializer}; use crate::internal::{ - error::{fail, Error, Result}, + error::{fail, Context, Error, Result}, utils::Mut, }; #[allow(unused)] -pub trait SimpleDeserializer<'de>: 
Sized { - fn name() -> &'static str; - +pub trait SimpleDeserializer<'de>: Context + Sized { fn deserialize_any>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_any", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_any"); } fn deserialize_ignored_any>(&mut self, visitor: V) -> Result { @@ -18,63 +16,63 @@ pub trait SimpleDeserializer<'de>: Sized { } fn deserialize_bool>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_bool", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_bool"); } fn deserialize_i8>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_i8", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_i8"); } fn deserialize_i16>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_i16", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_i16"); } fn deserialize_i32>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_i32", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_i32"); } fn deserialize_i64>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_i64", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_i64"); } fn deserialize_u8>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_u8", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_u8"); } fn deserialize_u16>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_u16", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_u16"); } fn deserialize_u32>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_u32", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_u32"); } fn deserialize_u64>(&mut self, visitor: V) -> Result { - fail!("{} does 
not implement deserialize_u64", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_u64"); } fn deserialize_f32>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_f32", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_f32"); } fn deserialize_f64>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_f64", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_f64"); } fn deserialize_char>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_char", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_char"); } fn deserialize_str>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_str", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_str"); } fn deserialize_string>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_string", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_string"); } fn deserialize_map>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_map", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_map"); } fn deserialize_struct>( @@ -83,15 +81,15 @@ pub trait SimpleDeserializer<'de>: Sized { fields: &'static [&'static str], visitor: V, ) -> Result { - fail!("{} does not implement deserialize_struct", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_struct"); } fn deserialize_byte_buf>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_byte_buf", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_byte_buf"); } fn deserialize_bytes>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_bytes", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_bytes"); } fn deserialize_enum>( @@ -100,15 +98,15 
@@ pub trait SimpleDeserializer<'de>: Sized { variants: &'static [&'static str], visitor: V, ) -> Result { - fail!("{} does not implement deserialize_enum", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_enum"); } fn deserialize_identifier>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_identifier", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_identifier"); } fn deserialize_option>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_option", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_option"); } fn deserialize_newtype_struct>( @@ -120,11 +118,11 @@ pub trait SimpleDeserializer<'de>: Sized { } fn deserialize_tuple>(&mut self, len: usize, visitor: V) -> Result { - fail!("{} does not implement deserialize_tuple", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_tuple"); } fn deserialize_seq>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_seq", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_seq"); } fn deserialize_tuple_struct>( @@ -133,14 +131,13 @@ pub trait SimpleDeserializer<'de>: Sized { len: usize, visitor: V, ) -> Result { - fail!( - "{} does not implement deserialize_tuple_struct", - Self::name() + fail!(in self, + "Deserializer does not implement deserialize_tuple_struct", ); } fn deserialize_unit>(&mut self, visitor: V) -> Result { - fail!("{} does not implement deserialize_unit", Self::name()); + fail!(in self, "Deserializer does not implement deserialize_unit"); } fn deserialize_unit_struct>( @@ -148,9 +145,8 @@ pub trait SimpleDeserializer<'de>: Sized { name: &'static str, visitor: V, ) -> Result { - fail!( - "{} does not implement deserialize_unit_struct", - Self::name() + fail!(in self, + "Deserializer does not implement deserialize_unit_struct", ); } } diff --git 
a/serde_arrow/src/internal/deserialization/string_deserializer.rs b/serde_arrow/src/internal/deserialization/string_deserializer.rs index 76f31a77..067e79a1 100644 --- a/serde_arrow/src/internal/deserialization/string_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/string_deserializer.rs @@ -1,7 +1,7 @@ use crate::internal::{ arrow::BytesArrayView, - error::{fail, Result}, - utils::{Mut, Offset}, + error::{fail, set_default, Context, ContextSupport, Result}, + utils::{Mut, NamedType, Offset}, }; use super::{ @@ -9,18 +9,23 @@ use super::{ }; pub struct StringDeserializer<'a, O: Offset> { + pub path: String, pub view: BytesArrayView<'a, O>, pub next: usize, } impl<'a, O: Offset> StringDeserializer<'a, O> { - pub fn new(view: BytesArrayView<'a, O>) -> Self { - Self { view, next: 0 } + pub fn new(path: String, view: BytesArrayView<'a, O>) -> Self { + Self { + path, + view, + next: 0, + } } pub fn next(&mut self) -> Result> { if self.next + 1 > self.view.offsets.len() { - fail!("Tried to deserialize a value from an exhausted StringDeserializer"); + fail!("Exhausted deserializer: tried to deserialize a value from an exhausted StringDeserializer"); } if let Some(validity) = &self.view.validity { @@ -40,14 +45,14 @@ impl<'a, O: Offset> StringDeserializer<'a, O> { pub fn next_required(&mut self) -> Result<&'a str> { let Some(next) = self.next()? 
else { - fail!("Tried to deserialize a value from StringDeserializer, but value is missing") + fail!("Exhausted deserializer: tried to deserialize a value from StringDeserializer, but value is missing") }; Ok(next) } pub fn peek_next(&self) -> Result { if self.next + 1 > self.view.offsets.len() { - fail!("Tried to deserialize a value from an exhausted StringDeserializer"); + fail!("Exhausted deserializer: tried to deserialize a value from an exhausted StringDeserializer"); } if let Some(validity) = &self.view.validity { @@ -63,12 +68,51 @@ impl<'a, O: Offset> StringDeserializer<'a, O> { } } -impl<'a, O: Offset> SimpleDeserializer<'a> for StringDeserializer<'a, O> { - fn name() -> &'static str { - "StringDeserializer" +impl<'a, O: NamedType + Offset> Context for StringDeserializer<'a, O> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + match O::NAME { + "i32" => "Utf8", + "i64" => "LargeUtf8", + _ => "", + }, + ); } +} +impl<'a, O: NamedType + Offset> SimpleDeserializer<'a> for StringDeserializer<'a, O> { fn deserialize_any>(&mut self, visitor: V) -> Result { + self.deserialize_any_impl(visitor).ctx(self) + } + + fn deserialize_option>(&mut self, visitor: V) -> Result { + self.deserialize_option_impl(visitor).ctx(self) + } + + fn deserialize_str>(&mut self, visitor: V) -> Result { + self.deserialize_str_impl(visitor).ctx(self) + } + + fn deserialize_string>(&mut self, visitor: V) -> Result { + self.deserialize_string_impl(visitor).ctx(self) + } + + fn deserialize_enum>( + &mut self, + name: &'static str, + variants: &'static [&'static str], + visitor: V, + ) -> Result { + self.deserialize_enum_impl(name, variants, visitor) + .ctx(self) + } +} + +impl<'a, O: NamedType + Offset> StringDeserializer<'a, O> { + fn deserialize_any_impl>(&mut self, visitor: V) -> Result { if self.peek_next()? 
{ self.deserialize_str(visitor) } else { @@ -77,7 +121,10 @@ impl<'a, O: Offset> SimpleDeserializer<'a> for StringDeserializer<'a, O> { } } - fn deserialize_option>(&mut self, visitor: V) -> Result { + fn deserialize_option_impl>( + &mut self, + visitor: V, + ) -> Result { if self.peek_next()? { visitor.visit_some(Mut(self)) } else { @@ -86,15 +133,18 @@ impl<'a, O: Offset> SimpleDeserializer<'a> for StringDeserializer<'a, O> { } } - fn deserialize_str>(&mut self, visitor: V) -> Result { + fn deserialize_str_impl>(&mut self, visitor: V) -> Result { visitor.visit_borrowed_str(self.next_required()?) } - fn deserialize_string>(&mut self, visitor: V) -> Result { + fn deserialize_string_impl>( + &mut self, + visitor: V, + ) -> Result { visitor.visit_string(self.next_required()?.to_owned()) } - fn deserialize_enum>( + fn deserialize_enum_impl>( &mut self, _: &'static str, _: &'static [&'static str], diff --git a/serde_arrow/src/internal/deserialization/struct_deserializer.rs b/serde_arrow/src/internal/deserialization/struct_deserializer.rs index df7b1879..bb889e1f 100644 --- a/serde_arrow/src/internal/deserialization/struct_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/struct_deserializer.rs @@ -4,7 +4,7 @@ use serde::de::{ use crate::internal::{ arrow::BitsWithOffset, - error::{fail, Error, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Error, Result}, utils::Mut, }; @@ -14,6 +14,7 @@ use super::{ }; pub struct StructDeserializer<'a> { + pub path: String, pub fields: Vec<(String, ArrayDeserializer<'a>)>, pub validity: Option>, pub next: (usize, usize), @@ -22,11 +23,13 @@ pub struct StructDeserializer<'a> { impl<'a> StructDeserializer<'a> { pub fn new( + path: String, fields: Vec<(String, ArrayDeserializer<'a>)>, validity: Option>, len: usize, ) -> Self { Self { + path, fields, validity, len, @@ -36,7 +39,7 @@ impl<'a> StructDeserializer<'a> { pub fn peek_next(&self) -> Result { if self.next.0 >= self.len { - fail!("Exhausted 
StructDeserializer"); + fail!("Exhausted deserializer"); } if let Some(validity) = &self.validity { Ok(bitset_is_set(validity, self.next.0)?) @@ -50,37 +53,46 @@ impl<'a> StructDeserializer<'a> { } } -impl<'de> SimpleDeserializer<'de> for StructDeserializer<'de> { - fn name() -> &'static str { - "StructDeserializer" +impl<'de> Context for StructDeserializer<'de> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Struct(..)"); } +} +impl<'de> SimpleDeserializer<'de> for StructDeserializer<'de> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - visitor.visit_map(self) - } else { - self.consume_next(); - for (_, field) in &mut self.fields { - field.deserialize_ignored_any(IgnoredAny)?; + try_(|| { + if self.peek_next()? { + visitor.visit_map(&mut *self) + } else { + self.consume_next(); + for (_, field) in &mut self.fields { + field.deserialize_ignored_any(IgnoredAny)?; + } + visitor.visit_none() } - visitor.visit_none() - } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.consume_next(); - for (_, field) in &mut self.fields { - field.deserialize_ignored_any(IgnoredAny)?; + try_(|| { + if self.peek_next()? 
{ + visitor.visit_some(Mut(&mut *self)) + } else { + self.consume_next(); + for (_, field) in &mut self.fields { + field.deserialize_ignored_any(IgnoredAny)?; + } + visitor.visit_none() } - visitor.visit_none() - } + }) + .ctx(self) } fn deserialize_map>(&mut self, visitor: V) -> Result { - visitor.visit_map(self) + try_(|| visitor.visit_map(&mut *self)).ctx(self) } fn deserialize_struct>( @@ -89,15 +101,18 @@ impl<'de> SimpleDeserializer<'de> for StructDeserializer<'de> { _: &'static [&'static str], visitor: V, ) -> Result { - visitor.visit_map(self) + try_(|| visitor.visit_map(&mut *self)).ctx(self) } fn deserialize_tuple>(&mut self, _: usize, visitor: V) -> Result { - let res = visitor.visit_seq(&mut *self)?; + try_(|| { + let res = visitor.visit_seq(&mut *self)?; - // tuples do not consume the sequence until none is raised - self.consume_next(); - Ok(res) + // tuples do not consume the sequence until none is raised + self.consume_next(); + Ok(res) + }) + .ctx(self) } fn deserialize_tuple_struct>( @@ -106,11 +121,14 @@ impl<'de> SimpleDeserializer<'de> for StructDeserializer<'de> { _: usize, visitor: V, ) -> Result { - let res = visitor.visit_seq(&mut *self)?; + try_(|| { + let res = visitor.visit_seq(&mut *self)?; - // tuples do not consume the sequence until none is raised - self.consume_next(); - Ok(res) + // tuples do not consume the sequence until none is raised + self.consume_next(); + Ok(res) + }) + .ctx(self) } } @@ -120,7 +138,7 @@ impl<'de> MapAccess<'de> for StructDeserializer<'de> { fn next_key_seed>(&mut self, seed: K) -> Result> { let (item, field) = self.next; if item >= self.len { - fail!("Exhausted StructDeserializer"); + fail!("Exhausted deserializer"); } if field >= self.fields.len() { self.next = (item + 1, 0); @@ -148,7 +166,7 @@ impl<'de> SeqAccess<'de> for StructDeserializer<'de> { ) -> Result, Self::Error> { let (item, field) = self.next; if item >= self.len { - fail!("Exhausted StructDeserializer"); + fail!("Exhausted deserializer"); } 
if field >= self.fields.len() { self.next = (item + 1, 0); diff --git a/serde_arrow/src/internal/deserialization/test.rs b/serde_arrow/src/internal/deserialization/test.rs index 362f4fee..5474050a 100644 --- a/serde_arrow/src/internal/deserialization/test.rs +++ b/serde_arrow/src/internal/deserialization/test.rs @@ -16,17 +16,23 @@ fn example() { vec![ ( String::from("a"), - ArrayDeserializer::I32(IntegerDeserializer::new(PrimitiveArrayView { - values: &[1, 2, 3], - validity: None, - })), + ArrayDeserializer::I32(IntegerDeserializer::new( + String::from("$"), + PrimitiveArrayView { + values: &[1, 2, 3], + validity: None, + }, + )), ), ( String::from("b"), - ArrayDeserializer::I32(IntegerDeserializer::new(PrimitiveArrayView { - values: &[4, 5, 6], - validity: None, - })), + ArrayDeserializer::I32(IntegerDeserializer::new( + String::from("$"), + PrimitiveArrayView { + values: &[4, 5, 6], + validity: None, + }, + )), ), ], 3, diff --git a/serde_arrow/src/internal/deserialization/time_deserializer.rs b/serde_arrow/src/internal/deserialization/time_deserializer.rs index ce5ae075..92d15206 100644 --- a/serde_arrow/src/internal/deserialization/time_deserializer.rs +++ b/serde_arrow/src/internal/deserialization/time_deserializer.rs @@ -3,8 +3,8 @@ use serde::de::Visitor; use crate::internal::{ arrow::{TimeArrayView, TimeUnit}, - error::{fail, Result}, - utils::Mut, + error::{fail, set_default, try_, Context, ContextSupport, Result}, + utils::{Mut, NamedType}, }; use super::{ @@ -12,10 +12,15 @@ use super::{ utils::ArrayBufferIterator, }; -pub struct TimeDeserializer<'a, T: Integer>(ArrayBufferIterator<'a, T>, i64, i64); +pub struct TimeDeserializer<'a, T: Integer> { + path: String, + array: ArrayBufferIterator<'a, T>, + seconds_factor: i64, + nanoseconds_factor: i64, +} impl<'a, T: Integer> TimeDeserializer<'a, T> { - pub fn new(view: TimeArrayView<'a, T>) -> Self { + pub fn new(path: String, view: TimeArrayView<'a, T>) -> Self { let (seconds_factor, nanoseconds_factor) = 
match view.unit { TimeUnit::Nanosecond => (1_000_000_000, 1), TimeUnit::Microsecond => (1_000_000, 1_000), @@ -23,16 +28,17 @@ impl<'a, T: Integer> TimeDeserializer<'a, T> { TimeUnit::Second => (1, 1_000_000_000), }; - Self( - ArrayBufferIterator::new(view.values, view.validity), + Self { + path, + array: ArrayBufferIterator::new(view.values, view.validity), seconds_factor, nanoseconds_factor, - ) + } } pub fn get_string_repr(&self, ts: i64) -> Result { - let seconds = (ts / self.1) as u32; - let nanoseconds = ((ts % self.1) / self.2) as u32; + let seconds = (ts / self.seconds_factor) as u32; + let nanoseconds = ((ts % self.seconds_factor) / self.nanoseconds_factor) as u32; let Some(res) = NaiveTime::from_num_seconds_from_midnight_opt(seconds, nanoseconds) else { fail!("Invalid timestamp"); @@ -41,43 +47,63 @@ impl<'a, T: Integer> TimeDeserializer<'a, T> { } } -impl<'de, T: Integer> SimpleDeserializer<'de> for TimeDeserializer<'de, T> { - fn name() -> &'static str { - "Time64Deserializer" +impl<'de, T: NamedType + Integer> Context for TimeDeserializer<'de, T> { + fn annotate(&self, annotations: &mut std::collections::BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + match T::NAME { + "i32" => "Time32", + "i64" => "Time64", + _ => "", + }, + ); } +} +impl<'de, T: NamedType + Integer> SimpleDeserializer<'de> for TimeDeserializer<'de, T> { fn deserialize_any>(&mut self, visitor: V) -> Result { - if self.0.peek_next()? { - T::deserialize_any(self, visitor) - } else { - self.0.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.array.peek_next()? { + T::deserialize_any(&mut *self, visitor) + } else { + self.array.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_option>(&mut self, visitor: V) -> Result { - if self.0.peek_next()? { - visitor.visit_some(Mut(self)) - } else { - self.0.consume_next(); - visitor.visit_none() - } + try_(|| { + if self.array.peek_next()? 
{ + visitor.visit_some(Mut(&mut *self)) + } else { + self.array.consume_next(); + visitor.visit_none() + } + }) + .ctx(self) } fn deserialize_i32>(&mut self, visitor: V) -> Result { - visitor.visit_i32(self.0.next_required()?.into_i32()?) + try_(|| visitor.visit_i32(self.array.next_required()?.into_i32()?)).ctx(self) } fn deserialize_i64>(&mut self, visitor: V) -> Result { - visitor.visit_i64(self.0.next_required()?.into_i64()?) + try_(|| visitor.visit_i64(self.array.next_required()?.into_i64()?)).ctx(self) } fn deserialize_str>(&mut self, visitor: V) -> Result { - self.deserialize_string(visitor) + try_(|| self.deserialize_string(visitor)).ctx(self) } fn deserialize_string>(&mut self, visitor: V) -> Result { - let ts = self.0.next_required()?.into_i64()?; - visitor.visit_string(self.get_string_repr(ts)?) + try_(|| { + let ts = self.array.next_required()?.into_i64()?; + visitor.visit_string(self.get_string_repr(ts)?) + }) + .ctx(self) } } diff --git a/serde_arrow/src/internal/deserialization/utils.rs b/serde_arrow/src/internal/deserialization/utils.rs index eb55086c..3f85e58e 100644 --- a/serde_arrow/src/internal/deserialization/utils.rs +++ b/serde_arrow/src/internal/deserialization/utils.rs @@ -7,7 +7,7 @@ use crate::internal::{ pub fn bitset_is_set(set: &BitsWithOffset<'_>, idx: usize) -> Result { let flag = 1 << ((idx + set.offset) % 8); let Some(byte) = set.data.get((idx + set.offset) / 8) else { - fail!("invalid access in bitset"); + fail!("Invalid access in bitset"); }; Ok(byte & flag == flag) } @@ -29,7 +29,7 @@ impl<'a, T: Copy> ArrayBufferIterator<'a, T> { pub fn next(&mut self) -> Result> { if self.next > self.buffer.len() { - fail!("Tried to deserialize a value from an exhausted FloatDeserializer"); + fail!("Exhausted deserializer"); } if let Some(validity) = &self.validity { @@ -45,14 +45,14 @@ impl<'a, T: Copy> ArrayBufferIterator<'a, T> { pub fn next_required(&mut self) -> Result { let Some(next) = self.next()? 
else { - fail!("missing value"); + fail!("Exhausted deserializer"); }; Ok(next) } pub fn peek_next(&self) -> Result { if self.next > self.buffer.len() { - fail!("Tried to deserialize a value from an exhausted StringDeserializer"); + fail!("Exhausted deserializer"); } if let Some(validity) = &self.validity { @@ -86,14 +86,14 @@ pub fn check_supported_list_layout<'a, O: Offset>( }; if offsets.is_empty() { - fail!("list offsets must be non empty"); + fail!("Unsupported: list offsets must be non empty"); } for i in 0..offsets.len().saturating_sub(1) { let curr = offsets[i].try_into_usize()?; let next = offsets[i + 1].try_into_usize()?; if !bitset_is_set(&validity, i)? && (next - curr) != 0 { - fail!("lists with data in null values are currently not supported in deserialization"); + fail!("Unsupported: lists with data in null values are currently not supported in deserialization"); } } diff --git a/serde_arrow/src/internal/deserializer.rs b/serde_arrow/src/internal/deserializer.rs index e4db6f9d..a5642df4 100644 --- a/serde_arrow/src/internal/deserializer.rs +++ b/serde_arrow/src/internal/deserializer.rs @@ -11,6 +11,8 @@ use crate::internal::{ utils::array_view_ext::ArrayViewExt, }; +use super::utils::ChildName; + /// A structure to deserialize Arrow arrays into Rust objects /// #[cfg_attr(any(has_arrow, has_arrow2), doc = r"It can be constructed via")] @@ -33,7 +35,11 @@ impl<'de> Deserializer<'de> { fail!("Cannot deserialize from arrays with different lengths"); } let strategy = get_strategy_from_metadata(&field.metadata)?; - let deserializer = ArrayDeserializer::new(strategy.as_ref(), view)?; + let deserializer = ArrayDeserializer::new( + format!("$.{child}", child = ChildName(&field.name)), + strategy.as_ref(), + view, + )?; deserializers.push((field.name.clone(), deserializer)); } diff --git a/serde_arrow/src/internal/error.rs b/serde_arrow/src/internal/error.rs index 4cc29514..35b11f30 100644 --- a/serde_arrow/src/internal/error.rs +++ 
b/serde_arrow/src/internal/error.rs @@ -4,14 +4,35 @@ use std::{ convert::Infallible, }; +pub fn set_default>( + annotations: &mut BTreeMap, + key: &str, + value: V, +) { + if !annotations.contains_key(key) { + annotations.insert(String::from(key), value.into()); + } +} + +/// Execute a fallible function and return the result +/// +/// This function is mostly useful to add annotations to a complex block of operations +pub fn try_(func: impl FnOnce() -> Result) -> Result { + func() +} + /// An object that offers additional context to an error pub trait Context { - fn annotations(&self) -> BTreeMap; + fn annotate(&self, annotations: &mut BTreeMap); } impl Context for BTreeMap { - fn annotations(&self) -> BTreeMap { - self.clone() + fn annotate(&self, annotations: &mut BTreeMap) { + for (k, v) in self { + if !annotations.contains_key(k) { + annotations.insert(k.to_owned(), v.to_owned()); + } + } } } @@ -37,7 +58,11 @@ impl> ContextSupport for E { type Output = Error; fn ctx(self, context: &C) -> Self::Output { - self.into().with_annotations(context.annotations()) + let Error::Custom(mut error) = self.into(); + if error.0.annotations.is_empty() { + context.annotate(&mut error.0.annotations); + } + Error::Custom(error) } } @@ -85,14 +110,6 @@ impl Error { } } -impl Error { - pub(crate) fn with_annotations(self, annotations: BTreeMap) -> Self { - let Self::Custom(mut this) = self; - this.0.annotations = annotations; - Self::Custom(this) - } -} - /// Access information about the error impl Error { pub fn message(&self) -> &str { @@ -216,8 +233,9 @@ macro_rules!
fail { { #[allow(unused)] use $crate::internal::error::Context; - let annotations = $context.annotations(); - return Err($crate::internal::error::Error::custom(format!($($tt)*)).with_annotations(annotations)) + let $crate::internal::error::Error::Custom(mut err) = $crate::internal::error::Error::custom(format!($($tt)*)); + $context.annotate(&mut err.0.annotations); + return Err($crate::internal::error::Error::Custom(err)); } }; ($($tt:tt)*) => { diff --git a/serde_arrow/src/internal/serialization/array_builder.rs b/serde_arrow/src/internal/serialization/array_builder.rs index 3f9281aa..75eb6ef5 100644 --- a/serde_arrow/src/internal/serialization/array_builder.rs +++ b/serde_arrow/src/internal/serialization/array_builder.rs @@ -112,8 +112,8 @@ impl ArrayBuilder { } impl Context for ArrayBuilder { - fn annotations(&self) -> BTreeMap { - dispatch!(self, Self(builder) => builder.annotations()) + fn annotate(&self, annotations: &mut BTreeMap) { + dispatch!(self, Self(builder) => builder.annotate(annotations)) } } diff --git a/serde_arrow/src/internal/serialization/binary_builder.rs b/serde_arrow/src/internal/serialization/binary_builder.rs index 2126dea6..b5cb2ff4 100644 --- a/serde_arrow/src/internal/serialization/binary_builder.rs +++ b/serde_arrow/src/internal/serialization/binary_builder.rs @@ -4,10 +4,10 @@ use serde::Serialize; use crate::internal::{ arrow::{Array, BytesArray}, - error::{Context, ContextSupport, Result}, + error::{set_default, Context, ContextSupport, Result}, utils::{ array_ext::{new_bytes_array, ArrayExt, ScalarArrayExt, SeqArrayExt}, - btree_map, Mut, NamedType, Offset, + Mut, NamedType, Offset, }, }; @@ -79,13 +79,17 @@ impl BinaryBuilder { } impl Context for BinaryBuilder { - fn annotations(&self) -> std::collections::BTreeMap { - let data_type = match O::NAME { - "i32" => "Binary", - "i64" => "LargeBinary", - _ => "", - }; - btree_map!("field" => self.path.clone(), "data_type" => data_type) + fn annotate(&self, annotations: &mut BTreeMap) { 
+ set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + match O::NAME { + "i32" => "Binary", + "i64" => "LargeBinary", + _ => "", + }, + ); } } @@ -142,9 +146,7 @@ impl SimpleSerializer for BinaryBuilder { struct U8Serializer(u8); impl Context for U8Serializer { - fn annotations(&self) -> BTreeMap { - Default::default() - } + fn annotate(&self, _: &mut BTreeMap) {} } impl SimpleSerializer for U8Serializer { diff --git a/serde_arrow/src/internal/serialization/bool_builder.rs b/serde_arrow/src/internal/serialization/bool_builder.rs index 3976f155..08b64877 100644 --- a/serde_arrow/src/internal/serialization/bool_builder.rs +++ b/serde_arrow/src/internal/serialization/bool_builder.rs @@ -2,11 +2,8 @@ use std::collections::BTreeMap; use crate::internal::{ arrow::{Array, BooleanArray}, - error::{Context, ContextSupport, Result}, - utils::{ - array_ext::{set_bit_buffer, set_validity, set_validity_default}, - btree_map, - }, + error::{set_default, try_, Context, ContextSupport, Result}, + utils::array_ext::{set_bit_buffer, set_validity, set_validity_default}, }; use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; @@ -50,30 +47,40 @@ impl BoolBuilder { } impl Context for BoolBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Boolean") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Boolean"); } } impl SimpleSerializer for BoolBuilder { fn serialize_default(&mut self) -> Result<()> { - set_validity_default(self.array.validity.as_mut(), self.array.len); - set_bit_buffer(&mut self.array.values, self.array.len, false); - self.array.len += 1; - Ok(()) + try_(|| { + set_validity_default(self.array.validity.as_mut(), self.array.len); + set_bit_buffer(&mut self.array.values, self.array.len, false); + self.array.len += 1; + Ok(()) + }) + .ctx(self) } fn 
serialize_none(&mut self) -> Result<()> { - set_validity(self.array.validity.as_mut(), self.array.len, false).ctx(self)?; - set_bit_buffer(&mut self.array.values, self.array.len, false); - self.array.len += 1; - Ok(()) + try_(|| { + set_validity(self.array.validity.as_mut(), self.array.len, false)?; + set_bit_buffer(&mut self.array.values, self.array.len, false); + self.array.len += 1; + Ok(()) + }) + .ctx(self) } fn serialize_bool(&mut self, v: bool) -> Result<()> { - set_validity(self.array.validity.as_mut(), self.array.len, true).ctx(self)?; - set_bit_buffer(&mut self.array.values, self.array.len, v); - self.array.len += 1; - Ok(()) + try_(|| { + set_validity(self.array.validity.as_mut(), self.array.len, true)?; + set_bit_buffer(&mut self.array.values, self.array.len, v); + self.array.len += 1; + Ok(()) + }) + .ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/date32_builder.rs b/serde_arrow/src/internal/serialization/date32_builder.rs index e01a4d8a..6fefed26 100644 --- a/serde_arrow/src/internal/serialization/date32_builder.rs +++ b/serde_arrow/src/internal/serialization/date32_builder.rs @@ -4,11 +4,8 @@ use chrono::{NaiveDate, NaiveDateTime}; use crate::internal::{ arrow::{Array, PrimitiveArray}, - error::{Context, ContextSupport, Result}, - utils::{ - array_ext::{new_primitive_array, ArrayExt, ScalarArrayExt}, - btree_map, - }, + error::{set_default, try_, Context, ContextSupport, Result}, + utils::array_ext::{new_primitive_array, ArrayExt, ScalarArrayExt}, }; use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; @@ -54,26 +51,30 @@ impl Date32Builder { } impl Context for Date32Builder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Date32") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Date32"); } } impl SimpleSerializer for Date32Builder { fn serialize_default(&mut self)
-> Result<()> { - self.array.push_scalar_default().ctx(self) + try_(|| self.array.push_scalar_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.array.push_scalar_none().ctx(self) + try_(|| self.array.push_scalar_none()).ctx(self) } fn serialize_str(&mut self, v: &str) -> Result<()> { - let days_since_epoch = self.parse_str_to_days_since_epoch(v).ctx(self)?; - self.array.push_scalar_value(days_since_epoch).ctx(self) + try_(|| { + let days_since_epoch = self.parse_str_to_days_since_epoch(v)?; + self.array.push_scalar_value(days_since_epoch) + }) + .ctx(self) } fn serialize_i32(&mut self, v: i32) -> Result<()> { - self.array.push_scalar_value(v).ctx(self) + try_(|| self.array.push_scalar_value(v)).ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/date64_builder.rs b/serde_arrow/src/internal/serialization/date64_builder.rs index 7d85feb2..405d9b28 100644 --- a/serde_arrow/src/internal/serialization/date64_builder.rs +++ b/serde_arrow/src/internal/serialization/date64_builder.rs @@ -2,11 +2,8 @@ use std::collections::BTreeMap; use crate::internal::{ arrow::{Array, PrimitiveArray, TimeUnit, TimestampArray}, - error::{fail, Context, ContextSupport, Result}, - utils::{ - array_ext::{new_primitive_array, ArrayExt, ScalarArrayExt}, - btree_map, - }, + error::{fail, set_default, try_, Context, ContextSupport, Result}, + utils::array_ext::{new_primitive_array, ArrayExt, ScalarArrayExt}, }; use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; @@ -94,31 +91,38 @@ impl Date64Builder { } impl Context for Date64Builder { - fn annotations(&self) -> BTreeMap { - let data_type = if self.meta.is_some() { - "Timestamp(..)" - } else { - "Date64" - }; - btree_map!("field" => self.path.clone(), "data_type" => data_type) + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + if self.meta.is_some() { + "Timestamp(..)" + } else { +
"Date64" }, ); } } impl SimpleSerializer for Date64Builder { fn serialize_default(&mut self) -> Result<()> { - self.array.push_scalar_default().ctx(self) + try_(|| self.array.push_scalar_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.array.push_scalar_none().ctx(self) + try_(|| self.array.push_scalar_none()).ctx(self) } fn serialize_str(&mut self, v: &str) -> Result<()> { - let timestamp = self.parse_str_to_timestamp(v).ctx(self)?; - self.array.push_scalar_value(timestamp) + try_(|| { + let timestamp = self.parse_str_to_timestamp(v)?; + self.array.push_scalar_value(timestamp) + }) + .ctx(self) } fn serialize_i64(&mut self, v: i64) -> Result<()> { - self.array.push_scalar_value(v).ctx(self) + try_(|| self.array.push_scalar_value(v)).ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/decimal_builder.rs b/serde_arrow/src/internal/serialization/decimal_builder.rs index d1616ed3..0d4a22fb 100644 --- a/serde_arrow/src/internal/serialization/decimal_builder.rs +++ b/serde_arrow/src/internal/serialization/decimal_builder.rs @@ -2,10 +2,9 @@ use std::collections::BTreeMap; use crate::internal::{ arrow::{Array, DecimalArray, PrimitiveArray}, - error::{Context, ContextSupport, Result}, + error::{set_default, try_, Context, ContextSupport, Result}, utils::{ array_ext::{new_primitive_array, ArrayExt, ScalarArrayExt}, - btree_map, decimal::{self, DecimalParser}, }, }; @@ -63,39 +62,38 @@ impl DecimalBuilder { } impl Context for DecimalBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Decimal128(..)") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Decimal128(..)"); } } impl SimpleSerializer for DecimalBuilder { fn serialize_default(&mut self) -> Result<()> { - self.array.push_scalar_default().ctx(self) + try_(|| self.array.push_scalar_default()).ctx(self) } fn serialize_none(&mut self)
-> Result<()> { - self.array.push_scalar_none().ctx(self) + try_(|| self.array.push_scalar_none()).ctx(self) } fn serialize_f32(&mut self, v: f32) -> Result<()> { - self.array - .push_scalar_value((v * self.f32_factor) as i128) - .ctx(self) + try_(|| self.array.push_scalar_value((v * self.f32_factor) as i128)).ctx(self) } fn serialize_f64(&mut self, v: f64) -> Result<()> { - self.array - .push_scalar_value((v * self.f64_factor) as i128) - .ctx(self) + try_(|| self.array.push_scalar_value((v * self.f64_factor) as i128)).ctx(self) } fn serialize_str(&mut self, v: &str) -> Result<()> { - let mut parse_buffer = [0; decimal::BUFFER_SIZE_I128]; - let val = self - .parser - .parse_decimal128(&mut parse_buffer, v.as_bytes()) - .ctx(self)?; + try_(|| { + let mut parse_buffer = [0; decimal::BUFFER_SIZE_I128]; + let val = self + .parser + .parse_decimal128(&mut parse_buffer, v.as_bytes())?; - self.array.push_scalar_value(val).ctx(self) + self.array.push_scalar_value(val) + }) + .ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/dictionary_utf8_builder.rs b/serde_arrow/src/internal/serialization/dictionary_utf8_builder.rs index 61998668..66a50f81 100644 --- a/serde_arrow/src/internal/serialization/dictionary_utf8_builder.rs +++ b/serde_arrow/src/internal/serialization/dictionary_utf8_builder.rs @@ -4,8 +4,8 @@ use serde::Serialize; use crate::internal::{ arrow::{Array, DictionaryArray}, - error::{fail, Context, ContextSupport, Result}, - utils::{btree_map, Mut}, + error::{fail, set_default, try_, Context, ContextSupport, Result}, + utils::Mut, }; use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; @@ -50,32 +50,35 @@ impl DictionaryUtf8Builder { } impl Context for DictionaryUtf8Builder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Dictionary(..)") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, 
"data_type", "Dictionary(..)"); } } impl SimpleSerializer for DictionaryUtf8Builder { fn serialize_default(&mut self) -> Result<()> { - self.indices.serialize_none().ctx(self) + try_(|| self.indices.serialize_none()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.indices.serialize_none().ctx(self) + try_(|| self.indices.serialize_none().ctx(self)).ctx(self) } fn serialize_str(&mut self, v: &str) -> Result<()> { - // the only faillible operations concern children: do not apply the context - let idx = match self.index.get(v) { - Some(idx) => *idx, - None => { - let idx = self.index.len(); - self.values.serialize_str(v)?; - self.index.insert(v.to_string(), idx); - idx - } - }; - idx.serialize(Mut(self.indices.as_mut())) + try_(|| { + let idx = match self.index.get(v) { + Some(idx) => *idx, + None => { + let idx = self.index.len(); + self.values.serialize_str(v)?; + self.index.insert(v.to_string(), idx); + idx + } + }; + idx.serialize(Mut(self.indices.as_mut())) + }) + .ctx(self) } fn serialize_unit_variant( @@ -84,8 +87,7 @@ impl SimpleSerializer for DictionaryUtf8Builder { _: u32, variant: &'static str, ) -> Result<()> { - // NOTE: context logic is implemented in serialize_str - self.serialize_str(variant) + try_(|| self.serialize_str(variant)).ctx(self) } fn serialize_tuple_variant_start<'this>( diff --git a/serde_arrow/src/internal/serialization/duration_builder.rs b/serde_arrow/src/internal/serialization/duration_builder.rs index a1e37b09..2c73e250 100644 --- a/serde_arrow/src/internal/serialization/duration_builder.rs +++ b/serde_arrow/src/internal/serialization/duration_builder.rs @@ -2,11 +2,8 @@ use std::collections::BTreeMap; use crate::internal::{ arrow::{Array, PrimitiveArray, TimeArray, TimeUnit}, - error::{Context, ContextSupport, Result}, - utils::{ - array_ext::{new_primitive_array, ArrayExt, ScalarArrayExt}, - btree_map, - }, + error::{set_default, try_, Context, ContextSupport, Result}, + utils::array_ext::{new_primitive_array, 
ArrayExt, ScalarArrayExt}, }; use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; @@ -49,51 +46,50 @@ impl DurationBuilder { } impl Context for DurationBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Duration(..)") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Duration(..)"); } } impl SimpleSerializer for DurationBuilder { fn serialize_default(&mut self) -> Result<()> { - self.array.push_scalar_default().ctx(self) + try_(|| self.array.push_scalar_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.array.push_scalar_none().ctx(self) + try_(|| self.array.push_scalar_none()).ctx(self) } fn serialize_i8(&mut self, v: i8) -> Result<()> { - self.array.push_scalar_value(i64::from(v)).ctx(self) + try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) } fn serialize_i16(&mut self, v: i16) -> Result<()> { - self.array.push_scalar_value(i64::from(v)).ctx(self) + try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) } fn serialize_i32(&mut self, v: i32) -> Result<()> { - self.array.push_scalar_value(i64::from(v)).ctx(self) + try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) } fn serialize_i64(&mut self, v: i64) -> Result<()> { - self.array.push_scalar_value(v).ctx(self) + try_(|| self.array.push_scalar_value(v)).ctx(self) } fn serialize_u8(&mut self, v: u8) -> Result<()> { - self.array.push_scalar_value(i64::from(v)).ctx(self) + try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) } fn serialize_u16(&mut self, v: u16) -> Result<()> { - self.array.push_scalar_value(i64::from(v)).ctx(self) + try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) } fn serialize_u32(&mut self, v: u32) -> Result<()> { - self.array.push_scalar_value(i64::from(v)).ctx(self) + try_(|| self.array.push_scalar_value(i64::from(v))).ctx(self) } fn 
serialize_u64(&mut self, v: u64) -> Result<()> { - self.array - .push_scalar_value(i64::try_from(v).ctx(self)?) - .ctx(self) + try_(|| self.array.push_scalar_value(i64::try_from(v)?)).ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/fixed_size_binary_builder.rs b/serde_arrow/src/internal/serialization/fixed_size_binary_builder.rs index 32ca9b85..1b5cfa48 100644 --- a/serde_arrow/src/internal/serialization/fixed_size_binary_builder.rs +++ b/serde_arrow/src/internal/serialization/fixed_size_binary_builder.rs @@ -4,10 +4,10 @@ use serde::Serialize; use crate::internal::{ arrow::{Array, FixedSizeBinaryArray}, - error::{fail, Context, ContextSupport, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Result}, utils::{ array_ext::{ArrayExt, CountArray, SeqArrayExt}, - btree_map, Mut, + Mut, }, }; @@ -86,86 +86,93 @@ impl FixedSizeBinaryBuilder { } impl Context for FixedSizeBinaryBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "FixedSizeBinary(..)") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "FixedSizeBinary(..)"); } } impl SimpleSerializer for FixedSizeBinaryBuilder { fn serialize_default(&mut self) -> Result<()> { - self.seq.push_seq_default().ctx(self)?; - for _ in 0..self.n { - self.buffer.push(0); - } - Ok(()) + try_(|| { + self.seq.push_seq_default()?; + for _ in 0..self.n { + self.buffer.push(0); + } + Ok(()) + }) + .ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.seq.push_seq_none().ctx(self)?; - for _ in 0..self.n { - self.buffer.push(0); - } - Ok(()) + try_(|| { + self.seq.push_seq_none()?; + for _ in 0..self.n { + self.buffer.push(0); + } + Ok(()) + }) + .ctx(self) } fn serialize_seq_start(&mut self, _: Option) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_seq_element(&mut self, value: &V) -> Result<()> { - 
self.element(value).ctx(self) + try_(|| self.element(value)).ctx(self) } fn serialize_seq_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_tuple_start(&mut self, _: usize) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_tuple_element(&mut self, value: &V) -> Result<()> { - self.element(value).ctx(self) + try_(|| self.element(value)).ctx(self) } fn serialize_tuple_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_tuple_struct_start(&mut self, _: &'static str, _: usize) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_tuple_struct_field(&mut self, value: &V) -> Result<()> { - self.element(value).ctx(self) + try_(|| self.element(value)).ctx(self) } fn serialize_tuple_struct_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_bytes(&mut self, v: &[u8]) -> Result<()> { - if v.len() != self.n { - fail!( - in self, - "Invalid number of elements for fixed size binary: got {actual}, expected {expected}", - actual = v.len(), - expected = self.n, - ); - } - - self.seq.start_seq().ctx(self)?; - self.buffer.extend(v); - self.seq.end_seq().ctx(self) + try_(|| { + if v.len() != self.n { + fail!( + in self, + "Invalid number of elements for fixed size binary: got {actual}, expected {expected}", + actual = v.len(), + expected = self.n, + ); + } + + self.seq.start_seq()?; + self.buffer.extend(v); + self.seq.end_seq() + }).ctx(self) } } struct U8Serializer(u8); impl Context for U8Serializer { - fn annotations(&self) -> BTreeMap { - btree_map!() - } + fn annotate(&self, _: &mut BTreeMap) {} } impl SimpleSerializer for U8Serializer { diff --git a/serde_arrow/src/internal/serialization/fixed_size_list_builder.rs b/serde_arrow/src/internal/serialization/fixed_size_list_builder.rs index 0c986bd7..03595b23 100644 --- 
a/serde_arrow/src/internal/serialization/fixed_size_list_builder.rs +++ b/serde_arrow/src/internal/serialization/fixed_size_list_builder.rs @@ -4,10 +4,10 @@ use serde::Serialize; use crate::internal::{ arrow::{Array, FieldMeta, FixedSizeListArray}, - error::{fail, Context, ContextSupport, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Result}, utils::{ array_ext::{ArrayExt, CountArray, SeqArrayExt}, - btree_map, Mut, + Mut, }, }; @@ -76,7 +76,7 @@ impl FixedSizeListBuilder { fn element(&mut self, value: &V) -> Result<()> { self.current_count += 1; - self.seq.push_seq_elements(1).ctx(self)?; + self.seq.push_seq_elements(1)?; value.serialize(Mut(self.element.as_mut())) } @@ -94,61 +94,68 @@ impl FixedSizeListBuilder { } impl Context for FixedSizeListBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "FixedSizeList(..)") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "FixedSizeList(..)"); } } impl SimpleSerializer for FixedSizeListBuilder { fn serialize_default(&mut self) -> Result<()> { - self.seq.push_seq_default().ctx(self)?; - for _ in 0..self.n { - self.element.serialize_default()?; - } - Ok(()) + try_(|| { + self.seq.push_seq_default()?; + for _ in 0..self.n { + self.element.serialize_default()?; + } + Ok(()) + }) + .ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.seq.push_seq_none().ctx(self)?; - for _ in 0..self.n { - self.element.serialize_default()?; - } - Ok(()) + try_(|| { + self.seq.push_seq_none()?; + for _ in 0..self.n { + self.element.serialize_default()?; + } + Ok(()) + }) + .ctx(self) } fn serialize_seq_start(&mut self, _: Option) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_seq_element(&mut self, value: &V) -> Result<()> { - self.element(value) + try_(|| self.element(value)).ctx(self) } fn serialize_seq_end(&mut self) -> 
Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_tuple_start(&mut self, _: usize) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_tuple_element(&mut self, value: &V) -> Result<()> { - self.element(value) + try_(|| self.element(value)).ctx(self) } fn serialize_tuple_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_tuple_struct_start(&mut self, _: &'static str, _: usize) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_tuple_struct_field(&mut self, value: &V) -> Result<()> { - self.element(value) + try_(|| self.element(value)).ctx(self) } fn serialize_tuple_struct_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/float_builder.rs b/serde_arrow/src/internal/serialization/float_builder.rs index 7769e8ba..5eb6ad58 100644 --- a/serde_arrow/src/internal/serialization/float_builder.rs +++ b/serde_arrow/src/internal/serialization/float_builder.rs @@ -4,10 +4,10 @@ use half::f16; use crate::internal::{ arrow::{Array, PrimitiveArray}, - error::{Context, ContextSupport, Result}, + error::{set_default, try_, Context, ContextSupport, Result}, utils::{ array_ext::{new_primitive_array, ArrayExt, ScalarArrayExt}, - btree_map, Mut, + Mut, }, }; @@ -58,141 +58,144 @@ impl_into_array!(f32, F32, Float32); impl_into_array!(f64, F64, Float64); impl Context for FloatBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Float16") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Float16"); } } impl Context for FloatBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Float32") + fn annotate(&self, annotations: &mut BTreeMap) { + 
set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Float32"); } } impl Context for FloatBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Float64") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Float64"); } } impl SimpleSerializer for FloatBuilder { fn serialize_default(&mut self) -> Result<()> { - self.array.push_scalar_default().ctx(self) + try_(|| self.array.push_scalar_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.array.push_scalar_none().ctx(self) + try_(|| self.array.push_scalar_none()).ctx(self) } fn serialize_some(&mut self, value: &V) -> Result<()> { - value.serialize(Mut(self)) + try_(|| value.serialize(Mut(&mut *self))).ctx(self) } fn serialize_i8(&mut self, v: i8) -> Result<()> { - self.array.push_scalar_value(v as f32).ctx(self) + try_(|| self.array.push_scalar_value(v as f32)).ctx(self) } fn serialize_i16(&mut self, v: i16) -> Result<()> { - self.array.push_scalar_value(v as f32).ctx(self) + try_(|| self.array.push_scalar_value(v as f32)).ctx(self) } fn serialize_i32(&mut self, v: i32) -> Result<()> { - self.array.push_scalar_value(v as f32).ctx(self) + try_(|| self.array.push_scalar_value(v as f32)).ctx(self) } fn serialize_i64(&mut self, v: i64) -> Result<()> { - self.array.push_scalar_value(v as f32).ctx(self) + try_(|| self.array.push_scalar_value(v as f32)).ctx(self) } fn serialize_u8(&mut self, v: u8) -> Result<()> { - self.array.push_scalar_value(v as f32).ctx(self) + try_(|| self.array.push_scalar_value(v as f32)).ctx(self) } fn serialize_u16(&mut self, v: u16) -> Result<()> { - self.array.push_scalar_value(v as f32).ctx(self) + try_(|| self.array.push_scalar_value(v as f32)).ctx(self) } fn serialize_u32(&mut self, v: u32) -> Result<()> { - self.array.push_scalar_value(v as f32).ctx(self) + try_(|| self.array.push_scalar_value(v 
as f32)).ctx(self) } fn serialize_u64(&mut self, v: u64) -> Result<()> { - self.array.push_scalar_value(v as f32).ctx(self) + try_(|| self.array.push_scalar_value(v as f32)).ctx(self) } fn serialize_f32(&mut self, v: f32) -> Result<()> { - self.array.push_scalar_value(v).ctx(self) + try_(|| self.array.push_scalar_value(v)).ctx(self) } fn serialize_f64(&mut self, v: f64) -> Result<()> { - self.array.push_scalar_value(v as f32).ctx(self) + try_(|| self.array.push_scalar_value(v as f32)).ctx(self) } } impl SimpleSerializer for FloatBuilder { fn serialize_default(&mut self) -> Result<()> { - self.array.push_scalar_default().ctx(self) + try_(|| self.array.push_scalar_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.array.push_scalar_none().ctx(self) + try_(|| self.array.push_scalar_none()).ctx(self) } fn serialize_i8(&mut self, v: i8) -> Result<()> { - self.array.push_scalar_value(v as f64).ctx(self) + try_(|| self.array.push_scalar_value(v as f64)).ctx(self) } fn serialize_i16(&mut self, v: i16) -> Result<()> { - self.array.push_scalar_value(v as f64).ctx(self) + try_(|| self.array.push_scalar_value(v as f64)).ctx(self) } fn serialize_i32(&mut self, v: i32) -> Result<()> { - self.array.push_scalar_value(v as f64).ctx(self) + try_(|| self.array.push_scalar_value(v as f64)).ctx(self) } fn serialize_i64(&mut self, v: i64) -> Result<()> { - self.array.push_scalar_value(v as f64).ctx(self) + try_(|| self.array.push_scalar_value(v as f64)).ctx(self) } fn serialize_u8(&mut self, v: u8) -> Result<()> { - self.array.push_scalar_value(v as f64).ctx(self) + try_(|| self.array.push_scalar_value(v as f64)).ctx(self) } fn serialize_u16(&mut self, v: u16) -> Result<()> { - self.array.push_scalar_value(v as f64).ctx(self) + try_(|| self.array.push_scalar_value(v as f64)).ctx(self) } fn serialize_u32(&mut self, v: u32) -> Result<()> { - self.array.push_scalar_value(v as f64).ctx(self) + try_(|| self.array.push_scalar_value(v as f64)).ctx(self) } fn 
serialize_u64(&mut self, v: u64) -> Result<()> { - self.array.push_scalar_value(v as f64).ctx(self) + try_(|| self.array.push_scalar_value(v as f64)).ctx(self) } fn serialize_f32(&mut self, v: f32) -> Result<()> { - self.array.push_scalar_value(v as f64).ctx(self) + try_(|| self.array.push_scalar_value(v as f64)).ctx(self) } fn serialize_f64(&mut self, v: f64) -> Result<()> { - self.array.push_scalar_value(v).ctx(self) + try_(|| self.array.push_scalar_value(v)).ctx(self) } } impl SimpleSerializer for FloatBuilder { fn serialize_default(&mut self) -> Result<()> { - self.array.push_scalar_default().ctx(self) + try_(|| self.array.push_scalar_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.array.push_scalar_none().ctx(self) + try_(|| self.array.push_scalar_none()).ctx(self) } fn serialize_f32(&mut self, v: f32) -> Result<()> { - self.array.push_scalar_value(f16::from_f32(v)).ctx(self) + try_(|| self.array.push_scalar_value(f16::from_f32(v))).ctx(self) } fn serialize_f64(&mut self, v: f64) -> Result<()> { - self.array.push_scalar_value(f16::from_f64(v)).ctx(self) + try_(|| self.array.push_scalar_value(f16::from_f64(v))).ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/int_builder.rs b/serde_arrow/src/internal/serialization/int_builder.rs index 7f65e4f7..34fa7e3a 100644 --- a/serde_arrow/src/internal/serialization/int_builder.rs +++ b/serde_arrow/src/internal/serialization/int_builder.rs @@ -2,10 +2,10 @@ use std::collections::BTreeMap; use crate::internal::{ arrow::{Array, PrimitiveArray}, - error::{Context, ContextSupport, Error, Result}, + error::{set_default, try_, Context, ContextSupport, Error, Result}, utils::{ array_ext::{new_primitive_array, ArrayExt, ScalarArrayExt}, - btree_map, NamedType, + NamedType, }, }; @@ -61,29 +61,23 @@ impl_into_array!(u32, U32, UInt32); impl_into_array!(u64, U64, UInt64); impl Context for IntBuilder { - fn annotations(&self) -> BTreeMap { - let data_type = match I::NAME { - "i8" => "Int8", 
- "i16" => "Int16", - "i32" => "Int32", - "i64" => "Int64", - "u8" => "UInt8", - "u16" => "UInt16", - "u32" => "UInt32", - "u64" => "UInt64", - _ => "", - }; - btree_map!("field" => self.path.clone(), "data_type" => data_type) - } -} - -impl IntBuilder { - fn push_value(&mut self, v: J) -> Result<()> - where - I: Default + TryFrom + 'static, - Error: From<>::Error>, - { - self.array.push_scalar_value(I::try_from(v)?) + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + match I::NAME { + "i8" => "Int8", + "i16" => "Int16", + "i32" => "Int32", + "i64" => "Int64", + "u8" => "UInt8", + "u16" => "UInt16", + "u32" => "UInt32", + "u64" => "UInt64", + _ => "", + }, + ); } } @@ -110,51 +104,54 @@ where Error: From<>::Error>, { fn serialize_default(&mut self) -> Result<()> { - self.array.push_scalar_default().ctx(self) + try_(|| self.array.push_scalar_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.array.push_scalar_none().ctx(self) + try_(|| self.array.push_scalar_none()).ctx(self) } fn serialize_bool(&mut self, v: bool) -> Result<()> { - let v: u8 = if v { 1 } else { 0 }; - self.push_value(v).ctx(self) + try_(|| { + let v: u8 = if v { 1 } else { 0 }; + self.array.push_scalar_value(I::try_from(v)?) 
+ }) + .ctx(self) } fn serialize_i8(&mut self, v: i8) -> Result<()> { - self.push_value(v).ctx(self) + try_(|| self.array.push_scalar_value(I::try_from(v)?)).ctx(self) } fn serialize_i16(&mut self, v: i16) -> Result<()> { - self.push_value(v).ctx(self) + try_(|| self.array.push_scalar_value(I::try_from(v)?)).ctx(self) } fn serialize_i32(&mut self, v: i32) -> Result<()> { - self.push_value(v).ctx(self) + try_(|| self.array.push_scalar_value(I::try_from(v)?)).ctx(self) } fn serialize_i64(&mut self, v: i64) -> Result<()> { - self.push_value(v).ctx(self) + try_(|| self.array.push_scalar_value(I::try_from(v)?)).ctx(self) } fn serialize_u8(&mut self, v: u8) -> Result<()> { - self.push_value(v).ctx(self) + try_(|| self.array.push_scalar_value(I::try_from(v)?)).ctx(self) } fn serialize_u16(&mut self, v: u16) -> Result<()> { - self.push_value(v).ctx(self) + try_(|| self.array.push_scalar_value(I::try_from(v)?)).ctx(self) } fn serialize_u32(&mut self, v: u32) -> Result<()> { - self.push_value(v).ctx(self) + try_(|| self.array.push_scalar_value(I::try_from(v)?)).ctx(self) } fn serialize_u64(&mut self, v: u64) -> Result<()> { - self.push_value(v).ctx(self) + try_(|| self.array.push_scalar_value(I::try_from(v)?)).ctx(self) } fn serialize_char(&mut self, v: char) -> Result<()> { - self.push_value(u32::from(v)).ctx(self) + try_(|| self.array.push_scalar_value(I::try_from(u32::from(v))?)).ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/list_builder.rs b/serde_arrow/src/internal/serialization/list_builder.rs index d7efb6d3..321eb95b 100644 --- a/serde_arrow/src/internal/serialization/list_builder.rs +++ b/serde_arrow/src/internal/serialization/list_builder.rs @@ -4,10 +4,10 @@ use serde::Serialize; use crate::internal::{ arrow::{Array, FieldMeta, ListArray}, - error::{Context, ContextSupport, Result}, + error::{set_default, try_, Context, ContextSupport, Result}, utils::{ array_ext::{ArrayExt, OffsetsArray, SeqArrayExt}, - btree_map, Mut, NamedType, Offset, + Mut, 
NamedType, Offset, }, }; @@ -82,7 +82,7 @@ impl ListBuilder { } fn element(&mut self, value: &V) -> Result<()> { - self.offsets.push_seq_elements(1).ctx(self)?; + self.offsets.push_seq_elements(1)?; value.serialize(Mut(self.element.as_mut())) } @@ -92,66 +92,73 @@ impl ListBuilder { } impl Context for ListBuilder { - fn annotations(&self) -> BTreeMap { - let data_type = if O::NAME == "i32" { - "List" - } else { - "LargeList" - }; - btree_map!("field" => self.path.clone(), "data_type" => data_type) + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + if O::NAME == "i32" { + "List" + } else { + "LargeList" + }, + ); } } impl SimpleSerializer for ListBuilder { fn serialize_default(&mut self) -> Result<()> { - self.offsets.push_seq_default().ctx(self) + try_(|| self.offsets.push_seq_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.offsets.push_seq_none().ctx(self) + try_(|| self.offsets.push_seq_none()).ctx(self) } fn serialize_seq_start(&mut self, _: Option) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_seq_element(&mut self, value: &V) -> Result<()> { - self.element(value) + try_(|| self.element(value)).ctx(self) } fn serialize_seq_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_tuple_start(&mut self, _: usize) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_tuple_element(&mut self, value: &V) -> Result<()> { - self.element(value) + try_(|| self.element(value)).ctx(self) } fn serialize_tuple_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_tuple_struct_start(&mut self, _: &'static str, _: usize) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_tuple_struct_field(&mut self, value: &V) -> Result<()> { - self.element(value) 
+ try_(|| self.element(value)).ctx(self) } fn serialize_tuple_struct_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_bytes(&mut self, v: &[u8]) -> Result<()> { - self.start().ctx(self)?; - for item in v { - self.element(item)?; - } - self.end().ctx(self) + try_(|| { + self.start()?; + for item in v { + self.element(item)?; + } + self.end() + }) + .ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/map_builder.rs b/serde_arrow/src/internal/serialization/map_builder.rs index 09018096..b7529cce 100644 --- a/serde_arrow/src/internal/serialization/map_builder.rs +++ b/serde_arrow/src/internal/serialization/map_builder.rs @@ -4,11 +4,8 @@ use serde::Serialize; use crate::internal::{ arrow::{Array, FieldMeta, ListArray}, - error::{fail, Context, ContextSupport, Result}, - utils::{ - array_ext::{ArrayExt, OffsetsArray, SeqArrayExt}, - btree_map, - }, + error::{fail, set_default, try_, Context, ContextSupport, Result}, + utils::array_ext::{ArrayExt, OffsetsArray, SeqArrayExt}, }; use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; @@ -71,36 +68,43 @@ impl MapBuilder { } impl Context for MapBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Map(..)") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Map(..)"); } } impl SimpleSerializer for MapBuilder { fn serialize_default(&mut self) -> Result<()> { - self.offsets.push_seq_default().ctx(self) + try_(|| self.offsets.push_seq_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.offsets.push_seq_none().ctx(self) + try_(|| self.offsets.push_seq_none()).ctx(self) } fn serialize_map_start(&mut self, _: Option) -> Result<()> { - self.offsets.start_seq().ctx(self) + try_(|| self.offsets.start_seq()).ctx(self) } fn serialize_map_key(&mut self, key: &V) -> Result<()> { - 
self.offsets.push_seq_elements(1).ctx(self)?; - self.entry.serialize_tuple_start(2).ctx(self)?; - self.entry.serialize_tuple_element(key) + try_(|| { + self.offsets.push_seq_elements(1)?; + self.entry.serialize_tuple_start(2)?; + self.entry.serialize_tuple_element(key) + }) + .ctx(self) } fn serialize_map_value(&mut self, value: &V) -> Result<()> { - self.entry.serialize_tuple_element(value)?; - self.entry.serialize_tuple_end().ctx(self) + try_(|| { + self.entry.serialize_tuple_element(value)?; + self.entry.serialize_tuple_end() + }) + .ctx(self) } fn serialize_map_end(&mut self) -> Result<()> { - self.offsets.end_seq().ctx(self) + try_(|| self.offsets.end_seq()).ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/null_builder.rs b/serde_arrow/src/internal/serialization/null_builder.rs index d8e056ac..a08c6c87 100644 --- a/serde_arrow/src/internal/serialization/null_builder.rs +++ b/serde_arrow/src/internal/serialization/null_builder.rs @@ -2,8 +2,7 @@ use std::collections::BTreeMap; use crate::internal::{ arrow::{Array, NullArray}, - error::{Context, Result}, - utils::btree_map, + error::{set_default, Context, Result}, }; use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; @@ -36,8 +35,9 @@ impl NullBuilder { } impl Context for NullBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Null") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Null"); } } diff --git a/serde_arrow/src/internal/serialization/outer_sequence_builder.rs b/serde_arrow/src/internal/serialization/outer_sequence_builder.rs index 0e5fa071..f727ef85 100644 --- a/serde_arrow/src/internal/serialization/outer_sequence_builder.rs +++ b/serde_arrow/src/internal/serialization/outer_sequence_builder.rs @@ -11,7 +11,7 @@ use crate::internal::{ fixed_size_binary_builder::FixedSizeBinaryBuilder, 
fixed_size_list_builder::FixedSizeListBuilder, }, - utils::{btree_map, meta_from_field, Mut}, + utils::{btree_map, meta_from_field, ChildName, Mut}, }; use super::{ @@ -62,8 +62,8 @@ impl OuterSequenceBuilder { } impl Context for OuterSequenceBuilder { - fn annotations(&self) -> BTreeMap { - self.0.annotations() + fn annotate(&self, annotations: &mut BTreeMap) { + self.0.annotate(annotations) } } @@ -123,7 +123,7 @@ fn build_struct(path: String, struct_fields: &[Field], nullable: bool) -> Result fn build_builder(path: String, field: &Field) -> Result { use {ArrayBuilder as A, DataType as T}; - let ctx: BTreeMap = btree_map!("path" => path.clone()); + let ctx: BTreeMap = btree_map!("field" => path.clone()); let builder = match &field.data_type { T::Null => match get_strategy_from_metadata(&field.metadata)? { @@ -285,15 +285,3 @@ fn is_utc_strategy(strategy: Option<&Strategy>) -> Result { Some(st) => fail!("Cannot builder Date64 builder with strategy {st}"), } } - -struct ChildName<'a>(&'a str); - -impl<'a> std::fmt::Display for ChildName<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if !self.0.is_empty() { - write!(f, "{}", self.0) - } else { - write!(f, "") - } - } -} diff --git a/serde_arrow/src/internal/serialization/struct_builder.rs b/serde_arrow/src/internal/serialization/struct_builder.rs index 4b0786ea..d1a2e9e3 100644 --- a/serde_arrow/src/internal/serialization/struct_builder.rs +++ b/serde_arrow/src/internal/serialization/struct_builder.rs @@ -4,10 +4,10 @@ use serde::Serialize; use crate::internal::{ arrow::{Array, FieldMeta, StructArray}, - error::{fail, Context, ContextSupport, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Result}, utils::{ array_ext::{ArrayExt, CountArray, SeqArrayExt}, - btree_map, Mut, + Mut, }, }; @@ -110,7 +110,7 @@ impl StructBuilder { } fn element(&mut self, idx: usize, value: &T) -> Result<()> { - self.seq.push_seq_elements(1).ctx(self)?; + self.seq.push_seq_elements(1)?; 
if self.seen[idx] { fail!(in self, "Duplicate field {key}", key = self.fields[idx].1.name); } @@ -123,31 +123,38 @@ impl StructBuilder { } impl Context for StructBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Struct(..)") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Struct(..)"); } } impl SimpleSerializer for StructBuilder { fn serialize_default(&mut self) -> Result<()> { - self.seq.push_seq_default().ctx(self)?; - for (builder, _) in &mut self.fields { - builder.serialize_default()?; - } + try_(|| { + self.seq.push_seq_default()?; + for (builder, _) in &mut self.fields { + builder.serialize_default()?; + } - Ok(()) + Ok(()) + }) + .ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.seq.push_seq_none().ctx(self)?; - for (builder, _) in &mut self.fields { - builder.serialize_default()?; - } - Ok(()) + try_(|| { + self.seq.push_seq_none()?; + for (builder, _) in &mut self.fields { + builder.serialize_default()?; + } + Ok(()) + }) + .ctx(self) } fn serialize_struct_start(&mut self, _: &'static str, _: usize) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_struct_field( @@ -155,72 +162,83 @@ impl SimpleSerializer for StructBuilder { key: &'static str, value: &T, ) -> Result<()> { - let Some(idx) = self.lookup.lookup(self.next, key) else { - // ignore unknown fields - return Ok(()); - }; - self.element(idx, value) + try_(|| { + let Some(idx) = self.lookup.lookup(self.next, key) else { + // ignore unknown fields + return Ok(()); + }; + self.element(idx, value) + }) + .ctx(self) } fn serialize_struct_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_tuple_start(&mut self, _: usize) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_tuple_element(&mut self, value: &V) -> 
Result<()> { - self.element(self.next, value) + try_(|| self.element(self.next, value)).ctx(self) } fn serialize_tuple_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_tuple_struct_start(&mut self, _: &'static str, _: usize) -> Result<()> { - self.start().ctx(self) + try_(|| self.start()).ctx(self) } fn serialize_tuple_struct_field(&mut self, value: &V) -> Result<()> { - // ignore extra tuple fields - if self.next < self.fields.len() { - self.element(self.next, value)?; - } - Ok(()) + try_(|| { + // ignore extra tuple fields + if self.next < self.fields.len() { + self.element(self.next, value)?; + } + Ok(()) + }) + .ctx(self) } fn serialize_tuple_struct_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } fn serialize_map_start(&mut self, _: Option) -> Result<()> { - self.start()?; - // always re-set to an invalid field to force that `_key()` is called before `_value()`. - self.next = UNKNOWN_KEY; - Ok(()) + try_(|| { + self.start()?; + // always re-set to an invalid field to force that `_key()` is called before `_value()`. + self.next = UNKNOWN_KEY; + Ok(()) + }) + .ctx(self) } fn serialize_map_key(&mut self, key: &V) -> Result<()> { - self.next = self - .lookup - .lookup_serialize(key) - .ctx(self)? 
- .unwrap_or(UNKNOWN_KEY); - Ok(()) + try_(|| { + self.next = self.lookup.lookup_serialize(key)?.unwrap_or(UNKNOWN_KEY); + Ok(()) + }) + .ctx(self) } fn serialize_map_value(&mut self, value: &V) -> Result<()> { - if self.next != UNKNOWN_KEY { - self.element(self.next, value)?; - } - // see serialize_map_start - self.next = UNKNOWN_KEY; - Ok(()) + try_(|| { + if self.next != UNKNOWN_KEY { + self.element(self.next, value)?; + } + // see serialize_map_start + self.next = UNKNOWN_KEY; + Ok(()) + }) + .ctx(self) } fn serialize_map_end(&mut self) -> Result<()> { - self.end().ctx(self) + try_(|| self.end()).ctx(self) } } @@ -301,9 +319,7 @@ impl<'a> KeyLookupSerializer<'a> { } impl<'a> Context for KeyLookupSerializer<'a> { - fn annotations(&self) -> BTreeMap { - btree_map!() - } + fn annotate(&self, _: &mut BTreeMap) {} } impl<'a> SimpleSerializer for KeyLookupSerializer<'a> { diff --git a/serde_arrow/src/internal/serialization/time_builder.rs b/serde_arrow/src/internal/serialization/time_builder.rs index 8728d068..d52543ec 100644 --- a/serde_arrow/src/internal/serialization/time_builder.rs +++ b/serde_arrow/src/internal/serialization/time_builder.rs @@ -4,10 +4,10 @@ use chrono::Timelike; use crate::internal::{ arrow::{Array, PrimitiveArray, TimeArray, TimeUnit}, - error::{Context, ContextSupport, Error, Result}, + error::{set_default, try_, Context, ContextSupport, Error, Result}, utils::{ array_ext::{new_primitive_array, ArrayExt, ScalarArrayExt}, - btree_map, NamedType, + NamedType, }, }; @@ -71,13 +71,17 @@ impl TimeBuilder { } impl Context for TimeBuilder { - fn annotations(&self) -> BTreeMap { - let data_type = match I::NAME { - "i32" => "Time32", - "i64" => "Time64", - _ => "", - }; - btree_map!("field" => self.path.clone(), "data_type" => data_type) + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + match I::NAME { + "i32" => "Time32", + "i64" => "Time64", + _ => "", + 
}, + ); } } @@ -88,40 +92,37 @@ where Error: From<>::Error>, { fn serialize_default(&mut self) -> Result<()> { - self.array.push_scalar_default().ctx(self) + try_(|| self.array.push_scalar_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.array.push_scalar_none().ctx(self) + try_(|| self.array.push_scalar_none()).ctx(self) } fn serialize_str(&mut self, v: &str) -> Result<()> { - let (seconds_factor, nanoseconds_factor) = match self.unit { - TimeUnit::Nanosecond => (1_000_000_000, 1), - TimeUnit::Microsecond => (1_000_000, 1_000), - TimeUnit::Millisecond => (1_000, 1_000_000), - TimeUnit::Second => (1, 1_000_000_000), - }; - - use chrono::naive::NaiveTime; - let time = v.parse::().ctx(self)?; - let timestamp = i64::from(time.num_seconds_from_midnight()) * seconds_factor - + i64::from(time.nanosecond()) / nanoseconds_factor; - - self.array - .push_scalar_value(timestamp.try_into().ctx(self)?) - .ctx(self) + try_(|| { + let (seconds_factor, nanoseconds_factor) = match self.unit { + TimeUnit::Nanosecond => (1_000_000_000, 1), + TimeUnit::Microsecond => (1_000_000, 1_000), + TimeUnit::Millisecond => (1_000, 1_000_000), + TimeUnit::Second => (1, 1_000_000_000), + }; + + use chrono::naive::NaiveTime; + let time = v.parse::()?; + let timestamp = i64::from(time.num_seconds_from_midnight()) * seconds_factor + + i64::from(time.nanosecond()) / nanoseconds_factor; + + self.array.push_scalar_value(timestamp.try_into()?) + }) + .ctx(self) } fn serialize_i32(&mut self, v: i32) -> Result<()> { - self.array - .push_scalar_value(v.try_into().ctx(self)?) - .ctx(self) + try_(|| self.array.push_scalar_value(v.try_into()?)).ctx(self) } fn serialize_i64(&mut self, v: i64) -> Result<()> { - self.array - .push_scalar_value(v.try_into().ctx(self)?) 
- .ctx(self) + try_(|| self.array.push_scalar_value(v.try_into()?)).ctx(self) } } diff --git a/serde_arrow/src/internal/serialization/union_builder.rs b/serde_arrow/src/internal/serialization/union_builder.rs index 1a9b7483..c675d480 100644 --- a/serde_arrow/src/internal/serialization/union_builder.rs +++ b/serde_arrow/src/internal/serialization/union_builder.rs @@ -2,8 +2,8 @@ use std::collections::BTreeMap; use crate::internal::{ arrow::{Array, DenseUnionArray, FieldMeta}, - error::{fail, Context, ContextSupport, Result}, - utils::{btree_map, Mut}, + error::{fail, set_default, try_, Context, ContextSupport, Result}, + utils::Mut, }; use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; @@ -76,8 +76,9 @@ impl UnionBuilder { } impl Context for UnionBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "Union(..)") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", "Union(..)"); } } @@ -88,10 +89,10 @@ impl SimpleSerializer for UnionBuilder { variant_index: u32, _: &'static str, ) -> Result<()> { - let ctx = self.annotations(); - self.serialize_variant(variant_index) - .ctx(&ctx)? 
- .serialize_unit() + let mut ctx = BTreeMap::new(); + self.annotate(&mut ctx); + + try_(|| self.serialize_variant(variant_index)?.serialize_unit()).ctx(&ctx) } fn serialize_newtype_variant( @@ -101,9 +102,14 @@ impl SimpleSerializer for UnionBuilder { _: &'static str, value: &V, ) -> Result<()> { - let ctx = self.annotations(); - let variant_builder = self.serialize_variant(variant_index).ctx(&ctx)?; - value.serialize(Mut(variant_builder)) + let mut ctx = BTreeMap::new(); + self.annotate(&mut ctx); + + try_(|| { + let variant_builder = self.serialize_variant(variant_index)?; + value.serialize(Mut(variant_builder)) + }) + .ctx(&ctx) } fn serialize_struct_variant_start<'this>( @@ -113,10 +119,15 @@ impl SimpleSerializer for UnionBuilder { variant: &'static str, len: usize, ) -> Result<&'this mut ArrayBuilder> { - let ctx = self.annotations(); - let variant_builder = self.serialize_variant(variant_index).ctx(&ctx)?; - variant_builder.serialize_struct_start(variant, len)?; - Ok(variant_builder) + let mut ctx = BTreeMap::new(); + self.annotate(&mut ctx); + + try_(|| { + let variant_builder = self.serialize_variant(variant_index)?; + variant_builder.serialize_struct_start(variant, len)?; + Ok(variant_builder) + }) + .ctx(&ctx) } fn serialize_tuple_variant_start<'this>( @@ -126,9 +137,14 @@ impl SimpleSerializer for UnionBuilder { variant: &'static str, len: usize, ) -> Result<&'this mut ArrayBuilder> { - let ctx = self.annotations(); - let variant_builder = self.serialize_variant(variant_index).ctx(&ctx)?; - variant_builder.serialize_tuple_struct_start(variant, len)?; - Ok(variant_builder) + let mut ctx = BTreeMap::new(); + self.annotate(&mut ctx); + + try_(|| { + let variant_builder = self.serialize_variant(variant_index)?; + variant_builder.serialize_tuple_struct_start(variant, len)?; + Ok(variant_builder) + }) + .ctx(&ctx) } } diff --git a/serde_arrow/src/internal/serialization/unknown_variant_builder.rs 
b/serde_arrow/src/internal/serialization/unknown_variant_builder.rs index 753cee8c..7cb164e4 100644 --- a/serde_arrow/src/internal/serialization/unknown_variant_builder.rs +++ b/serde_arrow/src/internal/serialization/unknown_variant_builder.rs @@ -4,8 +4,7 @@ use serde::Serialize; use crate::internal::{ arrow::{Array, NullArray}, - error::{fail, Context, Result}, - utils::btree_map, + error::{fail, set_default, Context, Result}, }; use super::{array_builder::ArrayBuilder, simple_serializer::SimpleSerializer}; @@ -36,8 +35,9 @@ impl UnknownVariantBuilder { } impl Context for UnknownVariantBuilder { - fn annotations(&self) -> BTreeMap { - btree_map!("field" => self.path.clone(), "data_type" => "") + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default(annotations, "data_type", ""); } } diff --git a/serde_arrow/src/internal/serialization/utf8_builder.rs b/serde_arrow/src/internal/serialization/utf8_builder.rs index 17fed8ce..baf9a93f 100644 --- a/serde_arrow/src/internal/serialization/utf8_builder.rs +++ b/serde_arrow/src/internal/serialization/utf8_builder.rs @@ -2,10 +2,10 @@ use std::collections::BTreeMap; use crate::internal::{ arrow::{Array, BytesArray}, - error::{fail, Context, ContextSupport, Result}, + error::{fail, set_default, try_, Context, ContextSupport, Result}, utils::{ array_ext::{new_bytes_array, ArrayExt, ScalarArrayExt}, - btree_map, NamedType, Offset, + NamedType, Offset, }, }; @@ -58,28 +58,31 @@ impl Utf8Builder { } impl Context for Utf8Builder { - fn annotations(&self) -> BTreeMap { - let data_type = if O::NAME == "i32" { - "Utf8" - } else { - "LargeUtf8" - }; - - btree_map!("field" => self.path.clone(), "data_type" => data_type) + fn annotate(&self, annotations: &mut BTreeMap) { + set_default(annotations, "field", &self.path); + set_default( + annotations, + "data_type", + if O::NAME == "i32" { + "Utf8" + } else { + "LargeUtf8" + }, + ); } } impl SimpleSerializer for Utf8Builder { fn 
serialize_default(&mut self) -> Result<()> { - self.array.push_scalar_default().ctx(self) + try_(|| self.array.push_scalar_default()).ctx(self) } fn serialize_none(&mut self) -> Result<()> { - self.array.push_scalar_none().ctx(self) + try_(|| self.array.push_scalar_none()).ctx(self) } fn serialize_str(&mut self, v: &str) -> Result<()> { - self.array.push_scalar_value(v.as_bytes()).ctx(self) + try_(|| self.array.push_scalar_value(v.as_bytes())).ctx(self) } fn serialize_unit_variant( @@ -88,7 +91,7 @@ impl SimpleSerializer for Utf8Builder { _: u32, variant: &'static str, ) -> Result<()> { - self.array.push_scalar_value(variant.as_bytes()).ctx(self) + try_(|| self.array.push_scalar_value(variant.as_bytes())).ctx(self) } fn serialize_tuple_variant_start<'this>( diff --git a/serde_arrow/src/internal/utils/mod.rs b/serde_arrow/src/internal/utils/mod.rs index 8a39ba1e..7fa35e5d 100644 --- a/serde_arrow/src/internal/utils/mod.rs +++ b/serde_arrow/src/internal/utils/mod.rs @@ -219,3 +219,15 @@ macro_rules! 
btree_map { } pub(crate) use btree_map; + +pub struct ChildName<'a>(pub &'a str); + +impl<'a> std::fmt::Display for ChildName<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if !self.0.is_empty() { + write!(f, "{}", self.0) + } else { + write!(f, "") + } + } +} diff --git a/serde_arrow/src/test/error_messages/deserializers.rs b/serde_arrow/src/test/error_messages/deserializers.rs new file mode 100644 index 00000000..fd9f8977 --- /dev/null +++ b/serde_arrow/src/test/error_messages/deserializers.rs @@ -0,0 +1,64 @@ +use std::collections::HashMap; + +use serde::Deserialize; +use serde_json::json; + +use crate::{ + internal::{ + arrow::{ArrayView, BitsWithOffset, BooleanArrayView, FieldMeta, StructArrayView}, + testing::assert_error_contains, + }, + schema::{SchemaLike, SerdeArrowSchema}, + Deserializer, +}; + +#[test] +fn example_exhausted() { + let views = vec![ArrayView::Struct(StructArrayView { + len: 5, + validity: None, + fields: vec![( + ArrayView::Boolean(BooleanArrayView { + len: 2, + validity: None, + values: BitsWithOffset { + data: &[0b_0001_0011], + offset: 0, + }, + }), + FieldMeta { + name: String::from("nested"), + nullable: false, + metadata: HashMap::new(), + }, + )], + })]; + + let schema = SerdeArrowSchema::from_value(&json!([{ + "name": "item", + "data_type": "Struct", + "children": [ + {"name": "nested", "data_type": "Bool"}, + ], + }])) + .unwrap(); + + let deserializer = Deserializer::new(&schema.fields, views).unwrap(); + + #[derive(Deserialize)] + struct S { + #[allow(dead_code)] + item: Nested, + } + + #[derive(Deserialize)] + struct Nested { + #[allow(dead_code)] + nested: bool, + } + + let res = Vec::::deserialize(deserializer); + assert_error_contains(&res, "Exhausted deserializer"); + assert_error_contains(&res, "field: \"$.item.nested\""); + assert_error_contains(&res, "data_type: \"Boolean\""); +} diff --git a/serde_arrow/src/test/error_messages/mod.rs b/serde_arrow/src/test/error_messages/mod.rs index 
7730e5aa..d1c9cca5 100644 --- a/serde_arrow/src/test/error_messages/mod.rs +++ b/serde_arrow/src/test/error_messages/mod.rs @@ -1 +1,2 @@ +mod deserializers; mod push_validity;