Skip to content

Commit

Permalink
Merge branch 'main' into develop-0.12
Browse files Browse the repository at this point in the history
# Conflicts:
#	Changes.md
#	serde_arrow/src/internal/schema/tracer.rs
  • Loading branch information
chmp committed Aug 31, 2024
2 parents 9162036 + 4df3f9c commit 1216f45
Show file tree
Hide file tree
Showing 6 changed files with 530 additions and 116 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Changes.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Change log

## 0.12
## 0.12.0

Refactor the underlying implementation to prepare for further development

Expand All @@ -23,6 +23,10 @@ Removed deprecated API
`TryFrom` conversions to convert between fields and `SerdeArrowSchema`
- Remove `SerdeArrowSchema::new()`, `Overwrites::new()`

## 0.11.7

- Fix tracing of JSON mixing nulls with non-null data

## 0.11.6

- Add `arrow=52` support
Expand Down
2 changes: 1 addition & 1 deletion serde_arrow/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "serde_arrow"
version = "0.11.6"
version = "0.11.7"
authors = ["Christopher Prohm <mail@cprohm.de>"]
edition = "2021"
description = "Convert sequences of Rust objects to Arrow arrays and back again"
Expand Down
207 changes: 94 additions & 113 deletions serde_arrow/src/internal/schema/tracer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,9 @@ impl Tracer {
self.enforce_depth_limit()?;

match self {
this @ Self::Unknown(_) => {
this if matches!(this, Self::Unknown(_))
|| matches!(this, Self::Primitive(ref tracer) if tracer.item_type == DataType::Null) =>
{
let field_names = fields
.iter()
.map(|field| field.to_string())
Expand Down Expand Up @@ -327,7 +329,9 @@ impl Tracer {
self.enforce_depth_limit()?;

match self {
this @ Self::Unknown(_) => {
this if matches!(this, Self::Unknown(_))
|| matches!(this, Self::Primitive(ref tracer) if tracer.item_type == DataType::Null) =>
{
let tracer = dispatch_tracer!(this, tracer => TupleTracer {
name: tracer.name.clone(),
path: tracer.path.clone(),
Expand Down Expand Up @@ -359,7 +363,9 @@ impl Tracer {
self.enforce_depth_limit()?;

match self {
this @ Self::Unknown(_) => {
this if matches!(this, Self::Unknown(_))
|| matches!(this, Self::Primitive(ref tracer) if tracer.item_type == DataType::Null) =>
{
let tracer = dispatch_tracer!(this, tracer => UnionTracer {
name: tracer.name.clone(),
path: tracer.path.clone(),
Expand Down Expand Up @@ -395,7 +401,9 @@ impl Tracer {
self.enforce_depth_limit()?;

match self {
this @ Self::Unknown(_) => {
this if matches!(this, Self::Unknown(_))
|| matches!(this, Self::Primitive(ref tracer) if tracer.item_type == DataType::Null) =>
{
let tracer = dispatch_tracer!(this, tracer => ListTracer {
name: tracer.name.clone(),
path: tracer.path.clone(),
Expand All @@ -422,7 +430,9 @@ impl Tracer {
self.enforce_depth_limit()?;

match self {
this @ Self::Unknown(_) => {
this if matches!(this, Self::Unknown(_))
|| matches!(this, Self::Primitive(ref tracer) if tracer.item_type == DataType::Null) =>
{
let tracer = dispatch_tracer!(this, tracer => MapTracer {
name: tracer.name.clone(),
path: tracer.path.clone(),
Expand Down Expand Up @@ -450,132 +460,103 @@ impl Tracer {
Ok(())
}

pub fn ensure_utf8(&mut self, item_type: DataType, strategy: Option<Strategy>) -> Result<()> {
if self.is_unknown() {
let tracer = dispatch_tracer!(self, tracer => PrimitiveTracer::new(
tracer.name.clone(),
tracer.path.clone(),
tracer.options.clone(),
item_type,
tracer.nullable,
))
.with_strategy(strategy);
*self = Self::Primitive(tracer);
} else if let Tracer::Primitive(tracer) = self {
use {
DataType::Date64, DataType::LargeUtf8, Strategy::NaiveStrAsDate64,
Strategy::UtcStrAsDate64,
};
let (item_type, strategy) = match ((&tracer.item_type), (item_type)) {
(Date64, Date64) => match (&tracer.strategy, strategy) {
(Some(NaiveStrAsDate64), Some(NaiveStrAsDate64)) => {
(Date64, Some(NaiveStrAsDate64))
}
(Some(UtcStrAsDate64), Some(UtcStrAsDate64)) => (Date64, Some(UtcStrAsDate64)),
// incompatible strategies, coerce to string
(_, _) => (LargeUtf8, None),
},
(LargeUtf8, _) | (_, LargeUtf8) => (LargeUtf8, None),
(prev_ty, new_ty) => {
fail!(
"mismatched types, previous {prev_ty}, current {new_ty}",
prev_ty = DataTypeDisplay(prev_ty),
new_ty = DataTypeDisplay(&new_ty),
)
}
};
tracer.item_type = item_type;
tracer.strategy = strategy;
} else {
let Some(ty) = self.get_type() else {
unreachable!("tracer cannot be unknown");
};
fail!(
"mismatched types, previous {ty}, current {item_type}",
item_type = DataTypeDisplay(&item_type),
);
}
Ok(())
pub fn ensure_utf8(
&mut self,
item_type: DataType,
strategy: Option<Strategy>,
) -> Result<()> {
self.ensure_primitive_with_strategy(item_type, strategy)
}

pub fn ensure_primitive(&mut self, item_type: DataType) -> Result<()> {
match self {
this @ Self::Unknown(_) => {
let tracer = dispatch_tracer!(this, tracer => PrimitiveTracer::new(
tracer.name.clone(),
tracer.path.clone(),
tracer.options.clone(),
item_type,
tracer.nullable,
));
*this = Self::Primitive(tracer);
}
Self::Primitive(tracer) if tracer.item_type == item_type => {}
_ => fail!(
"mismatched types, previous {:?}, current {:?}",
self.get_type(),
item_type
),
}
Ok(())
self.ensure_primitive_with_strategy(item_type, None)
}

pub fn ensure_number(&mut self, item_type: DataType) -> Result<()> {
self.ensure_primitive_with_strategy(item_type, None)
}

fn ensure_primitive_with_strategy(
&mut self,
item_type: DataType,
strategy: Option<Strategy>,
) -> Result<()> {
match self {
this @ Self::Unknown(_) => {
let tracer = dispatch_tracer!(this, tracer => PrimitiveTracer::new(
tracer.name.clone(),
tracer.path.clone(),
tracer.options.clone(),
let is_null_type = matches!(item_type, DataType::Null);
let tracer = dispatch_tracer!(this, tracer => PrimitiveTracer {
name: tracer.name.clone(),
path: tracer.path.clone(),
options: tracer.options.clone(),
nullable: tracer.nullable || is_null_type,
item_type,
tracer.nullable,
));
strategy,
});
*this = Self::Primitive(tracer);
}
Self::Primitive(tracer) if tracer.options.coerce_numbers => {
use DataType::{
Float32, Float64, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8,
};
let item_type = match (&tracer.item_type, item_type) {
// unsigned x unsigned -> u64
(UInt8 | UInt16 | UInt32 | UInt64, UInt8 | UInt16 | UInt32 | UInt64) => UInt64,
// signed x signed -> i64
(Int8 | Int16 | Int32 | Int64, Int8 | Int16 | Int32 | Int64) => Int64,
// signed x unsigned -> i64
(Int8 | Int16 | Int32 | Int64, UInt8 | UInt16 | UInt32 | UInt64) => Int64,
// unsigned x signed -> i64
(UInt8 | UInt16 | UInt32 | UInt64, Int8 | Int16 | Int32 | Int64) => Int64,
// float x float -> f64
(Float32 | Float64, Float32 | Float64) => Float64,
// int x float -> f64
(
Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64,
Float32 | Float64,
) => Float64,
// float x int -> f64
(
Float32 | Float64,
Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64,
) => Float64,
(ty, ev) => fail!(
"Cannot accept event {ev} for tracer of primitive type {ty}",
ev = DataTypeDisplay(&ev),
ty = DataTypeDisplay(ty),
),
};
this @ (Self::List(_)
| Self::Map(_)
| Self::Struct(_)
| Self::Tuple(_)
| Self::Union(_)) => {
if matches!(item_type, DataType::Null) {
dispatch_tracer!(this, tracer => { tracer.nullable = true });
} else {
fail!("Cannot merge {ty:?} with {item_type:?}", ty = this.get_type());
}
}
Self::Primitive(tracer) => {
let (item_type, nullable, strategy) = coerce_primitive_type(
(&tracer.item_type, tracer.nullable, tracer.strategy.as_ref()),
(item_type, strategy),
tracer.options.as_ref(),
)?;

tracer.item_type = item_type;
tracer.strategy = strategy;
tracer.nullable = nullable;
}
Self::Primitive(tracer) if tracer.item_type == item_type => {}
_ => fail!(
"mismatched types, previous {:?}, current {:?}",
self.get_type(),
item_type
),
}
Ok(())
}
}

fn coerce_primitive_type(
prev: (&DataType, bool, Option<&Strategy>),
curr: (DataType, Option<Strategy>),
options: &TracingOptions,
) -> Result<(DataType, bool, Option<Strategy>)> {
use DataType::{
Date64, LargeUtf8, Null, Float32, Float64, Int16, Int32, Int64, Int8, UInt16, UInt32, UInt64, UInt8,
};

let res = match (prev, curr) {
((prev_ty, nullable, prev_st), (curr_ty, curr_st)) if prev_ty == &curr_ty && prev_st == curr_st.as_ref() => (curr_ty, nullable, curr_st),
((Null, _, _), (curr_ty, curr_st)) => (curr_ty, true, curr_st),
((prev_ty, _, prev_st), (Null, _)) => (prev_ty.clone(), true, prev_st.cloned()),
// unsigned x unsigned -> u64
((UInt8 | UInt16 | UInt32 | UInt64, nullable, _), (UInt8 | UInt16 | UInt32 | UInt64, _,)) if options.coerce_numbers => (UInt64, nullable, None),
// signed x signed -> i64
((Int8 | Int16 | Int32 | Int64, nullable, _), (Int8 | Int16 | Int32 | Int64, _)) if options.coerce_numbers => (Int64, nullable, None),
// signed x unsigned -> i64
((Int8 | Int16 | Int32 | Int64, nullable, _), (UInt8 | UInt16 | UInt32 | UInt64, _)) if options.coerce_numbers => (Int64, nullable, None),
// unsigned x signed -> i64
((UInt8 | UInt16 | UInt32 | UInt64, nullable, _), (Int8 | Int16 | Int32 | Int64, _)) if options.coerce_numbers => (Int64, nullable, None),
// float x float -> f64
((Float32 | Float64, nullable, _), (Float32 | Float64, _)) if options.coerce_numbers=> (Float64, nullable, None),
// int x float -> f64
((Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64, nullable, _), (Float32 | Float64, _)) if options.coerce_numbers => (Float64, nullable, None),
// float x int -> f64
((Float32 | Float64, nullable, _), (Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64, _)) if options.coerce_numbers => (Float64, nullable, None),
// incompatible formats, coerce to string
((Date64, nullable, _), (LargeUtf8, _)) => (LargeUtf8, nullable, None),
((LargeUtf8, nullable, _), (Date64, _)) => (LargeUtf8, nullable, None),
((Date64, nullable, prev_st), (Date64, curr_st)) if prev_st != curr_st.as_ref() => (LargeUtf8, nullable, None),
((prev_ty, _, prev_st), (curr_ty, curr_st)) => fail!("Cannot accept event {curr_ty:?} with strategy {curr_st:?} for tracer of primitive type {prev_ty:?} with strategy {prev_st:?}"),
};
Ok(res)
}

#[derive(Debug, PartialEq, Clone)]
pub struct UnknownTracer {
pub name: String,
Expand Down
1 change: 1 addition & 0 deletions serde_arrow/src/test/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod api_chrono;
mod error;
mod schema_like;
mod schema_tracing;
Loading

0 comments on commit 1216f45

Please sign in to comment.