diff --git a/rust/examples/basic_operations.rs b/rust/examples/basic_operations.rs index 0732791b74..566e38b3f1 100644 --- a/rust/examples/basic_operations.rs +++ b/rust/examples/basic_operations.rs @@ -1,10 +1,11 @@ use arrow::{ array::{Int32Array, StringArray, TimestampMicrosecondArray}, - datatypes::{DataType, Field, Schema as ArrowSchema, TimeUnit}, + datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema, TimeUnit}, record_batch::RecordBatch, }; +use deltalake::kernel::{DataType, PrimitiveType, StructField}; use deltalake::operations::collect_sendable_stream; -use deltalake::{protocol::SaveMode, DeltaOps, SchemaDataType, SchemaField}; +use deltalake::{protocol::SaveMode, DeltaOps}; use parquet::{ basic::{Compression, ZstdLevel}, file::properties::WriterProperties, @@ -12,36 +13,33 @@ use parquet::{ use std::sync::Arc; -fn get_table_columns() -> Vec { +fn get_table_columns() -> Vec { vec![ - SchemaField::new( + StructField::new( String::from("int"), - SchemaDataType::primitive(String::from("integer")), + DataType::Primitive(PrimitiveType::Integer), false, - Default::default(), ), - SchemaField::new( + StructField::new( String::from("string"), - SchemaDataType::primitive(String::from("string")), + DataType::Primitive(PrimitiveType::String), true, - Default::default(), ), - SchemaField::new( + StructField::new( String::from("timestamp"), - SchemaDataType::primitive(String::from("timestamp")), + DataType::Primitive(PrimitiveType::Timestamp), true, - Default::default(), ), ] } fn get_table_batches() -> RecordBatch { let schema = Arc::new(ArrowSchema::new(vec![ - Field::new("int", DataType::Int32, false), - Field::new("string", DataType::Utf8, true), + Field::new("int", ArrowDataType::Int32, false), + Field::new("string", ArrowDataType::Utf8, true), Field::new( "timestamp", - DataType::Timestamp(TimeUnit::Microsecond, None), + ArrowDataType::Timestamp(TimeUnit::Microsecond, None), true, ), ])); diff --git a/rust/examples/recordbatch-writer.rs b/rust/examples/recordbatch-writer.rs index 42e81ba1ac..f4d3231b85 100644 --- a/rust/examples/recordbatch-writer.rs +++ b/rust/examples/recordbatch-writer.rs @@ -11,6 +11,7 @@ use chrono::prelude::*; use deltalake::arrow::array::*; use deltalake::arrow::record_batch::RecordBatch; use deltalake::errors::DeltaTableError; +use deltalake::kernel::{DataType, PrimitiveType, StructField, StructType}; use deltalake::writer::{DeltaWriter, RecordBatchWriter}; use deltalake::*; use log::*; @@ -19,8 +20,6 @@ use parquet::{ basic::{Compression, ZstdLevel}, file::properties::WriterProperties, }; - -use std::collections::HashMap; use std::sync::Arc; /* @@ -86,31 +85,27 @@ struct WeatherRecord { } impl WeatherRecord { - fn columns() -> Vec { + fn columns() -> Vec { vec![ - SchemaField::new( + StructField::new( "timestamp".to_string(), - SchemaDataType::primitive("timestamp".to_string()), + DataType::Primitive(PrimitiveType::Timestamp), true, - HashMap::new(), ), - SchemaField::new( + StructField::new( "temp".to_string(), - SchemaDataType::primitive("integer".to_string()), + DataType::Primitive(PrimitiveType::Integer), true, - HashMap::new(), ), - SchemaField::new( + StructField::new( "lat".to_string(), - SchemaDataType::primitive("double".to_string()), + DataType::Primitive(PrimitiveType::Float), true, - HashMap::new(), ), - SchemaField::new( + StructField::new( "long".to_string(), - SchemaDataType::primitive("double".to_string()), + DataType::Primitive(PrimitiveType::Float), true, - HashMap::new(), ), ] } @@ -167,7 +162,7 @@ fn convert_to_batch(table: &DeltaTable, records: &Vec) -> RecordB let metadata = table .get_metadata() .expect("Failed to get metadata for the table"); - let arrow_schema = >::try_from( + let arrow_schema = >::try_from( &metadata.schema.clone(), ) .expect("Failed to convert to arrow schema"); diff --git a/rust/src/kernel/actions/mod.rs b/rust/src/kernel/actions/mod.rs index 32094ab98e..4f95825e6f 100644 --- a/rust/src/kernel/actions/mod.rs +++ b/rust/src/kernel/actions/mod.rs @@ -52,6 +52,7 @@ pub enum ActionType { #[serde(rename_all = "camelCase")] #[allow(missing_docs)] pub enum Action { + #[serde(rename = "metaData")] Metadata(Metadata), Protocol(Protocol), Add(Add), diff --git a/rust/src/kernel/actions/types.rs b/rust/src/kernel/actions/types.rs index c36ad8027f..a0d082a6cb 100644 --- a/rust/src/kernel/actions/types.rs +++ b/rust/src/kernel/actions/types.rs @@ -4,8 +4,8 @@ use std::str::FromStr; // use std::sync::Arc; // use roaring::RoaringTreemap; +use log::warn; use serde::{Deserialize, Serialize}; -use tracing::warn; use url::Url; use super::super::schema::StructType; @@ -105,6 +105,7 @@ impl Metadata { } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] /// Defines a protocol action pub struct Protocol { /// The minimum version of the Delta read protocol that a client must implement @@ -203,6 +204,7 @@ impl ToString for StorageType { } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] /// Defines a deletion vector pub struct DeletionVectorDescriptor { /// A single character to indicate how to access the DV. Legal options are: ['u', 'i', 'p']. @@ -336,6 +338,7 @@ impl DeletionVectorDescriptor { } #[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "camelCase")] /// Defines an add action pub struct Add { /// A relative path to a data file from the root of the table or an absolute path to a file diff --git a/rust/tests/checkpoint_writer.rs b/rust/tests/checkpoint_writer.rs index 10af3d9cfa..4c3e9e3593 100644 --- a/rust/tests/checkpoint_writer.rs +++ b/rust/tests/checkpoint_writer.rs @@ -211,7 +211,7 @@ mod checkpoints_with_tombstones { use super::*; use ::object_store::path::Path as ObjectStorePath; use chrono::Utc; - use deltalake::protocol::*; + use deltalake::kernel::*; use deltalake::table::config::DeltaConfigKey; use deltalake::*; use maplit::hashmap; @@ -346,6 +346,8 @@ mod checkpoints_with_tombstones { size: None, tags: None, deletion_vector: None, + base_row_id: None, + default_row_commit_version: None, }) .collect(); @@ -357,8 +359,8 @@ mod checkpoints_with_tombstones { let actions = removes .iter() .cloned() - .map(Action::remove) - .chain(std::iter::once(Action::add(add.clone()))) + .map(Action::Remove) + .chain(std::iter::once(Action::Add(add.clone()))) .collect(); let operation = DeltaOperation::Optimize { predicate: None, @@ -389,7 +391,7 @@ mod checkpoints_with_tombstones { let actions = actions .iter() .filter_map(|a| match a { - Action::remove(r) => Some(r.clone()), + Action::Remove(r) => Some(r.clone()), _ => None, }) .collect(); @@ -408,6 +410,8 @@ mod checkpoints_with_tombstones { size: Some(100), tags: None, deletion_vector: None, + base_row_id: None, + default_row_commit_version: None, } } diff --git a/rust/tests/command_optimize.rs b/rust/tests/command_optimize.rs index 153d7f86d5..2695742128 100644 --- a/rust/tests/command_optimize.rs +++ b/rust/tests/command_optimize.rs @@ -4,16 +4,17 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; use std::{collections::HashMap, error::Error, sync::Arc}; use arrow_array::{Int32Array, RecordBatch, StringArray}; -use arrow_schema::{DataType, Field, Schema as ArrowSchema}; +use arrow_schema::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; use arrow_select::concat::concat_batches; use deltalake::errors::DeltaTableError; +use deltalake::kernel::{Action, DataType, PrimitiveType, Remove, StructField}; use deltalake::operations::optimize::{create_merge_plan, MetricDetails, Metrics, OptimizeType}; use deltalake::operations::transaction::commit; use deltalake::operations::DeltaOps; -use deltalake::protocol::{Action, DeltaOperation, Remove}; +use deltalake::protocol::DeltaOperation; use deltalake::storage::ObjectStoreRef; use deltalake::writer::{DeltaWriter, RecordBatchWriter}; -use deltalake::{DeltaTable, PartitionFilter, Path, SchemaDataType, SchemaField}; +use deltalake::{DeltaTable, PartitionFilter, Path}; use futures::TryStreamExt; use object_store::ObjectStore; use parquet::arrow::async_reader::ParquetObjectReader; @@ -30,23 +31,20 @@ struct Context { async fn setup_test(partitioned: bool) -> Result> { let columns = vec![ - SchemaField::new( + StructField::new( "x".to_owned(), - SchemaDataType::primitive("integer".to_owned()), + DataType::Primitive(PrimitiveType::Integer), false, - HashMap::new(), ), - SchemaField::new( + StructField::new( "y".to_owned(), - SchemaDataType::primitive("integer".to_owned()), + DataType::Primitive(PrimitiveType::Integer), false, - HashMap::new(), ), - SchemaField::new( + StructField::new( "date".to_owned(), - SchemaDataType::primitive("string".to_owned()), + DataType::Primitive(PrimitiveType::String), false, - HashMap::new(), ), ]; @@ -90,9 +88,9 @@ fn generate_random_batch>( Ok(RecordBatch::try_new( Arc::new(ArrowSchema::new(vec![ - Field::new("x", DataType::Int32, false), - Field::new("y", DataType::Int32, false), - Field::new("date", DataType::Utf8, false), + Field::new("x", ArrowDataType::Int32, false), + Field::new("y", ArrowDataType::Int32, false), + Field::new("date", ArrowDataType::Utf8, false), ])), vec![Arc::new(x_array), Arc::new(y_array), Arc::new(date_array)], )?) @@ -119,9 +117,9 @@ fn tuples_to_batch>( Ok(RecordBatch::try_new( Arc::new(ArrowSchema::new(vec![ - Field::new("x", DataType::Int32, false), - Field::new("y", DataType::Int32, false), - Field::new("date", DataType::Utf8, false), + Field::new("x", ArrowDataType::Int32, false), + Field::new("y", ArrowDataType::Int32, false), + Field::new("date", ArrowDataType::Utf8, false), ])), vec![Arc::new(x_array), Arc::new(y_array), Arc::new(date_array)], )?) @@ -292,12 +290,14 @@ async fn test_conflict_for_remove_actions() -> Result<(), Box> { partition_values: Some(add.partition_values.clone()), tags: Some(HashMap::new()), deletion_vector: add.deletion_vector.clone(), + base_row_id: add.base_row_id, + default_row_commit_version: add.default_row_commit_version, }; let operation = DeltaOperation::Delete { predicate: None }; commit( other_dt.object_store().as_ref(), - &vec![Action::remove(remove)], + &vec![Action::Remove(remove)], operation, &other_dt.state, None, diff --git a/rust/tests/command_restore.rs b/rust/tests/command_restore.rs index 8f8ac11ca1..1c8b263eff 100644 --- a/rust/tests/command_restore.rs +++ b/rust/tests/command_restore.rs @@ -2,12 +2,12 @@ use arrow::datatypes::Schema as ArrowSchema; use arrow_array::{Int32Array, RecordBatch}; -use arrow_schema::{DataType, Field}; +use arrow_schema::{DataType as ArrowDataType, Field}; use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; +use deltalake::kernel::{DataType, PrimitiveType, StructField}; use deltalake::protocol::SaveMode; -use deltalake::{DeltaOps, DeltaTable, SchemaDataType, SchemaField}; +use deltalake::{DeltaOps, DeltaTable}; use rand::Rng; -use std::collections::HashMap; use std::error::Error; use std::fs; use std::sync::Arc; @@ -21,17 +21,15 @@ struct Context { async fn setup_test() -> Result> { let columns = vec![ - SchemaField::new( + StructField::new( "id".to_string(), - SchemaDataType::primitive("integer".to_string()), + DataType::Primitive(PrimitiveType::Integer), true, - HashMap::new(), ), - SchemaField::new( + StructField::new( "value".to_string(), - SchemaDataType::primitive("integer".to_string()), + DataType::Primitive(PrimitiveType::Integer), true, - HashMap::new(), ), ]; @@ -77,8 +75,8 @@ fn get_record_batch() -> RecordBatch { } let schema = ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, true), - Field::new("value", DataType::Int32, true), + Field::new("id", ArrowDataType::Int32, true), + Field::new("value", ArrowDataType::Int32, true), ]); let id_array = Int32Array::from(id_vec); diff --git a/rust/tests/command_vacuum.rs b/rust/tests/command_vacuum.rs index 4437e9dc85..de555c5b41 100644 --- a/rust/tests/command_vacuum.rs +++ b/rust/tests/command_vacuum.rs @@ -1,9 +1,9 @@ use chrono::Duration; use common::clock::TestClock; use common::TestContext; +use deltalake::kernel::StructType; use deltalake::operations::vacuum::Clock; use deltalake::operations::DeltaOps; -use deltalake::Schema; use object_store::{path::Path, Error as ObjectStoreError, ObjectStore}; use serde_json::json; use std::sync::Arc; @@ -11,7 +11,7 @@ use std::sync::Arc; mod common; /// Basic schema -pub fn get_xy_date_schema() -> Schema { +pub fn get_xy_date_schema() -> StructType { serde_json::from_value(json!({ "type": "struct", "fields": [ @@ -24,8 +24,8 @@ pub fn get_xy_date_schema() -> Schema { } /// Schema that contains a column prefiexed with _ -pub fn get_vacuum_underscore_schema() -> Schema { - serde_json::from_value::(json!({ +pub fn get_vacuum_underscore_schema() -> StructType { + serde_json::from_value::(json!({ "type": "struct", "fields": [ {"name": "x", "type": "integer", "nullable": false, "metadata": {}}, diff --git a/rust/tests/commit_info_format.rs b/rust/tests/commit_info_format.rs index fdb1f89d92..024e59d29f 100644 --- a/rust/tests/commit_info_format.rs +++ b/rust/tests/commit_info_format.rs @@ -1,8 +1,9 @@ #![allow(dead_code)] mod fs_common; +use deltalake::kernel::Action; use deltalake::operations::transaction::commit; -use deltalake::protocol::{Action, DeltaOperation, SaveMode}; +use deltalake::protocol::{DeltaOperation, SaveMode}; use serde_json::json; use std::error::Error; use tempdir::TempDir; @@ -13,7 +14,7 @@ async fn test_operational_parameters() -> Result<(), Box> { let mut table = fs_common::create_table(path.path().to_str().unwrap(), None).await; let add = fs_common::add(0); - let actions = vec![Action::add(add)]; + let actions = vec![Action::Add(add)]; let operation = DeltaOperation::Write { mode: SaveMode::Append, partition_by: Some(vec!["some_partition".to_string()]), diff --git a/rust/tests/common/mod.rs b/rust/tests/common/mod.rs index 2ba20d0635..6b7e71b48a 100644 --- a/rust/tests/common/mod.rs +++ b/rust/tests/common/mod.rs @@ -1,12 +1,13 @@ #![allow(dead_code, unused_variables)] use bytes::Bytes; +use deltalake::kernel::{Action, Add, Remove, StructType}; use deltalake::operations::create::CreateBuilder; use deltalake::operations::transaction::commit; -use deltalake::protocol::{self, Add, DeltaOperation, Remove, SaveMode}; +use deltalake::protocol::{DeltaOperation, SaveMode}; use deltalake::storage::DeltaObjectStore; +use deltalake::DeltaTable; use deltalake::DeltaTableBuilder; -use deltalake::{DeltaTable, Schema}; use object_store::{path::Path, ObjectStore}; use std::any::Any; use std::collections::HashMap; @@ -74,7 +75,7 @@ impl TestContext { //Create and set a new table from the provided schema pub async fn create_table_from_schema( &mut self, - schema: Schema, + schema: StructType, partitions: &[&str], ) -> DeltaTable { let p = partitions @@ -86,7 +87,7 @@ impl TestContext { .with_object_store(backend) .with_table_name("delta-rs_test_table") .with_comment("Table created by delta-rs tests") - .with_columns(schema.get_fields().clone()) + .with_columns(schema.fields().clone()) .with_partition_columns(p) .await .unwrap() @@ -133,14 +134,20 @@ pub async fn add_file( modification_time: create_time, partition_values: part_values, data_change: true, - ..Default::default() + stats: None, + stats_parsed: None, + partition_values_parsed: None, + tags: None, + default_row_commit_version: None, + base_row_id: None, + deletion_vector: None, }; let operation = DeltaOperation::Write { mode: SaveMode::Append, partition_by: None, predicate: None, }; - let actions = vec![protocol::Action::add(add)]; + let actions = vec![Action::Add(add)]; commit( table.object_store().as_ref(), &actions, @@ -170,10 +177,15 @@ pub async fn remove_file( deletion_timestamp: Some(deletion_timestamp), partition_values: Some(part_values), data_change: true, - ..Default::default() + extended_file_metadata: None, + size: None, + deletion_vector: None, + default_row_commit_version: None, + base_row_id: None, + tags: None, }; let operation = DeltaOperation::Delete { predicate: None }; - let actions = vec![protocol::Action::remove(remove)]; + let actions = vec![Action::Remove(remove)]; commit( table.object_store().as_ref(), &actions, diff --git a/rust/tests/data/table-with-dv-small/_delta_log/00000000000000000000.json b/rust/tests/data/table-with-dv-small/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..3760ad9930 --- /dev/null +++ b/rust/tests/data/table-with-dv-small/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1677811178585,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"10","numOutputBytes":"635"},"engineInfo":"Databricks-Runtime/","txnId":"a6a94671-55ef-450e-9546-b8465b9147de"}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors"]}} +{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none"},"createdTime":1677811175819}} +{"add":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet","partitionValues":{},"size":635,"modificationTime":1677811178336,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/rust/tests/data/table-with-dv-small/_delta_log/00000000000000000001.json b/rust/tests/data/table-with-dv-small/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..e5bcdc1163 --- /dev/null +++ b/rust/tests/data/table-with-dv-small/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1677811194429,"operation":"DELETE","operationParameters":{"predicate":"[\"(spark_catalog.delta.`/tmp/table-with-dv-small`.value IN (0, 9))\"]"},"readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"0","numRemovedBytes":"0","numCopiedRows":"0","numDeletionVectorsAdded":"1","numDeletionVectorsRemoved":"0","numAddedChangeFiles":"0","executionTimeMs":"10364","numDeletedRows":"2","scanTimeMs":"9869","numAddedFiles":"0","numAddedBytes":"0","rewriteTimeMs":"479"},"engineInfo":"Databricks-Runtime/","txnId":"6d9555a2-0e3b-4c15-80c0-d5c3b0cf1277"}} +{"remove":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet","deletionTimestamp":1677811194426,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":635,"tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet","partitionValues":{},"size":635,"modificationTime":1677811178336,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0},\"tightBounds\":false}","tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":2}}} diff --git a/rust/tests/data/table-with-dv-small/deletion_vector_61d16c75-6994-46b7-a15b-8b538852e50e.bin b/rust/tests/data/table-with-dv-small/deletion_vector_61d16c75-6994-46b7-a15b-8b538852e50e.bin new file mode 100644 index 0000000000..f1a01e661c Binary files /dev/null and b/rust/tests/data/table-with-dv-small/deletion_vector_61d16c75-6994-46b7-a15b-8b538852e50e.bin differ diff --git a/rust/tests/data/table-with-dv-small/part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet b/rust/tests/data/table-with-dv-small/part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet new file mode 100644 index 0000000000..640e643b56 Binary files /dev/null and b/rust/tests/data/table-with-dv-small/part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet differ diff --git a/rust/tests/data/table-without-dv-small/_delta_log/00000000000000000000.json b/rust/tests/data/table-without-dv-small/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..a7941cd087 --- /dev/null +++ b/rust/tests/data/table-without-dv-small/_delta_log/00000000000000000000.json @@ -0,0 +1,4 @@ +{"commitInfo":{"timestamp":1678020185201,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"10","numOutputBytes":"548"},"engineInfo":"Apache-Spark/3.3.0 Delta-Lake/2.3.0rc1","txnId":"07c0f996-3854-4456-b68b-d1e35e3888cd"}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"6524c99f-9a76-4ea1-8ad4-e428a7e065d7","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1678020184802}} +{"add":{"path":"part-00000-517f5d32-9c95-48e8-82b4-0229cc194867-c000.snappy.parquet","partitionValues":{},"size":548,"modificationTime":1678020185157,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0}}"}} diff --git a/rust/tests/data/table-without-dv-small/part-00000-517f5d32-9c95-48e8-82b4-0229cc194867-c000.snappy.parquet b/rust/tests/data/table-without-dv-small/part-00000-517f5d32-9c95-48e8-82b4-0229cc194867-c000.snappy.parquet new file mode 100644 index 0000000000..7ce78a86b0 Binary files /dev/null and b/rust/tests/data/table-without-dv-small/part-00000-517f5d32-9c95-48e8-82b4-0229cc194867-c000.snappy.parquet differ diff --git a/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000000.json b/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..8ecc70e647 --- /dev/null +++ b/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"84b09beb-329c-4b5e-b493-f58c6c78b8fd","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"letter\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"int\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.checkpointInterval":"2"},"createdTime":1674611455081}} +{"commitInfo":{"timestamp":1674611455099,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[]","properties":"{\"delta.checkpointInterval\":\"2\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.1.1","txnId":"d87e63fb-7388-4b1c-9afc-750a561012b7"}} diff --git a/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000001.json b/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..5f4304c65c --- /dev/null +++ b/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"add":{"path":"part-00000-ad1a4bb7-07e8-4f40-b50b-49910d209e0c-c000.snappy.parquet","partitionValues":{},"size":965,"modificationTime":1674611456921,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"letter\":\"b\",\"int\":288,\"date\":\"1978-02-01\"},\"maxValues\":{\"letter\":\"c\",\"int\":988,\"date\":\"2020-05-01\"},\"nullCount\":{\"letter\":3,\"int\":0,\"date\":0}}"}} +{"commitInfo":{"timestamp":1674611457269,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"965"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.1.1","txnId":"71d9bcd1-7f2b-46f8-bd1f-e0a8e872f3c3"}} diff --git a/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000002.checkpoint.parquet b/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000002.checkpoint.parquet new file mode 100644 index 0000000000..659bf517d6 Binary files /dev/null and b/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000002.checkpoint.parquet differ diff --git a/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000002.json b/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000002.json new file mode 100644 index 0000000000..f59c40dd67 --- /dev/null +++ b/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000002.json @@ -0,0 +1,3 @@ +{"add":{"path":"part-00000-a190be9e-e3df-439e-b366-06a863f51e99-c000.snappy.parquet","partitionValues":{},"size":976,"modificationTime":1674611458901,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"letter\":\"a\",\"int\":120,\"date\":\"1971-07-01\"},\"maxValues\":{\"letter\":\"c\",\"int\":667,\"date\":\"2018-02-01\"},\"nullCount\":{\"letter\":2,\"int\":0,\"date\":0}}"}} +{"remove":{"path":"part-00000-ad1a4bb7-07e8-4f40-b50b-49910d209e0c-c000.snappy.parquet","deletionTimestamp":1674611459307,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":965}} +{"commitInfo":{"timestamp":1674611459307,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"976"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.1.1","txnId":"b08f5758-a8e9-4dd1-af7e-7b6e53928d7a"}} diff --git a/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000003.json b/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000003.json new file mode 100644 index 0000000000..21a717332f --- /dev/null +++ b/rust/tests/data/with_checkpoint_no_last_checkpoint/_delta_log/00000000000000000003.json @@ -0,0 +1,3 @@ +{"add":{"path":"part-00000-70b1dcdf-0236-4f63-a072-124cdbafd8a0-c000.snappy.parquet","partitionValues":{},"size":1010,"modificationTime":1674611461541,"dataChange":true,"stats":"{\"numRecords\":5,\"minValues\":{\"letter\":\"a\",\"int\":93,\"date\":\"1975-06-01\"},\"maxValues\":{\"letter\":\"c\",\"int\":753,\"date\":\"2013-03-01\"},\"nullCount\":{\"letter\":1,\"int\":0,\"date\":0}}"}} +{"remove":{"path":"part-00000-a190be9e-e3df-439e-b366-06a863f51e99-c000.snappy.parquet","deletionTimestamp":1674611461982,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":976}} +{"commitInfo":{"timestamp":1674611461982,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"1","numOutputRows":"5","numOutputBytes":"1010"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.1.1","txnId":"0403bbaf-a6f2-4543-9e6c-bd068e76670f"}} diff --git a/rust/tests/fs_common/mod.rs b/rust/tests/fs_common/mod.rs index 3c5ab39e2c..39f022335c 100644 --- a/rust/tests/fs_common/mod.rs +++ b/rust/tests/fs_common/mod.rs @@ -1,9 +1,10 @@ use chrono::Utc; +use deltalake::kernel::{Action, Add, DataType, PrimitiveType, Remove, StructField, StructType}; use deltalake::operations::create::CreateBuilder; use deltalake::operations::transaction::commit; -use deltalake::protocol::{Action, Add, DeltaOperation, Remove, SaveMode}; +use deltalake::protocol::{DeltaOperation, SaveMode}; use deltalake::storage::{DeltaObjectStore, GetResult, ObjectStoreResult}; -use deltalake::{DeltaTable, Schema, SchemaDataType, SchemaField}; +use deltalake::DeltaTable; use object_store::path::Path as StorePath; use object_store::ObjectStore; use serde_json::Value; @@ -36,14 +37,14 @@ pub async fn create_table_from_json( std::fs::create_dir_all(path).unwrap(); std::fs::remove_dir_all(path).unwrap(); std::fs::create_dir_all(path).unwrap(); - let schema: Schema = serde_json::from_value(schema).unwrap(); + let schema: StructType = serde_json::from_value(schema).unwrap(); let config: HashMap> = serde_json::from_value(config).unwrap(); create_test_table(path, schema, partition_columns, config).await } pub async fn create_test_table( path: &str, - schema: Schema, + schema: StructType, partition_columns: Vec<&str>, config: HashMap>, ) -> DeltaTable { @@ -51,7 +52,7 @@ pub async fn create_test_table( .with_location(path) .with_table_name("test-table") .with_comment("A table for running tests") - .with_columns(schema.get_fields().clone()) + .with_columns(schema.fields().clone()) .with_partition_columns(partition_columns) .with_configuration(config) .await @@ -66,11 +67,10 @@ pub async fn create_table( fs::create_dir_all(&log_dir).unwrap(); cleanup_dir_except(log_dir, vec![]); - let schema = Schema::new(vec![SchemaField::new( + let schema = StructType::new(vec![StructField::new( "id".to_string(), - SchemaDataType::primitive("integer".to_string()), + DataType::Primitive(PrimitiveType::Integer), true, - HashMap::new(), )]); create_test_table(path, schema, Vec::new(), config.unwrap_or_default()).await @@ -88,6 +88,8 @@ pub fn add(offset_millis: i64) -> Add { stats_parsed: None, tags: None, deletion_vector: None, + base_row_id: None, + default_row_commit_version: None, } } @@ -97,13 +99,13 @@ pub async fn commit_add(table: &mut DeltaTable, add: &Add) -> i64 { partition_by: None, predicate: None, }; - commit_actions(table, vec![Action::add(add.clone())], operation).await + commit_actions(table, vec![Action::Add(add.clone())], operation).await } pub async fn commit_removes(table: &mut DeltaTable, removes: Vec<&Remove>) -> i64 { let vec = removes .iter() - .map(|r| Action::remove((*r).clone())) + .map(|r| Action::Remove((*r).clone())) .collect(); let operation = DeltaOperation::Delete { predicate: None }; commit_actions(table, vec, operation).await diff --git a/rust/tests/integration_checkpoint.rs b/rust/tests/integration_checkpoint.rs index c4361ac7bf..e550adea69 100644 --- a/rust/tests/integration_checkpoint.rs +++ b/rust/tests/integration_checkpoint.rs @@ -2,9 +2,10 @@ use chrono::Utc; use deltalake::checkpoints::{cleanup_expired_logs_for, create_checkpoint}; +use deltalake::kernel::{DataType, PrimitiveType}; use deltalake::test_utils::{IntegrationContext, StorageIntegration, TestResult}; use deltalake::writer::{DeltaWriter, JsonWriter}; -use deltalake::{errors::DeltaResult, DeltaOps, DeltaTableBuilder, ObjectStore, SchemaDataType}; +use deltalake::{errors::DeltaResult, DeltaOps, DeltaTableBuilder, ObjectStore}; use object_store::path::Path; use serde_json::json; use serial_test::serial; @@ -119,7 +120,7 @@ async fn test_issue_1420_cleanup_expired_logs_for() -> DeltaResult<()> { .create() .with_column( "id", - SchemaDataType::primitive("integer".to_string()), + DataType::Primitive(PrimitiveType::Integer), false, None, ) diff --git a/rust/tests/integration_concurrent_writes.rs b/rust/tests/integration_concurrent_writes.rs index f34feac6e0..980be41752 100644 --- a/rust/tests/integration_concurrent_writes.rs +++ b/rust/tests/integration_concurrent_writes.rs @@ -1,10 +1,11 @@ #![cfg(feature = "integration_test")] +use deltalake::kernel::{Action, Add, DataType, PrimitiveType, StructField, StructType}; use deltalake::operations::transaction::commit; use deltalake::operations::DeltaOps; -use deltalake::protocol::{Action, Add, DeltaOperation, SaveMode}; +use deltalake::protocol::{DeltaOperation, SaveMode}; use deltalake::test_utils::{IntegrationContext, StorageIntegration, TestResult, TestTables}; -use deltalake::{DeltaTable, DeltaTableBuilder, Schema, SchemaDataType, SchemaField}; +use deltalake::{DeltaTable, DeltaTableBuilder}; use std::collections::HashMap; use std::future::Future; use std::iter::FromIterator; @@ -49,11 +50,10 @@ async fn test_concurrent_writes(integration: StorageIntegration) -> TestResult { async fn prepare_table( context: &IntegrationContext, ) -> Result<(DeltaTable, String), Box> { - let schema = Schema::new(vec![SchemaField::new( + let schema = StructType::new(vec![StructField::new( "Id".to_string(), - SchemaDataType::primitive("integer".to_string()), + DataType::Primitive(PrimitiveType::Integer), true, - HashMap::new(), )]); let table_uri = context.uri_for_table(TestTables::Custom("concurrent_workers".into())); @@ -64,7 +64,7 @@ async fn prepare_table( let table = DeltaOps(table) .create() - .with_columns(schema.get_fields().clone()) + .with_columns(schema.fields().clone()) .await?; assert_eq!(0, table.version()); @@ -153,7 +153,7 @@ impl Worker { partition_by: None, predicate: None, }; - let actions = vec![Action::add(Add { + let actions = vec![Action::Add(Add { path: format!("{}.parquet", name), size: 396, partition_values: HashMap::new(), @@ -164,6 +164,8 @@ impl Worker { stats_parsed: None, tags: None, deletion_vector: None, + base_row_id: None, + default_row_commit_version: None, })]; let version = commit( self.table.object_store().as_ref(), diff --git a/rust/tests/integration_datafusion.rs b/rust/tests/integration_datafusion.rs index 5aafe52e87..1a2a4cd91f 100644 --- a/rust/tests/integration_datafusion.rs +++ b/rust/tests/integration_datafusion.rs @@ -10,11 +10,10 @@ use std::path::PathBuf; use std::sync::Arc; use arrow::array::*; -use arrow::datatypes::{ +use arrow::record_batch::RecordBatch; +use arrow_schema::{ DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, TimeUnit, }; -use arrow::record_batch::RecordBatch; -use arrow_schema::{DataType, Field}; use datafusion::assert_batches_sorted_eq; use datafusion::datasource::physical_plan::ParquetExec; use datafusion::datasource::TableProvider; @@ -32,20 +31,21 @@ use datafusion_proto::bytes::{ use url::Url; use deltalake::delta_datafusion::{DeltaPhysicalCodec, DeltaScan}; +use deltalake::kernel::{DataType, MapType, PrimitiveType, StructField, StructType}; use deltalake::operations::create::CreateBuilder; use deltalake::protocol::SaveMode; use deltalake::storage::DeltaObjectStore; use deltalake::writer::{DeltaWriter, RecordBatchWriter}; use deltalake::{ operations::{write::WriteBuilder, DeltaOps}, - DeltaTable, DeltaTableError, Schema, SchemaDataType, SchemaField, + DeltaTable, DeltaTableError, }; use std::error::Error; mod common; mod local { - use deltalake::{writer::JsonWriter, SchemaTypeMap}; + use deltalake::writer::JsonWriter; use super::*; #[tokio::test] @@ -96,14 +96,14 @@ mod local { let table_dir = tempfile::tempdir().unwrap(); let table_path = table_dir.path(); let table_uri = table_path.to_str().unwrap().to_string(); - let table_schema: Schema = batches[0].schema().try_into().unwrap(); + let table_schema: StructType = batches[0].schema().try_into().unwrap(); let mut table = DeltaOps::try_from_uri(table_uri) .await .unwrap() .create() .with_save_mode(SaveMode::Ignore) - .with_columns(table_schema.get_fields().clone()) + .with_columns(table_schema.fields().clone()) .with_partition_columns(partitions) .await .unwrap(); @@ -195,9 +195,9 @@ mod local { &ctx, &DeltaPhysicalCodec {}, )?; - let fields = Schema::try_from(source_scan.schema()) + let fields = StructType::try_from(source_scan.schema()) .unwrap() - .get_fields() + .fields() .clone(); // Create target Delta Table @@ -939,21 +939,20 @@ mod local { #[tokio::test] async fn test_issue_1619_parquet_panic_using_map_type() -> Result<()> { let _ = tokio::fs::remove_dir_all("./tests/data/issue-1619").await; - let fields: Vec = vec![SchemaField::new( + let fields: Vec = vec![StructField::new( "metadata".to_string(), - SchemaDataType::map(SchemaTypeMap::new( - Box::new(SchemaDataType::primitive("string".to_string())), - Box::new(SchemaDataType::primitive("string".to_string())), + DataType::Map(Box::new(MapType::new( + DataType::Primitive(PrimitiveType::String), + DataType::Primitive(PrimitiveType::String), true, - )), + ))), true, - HashMap::new(), )]; - let schema = deltalake::Schema::new(fields); + let schema = StructType::new(fields); let table = deltalake::DeltaTableBuilder::from_uri("./tests/data/issue-1619").build()?; let _ = DeltaOps::from(table) .create() - .with_columns(schema.get_fields().to_owned()) + .with_columns(schema.fields().to_owned()) .await?; let mut table = deltalake::open_table("./tests/data/issue-1619").await?; @@ -1082,17 +1081,15 @@ mod date_partitions { async fn setup_test() -> Result> { let columns = vec![ - SchemaField::new( + StructField::new( "id".to_owned(), - SchemaDataType::primitive("integer".to_owned()), + DataType::Primitive(PrimitiveType::Integer), false, - HashMap::new(), ), - SchemaField::new( + StructField::new( "date".to_owned(), - SchemaDataType::primitive("date".to_owned()), + DataType::Primitive(PrimitiveType::Date), false, - HashMap::new(), ), ]; @@ -1114,8 +1111,8 @@ mod date_partitions { Ok(RecordBatch::try_new( Arc::new(ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("date", DataType::Date32, false), + ArrowField::new("id", ArrowDataType::Int32, false), + ArrowField::new("date", ArrowDataType::Date32, false), ])), vec![Arc::new(ids_array), Arc::new(date_array)], )?) diff --git a/rust/tests/integration_read.rs b/rust/tests/integration_read.rs index 1fbf35b878..4ff383450a 100644 --- a/rust/tests/integration_read.rs +++ b/rust/tests/integration_read.rs @@ -60,7 +60,7 @@ mod local { assert_eq!(table.get_files(), vec![Path::from(a.path.clone())]); // Remove added file. - let r = deltalake::protocol::Remove { + let r = deltalake::kernel::Remove { path: a.path.clone(), deletion_timestamp: Some(chrono::Utc::now().timestamp_millis()), data_change: false, @@ -69,6 +69,8 @@ mod local { size: None, tags: None, deletion_vector: None, + base_row_id: None, + default_row_commit_version: None, }; assert_eq!(2, fs_common::commit_removes(&mut table, vec![&r]).await); @@ -210,12 +212,17 @@ async fn read_simple_table(integration: &IntegrationContext) -> TestResult { ); let tombstones = table.get_state().all_tombstones(); assert_eq!(tombstones.len(), 31); - assert!(tombstones.contains(&deltalake::protocol::Remove { + assert!(tombstones.contains(&deltalake::kernel::Remove { path: "part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet".to_string(), deletion_timestamp: Some(1587968596250), data_change: true, extended_file_metadata: None, - ..Default::default() + deletion_vector: None, + base_row_id: None, + default_row_commit_version: None, + size: None, + partition_values: None, + tags: None, })); Ok(()) @@ -246,11 +253,17 @@ async fn read_simple_table_with_version(integration: &IntegrationContext) -> Tes ); let tombstones = table.get_state().all_tombstones(); assert_eq!(tombstones.len(), 29); - assert!(tombstones.contains(&deltalake::protocol::Remove { + assert!(tombstones.contains(&deltalake::kernel::Remove { path: "part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet".to_string(), deletion_timestamp: Some(1587968596250), data_change: true, - ..Default::default() + tags: None, + partition_values: None, + base_row_id: None, + default_row_commit_version: None, + size: None, + deletion_vector: None, + extended_file_metadata: None, })); Ok(()) @@ -309,11 +322,17 @@ mod gcs { ); let tombstones = table.get_state().all_tombstones(); assert_eq!(tombstones.len(), 31); - assert!(tombstones.contains(&deltalake::protocol::Remove { + assert!(tombstones.contains(&deltalake::kernel::Remove { path: "part-00006-63ce9deb-bc0f-482d-b9a1-7e717b67f294-c000.snappy.parquet".to_string(), deletion_timestamp: Some(1587968596250), data_change: true, - ..Default::default() + extended_file_metadata: None, + deletion_vector: None, + base_row_id: None, + default_row_commit_version: None, + size: None, + partition_values: None, + tags: None, })); } } diff --git a/rust/tests/read_delta_partitions_test.rs b/rust/tests/read_delta_partitions_test.rs index 7b05e2f93c..47973a5b7d 100644 --- a/rust/tests/read_delta_partitions_test.rs +++ b/rust/tests/read_delta_partitions_test.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::convert::TryFrom; -use deltalake::schema::SchemaDataType; +use deltalake::kernel::{DataType, PrimitiveType}; #[allow(dead_code)] mod fs_common; @@ -50,7 +50,7 @@ fn test_match_partition() { key: "month".to_string(), value: deltalake::PartitionValue::Equal("12".to_string()), }; - let string_type = SchemaDataType::primitive(String::from("string")); + let string_type = DataType::Primitive(PrimitiveType::String); assert!(!partition_year_2020_filter.match_partition(&partition_2021, &string_type)); assert!(partition_year_2020_filter.match_partition(&partition_2020, &string_type)); @@ -71,11 +71,13 @@ fn test_match_filters() { }, ]; - let string_type = SchemaDataType::primitive(String::from("string")); - let partition_data_types: HashMap<&str, &SchemaDataType> = - vec![("year", &string_type), ("month", &string_type)] - .into_iter() - .collect(); + let string_type = DataType::Primitive(PrimitiveType::String); + let partition_data_types: HashMap<&String, &DataType> = vec![ + (&partitions[0].key, &string_type), + (&partitions[1].key, &string_type), + ] + .into_iter() + .collect(); let valid_filters = deltalake::PartitionFilter { key: "year".to_string(), @@ -101,7 +103,7 @@ fn test_match_filters() { #[cfg(all(feature = "arrow", feature = "parquet"))] #[tokio::test] async fn read_null_partitions_from_checkpoint() { - use deltalake::protocol::Add; + use deltalake::kernel::Add; use maplit::hashmap; use serde_json::json; diff --git a/rust/tests/serde/checkpoint_schema.json b/rust/tests/serde/checkpoint_schema.json new file mode 100644 index 0000000000..9e397cd978 --- /dev/null +++ b/rust/tests/serde/checkpoint_schema.json @@ -0,0 +1,267 @@ +{ + "type": "struct", + "fields": [ + { + "name": "txn", + "type": { + "type": "struct", + "fields": [ + { + "name": "appId", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "version", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "lastUpdated", + "type": "long", + "nullable": true, + "metadata": {} + } + ] + }, + "nullable": true, + "metadata": {} + }, + { + "name": "add", + "type": { + "type": "struct", + "fields": [ + { + "name": "path", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "partitionValues", + "type": { + "type": "map", + "keyType": "string", + "valueType": "string", + "valueContainsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "size", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "modificationTime", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "dataChange", + "type": "boolean", + "nullable": true, + "metadata": {} + }, + { + "name": "tags", + "type": { + "type": "map", + "keyType": "string", + "valueType": "string", + "valueContainsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "stats", + "type": "string", + "nullable": true, + "metadata": {} + } + ] + }, + "nullable": true, + "metadata": {} + }, + { + "name": "remove", + "type": { + "type": "struct", + "fields": [ + { + "name": "path", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "deletionTimestamp", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "dataChange", + "type": "boolean", + "nullable": true, + "metadata": {} + }, + { + "name": "extendedFileMetadata", + "type": "boolean", + "nullable": true, + "metadata": {} + }, + { + "name": "partitionValues", + "type": { + "type": "map", + "keyType": "string", + "valueType": "string", + "valueContainsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "size", + "type": "long", + "nullable": true, + "metadata": {} + }, + { + "name": "tags", + "type": { + "type": "map", + "keyType": "string", + "valueType": "string", + "valueContainsNull": true + }, + "nullable": true, + "metadata": {} + } + ] + }, + "nullable": true, + "metadata": {} + }, + { + "name": "metaData", + "type": { + "type": "struct", + "fields": [ + { + "name": "id", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "name", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "description", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "format", + "type": { + "type": "struct", + "fields": [ + { + "name": "provider", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "options", + "type": { + "type": "map", + "keyType": "string", + "valueType": "string", + "valueContainsNull": true + }, + "nullable": true, + "metadata": {} + } + ] + }, + "nullable": true, + "metadata": {} + }, + { + "name": "schemaString", + "type": "string", + "nullable": true, + "metadata": {} + }, + { + "name": "partitionColumns", + "type": { + "type": "array", + "elementType": "string", + "containsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "configuration", + "type": { + "type": "map", + "keyType": "string", + "valueType": "string", + "valueContainsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "createdTime", + "type": "long", + "nullable": true, + "metadata": {} + } + ] + }, + "nullable": true, + "metadata": {} + }, + { + "name": "protocol", + "type": { + "type": "struct", + "fields": [ + { + "name": "minReaderVersion", + "type": "integer", + "nullable": true, + "metadata": {} + }, + { + "name": "minWriterVersion", + "type": "integer", + "nullable": true, + "metadata": {} + } + ] + }, + "nullable": true, + "metadata": {} + } + ] +} diff --git a/rust/tests/serde/schema.json b/rust/tests/serde/schema.json new file mode 100644 index 0000000000..710a9e5080 --- /dev/null +++ b/rust/tests/serde/schema.json @@ -0,0 +1,68 @@ +{ + "type": "struct", + "fields": [ + { + "name": "a", + "type": "integer", + "nullable": false, + "metadata": {} + }, + { + "name": "b", + "type": { + "type": "struct", + "fields": [ + { + "name": "d", + "type": "integer", + "nullable": false, + "metadata": {} + } + ] + }, + "nullable": true, + "metadata": {} + }, + { + "name": "c", + "type": { + "type": "array", + "elementType": "integer", + "containsNull": false + }, + "nullable": true, + "metadata": {} + }, + { + "name": "e", + "type": { + "type": "array", + "elementType": { + "type": "struct", + "fields": [ + { + "name": "d", + "type": "integer", + "nullable": false, + "metadata": {} + } + ] + }, + "containsNull": true + }, + "nullable": true, + "metadata": {} + }, + { + "name": "f", + "type": { + "type": "map", + "keyType": "string", + "valueType": "string", + "valueContainsNull": true + }, + "nullable": true, + "metadata": {} + } + ] +}