From f13c8f3c31c300cdfe75d39a016cfbb29afd8e39 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jul 2024 17:14:53 -0400 Subject: [PATCH 01/36] Update prost, prost-derive, pbjson --- datafusion-cli/Cargo.lock | 32 +++++++++++++------------- datafusion-examples/Cargo.toml | 2 +- datafusion/proto-common/Cargo.toml | 2 +- datafusion/proto-common/gen/Cargo.toml | 4 ++-- datafusion/proto/Cargo.toml | 2 +- datafusion/proto/gen/Cargo.toml | 4 ++-- datafusion/substrait/Cargo.toml | 2 +- 7 files changed, 24 insertions(+), 24 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 42ec5922a73f..16ba4b603815 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -443,7 +443,7 @@ dependencies = [ "fastrand 1.9.0", "hex", "http 0.2.12", - "hyper 0.14.29", + "hyper 0.14.30", "ring 0.16.20", "time", "tokio", @@ -609,7 +609,7 @@ dependencies = [ "fastrand 1.9.0", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.29", + "hyper 0.14.30", "hyper-rustls 0.23.2", "lazy_static", "pin-project-lite", @@ -631,7 +631,7 @@ dependencies = [ "futures-core", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.29", + "hyper 0.14.30", "once_cell", "percent-encoding", "pin-project-lite", @@ -875,9 +875,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.106" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" +checksum = "eaff6f8ce506b9773fa786672d63fc7a191ffea1be33f72bbd4aeacefca9ffc8" dependencies = [ "jobserver", "libc", @@ -1941,9 +1941,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.29" +version = "0.14.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f361cde2f109281a220d4307746cdfd5ee3f410da58a70377762396775634b33" +checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9" dependencies = [ "bytes", "futures-channel", @@ -1965,9 +1965,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4fe55fb7a772d59a5ff1dfbff4fe0258d19b89fec4b233e75d35d5d2316badc" +checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" dependencies = [ "bytes", "futures-channel", @@ -1990,7 +1990,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" dependencies = [ "http 0.2.12", - "hyper 0.14.29", + "hyper 0.14.30", "log", "rustls 0.20.9", "rustls-native-certs 0.6.3", @@ -2006,7 +2006,7 @@ checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" dependencies = [ "futures-util", "http 1.1.0", - "hyper 1.4.0", + "hyper 1.4.1", "hyper-util", "rustls 0.23.11", "rustls-native-certs 0.7.1", @@ -2027,7 +2027,7 @@ dependencies = [ "futures-util", "http 1.1.0", "http-body 1.0.0", - "hyper 1.4.0", + "hyper 1.4.1", "pin-project-lite", "socket2", "tokio", @@ -2502,7 +2502,7 @@ dependencies = [ "chrono", "futures", "humantime", - "hyper 1.4.0", + "hyper 1.4.1", "itertools", "md-5", "parking_lot", @@ -2976,7 +2976,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.0", "http-body-util", - "hyper 1.4.0", + "hyper 1.4.1", "hyper-rustls 0.27.2", "hyper-util", "ipnet", @@ -3908,9 +3908,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de17fd2f7da591098415cff336e12965a28061ddace43b59cb3c430179c9439" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", "serde", diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 626c365af21c..ec97dadc0438 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -72,7 +72,7 @@ log = { workspace = true } mimalloc = { version = "0.1", default-features = false } num_cpus = { workspace = true } object_store = { workspace = true, features = ["aws", "http"] } -prost = { version = "0.12", default-features = false } +prost = { version = "0.13", default-features = false } prost-derive = { version = "0.13", default-features = false } serde = { version = "1.0.136", features = ["derive"] } serde_json = { workspace = true } diff --git a/datafusion/proto-common/Cargo.toml b/datafusion/proto-common/Cargo.toml index e5d65827cdec..bbb051450aa9 100644 --- a/datafusion/proto-common/Cargo.toml +++ b/datafusion/proto-common/Cargo.toml @@ -45,7 +45,7 @@ chrono = { workspace = true } datafusion-common = { workspace = true } object_store = { workspace = true } pbjson = { version = "0.6.0", optional = true } -prost = "0.12.0" +prost = "0.13.0" serde = { version = "1.0", optional = true } serde_json = { workspace = true, optional = true } diff --git a/datafusion/proto-common/gen/Cargo.toml b/datafusion/proto-common/gen/Cargo.toml index 54ec0e44694b..cca49dba7ed3 100644 --- a/datafusion/proto-common/gen/Cargo.toml +++ b/datafusion/proto-common/gen/Cargo.toml @@ -34,5 +34,5 @@ workspace = true [dependencies] # Pin these dependencies so that the generated output is deterministic -pbjson-build = "=0.6.2" -prost-build = "=0.12.6" +pbjson-build = "=0.7.0" +prost-build = "=0.13.1" diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 95d9e6700a50..45c24862f7ff 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -53,7 +53,7 @@ datafusion-expr = { workspace = true } datafusion-proto-common = { workspace = true } object_store = { workspace = true } pbjson = { version = "0.6.0", optional = true } -prost = "0.12.0" +prost = "0.13.0" serde = { version = "1.0", optional = true } serde_json = { workspace = true, optional = true } diff --git a/datafusion/proto/gen/Cargo.toml b/datafusion/proto/gen/Cargo.toml index 401c51c94563..1dc5f7e0dddc 100644 --- a/datafusion/proto/gen/Cargo.toml +++ b/datafusion/proto/gen/Cargo.toml @@ -34,5 +34,5 @@ workspace = true [dependencies] # Pin these dependencies so that the generated output is deterministic -pbjson-build = "=0.6.2" -prost-build = "=0.12.6" +pbjson-build = "=0.7.0" +prost-build = "=0.13.1" diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 9e7ef9632ad3..731f6663b490 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -39,7 +39,7 @@ datafusion = { workspace = true, default-features = true } itertools = { workspace = true } object_store = { workspace = true } pbjson-types = "0.6" -prost = "0.12" +prost = "0.13" substrait = { version = "0.36.0", features = ["serde"] } url = { workspace = true } From c51fd50492d57d333725f1a502f6f8c7c3a061f2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 9 Jul 2024 17:20:44 -0400 Subject: [PATCH 02/36] udpate more --- datafusion-examples/Cargo.toml | 2 +- datafusion/proto-common/Cargo.toml | 2 +- datafusion/proto/Cargo.toml | 2 +- datafusion/substrait/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index ec97dadc0438..bef16ebdbc24 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -79,7 +79,7 @@ serde_json = { workspace = true } tempfile = { workspace = true } test-utils = { path = "../test-utils" } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } -tonic = "0.11" +tonic = "0.12" url = { workspace = true } uuid = "1.7" diff --git a/datafusion/proto-common/Cargo.toml b/datafusion/proto-common/Cargo.toml index bbb051450aa9..2d099424f4c1 100644 --- a/datafusion/proto-common/Cargo.toml +++ b/datafusion/proto-common/Cargo.toml @@ -44,7 +44,7 @@ arrow = { workspace = true } chrono = { workspace = true } datafusion-common = { workspace = true } object_store = { workspace = true } -pbjson = { version = "0.6.0", optional = true } +pbjson = { version = "0.7.0", optional = true } prost = "0.13.0" serde = { version = "1.0", optional = true } serde_json = { workspace = true, optional = true } diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index 45c24862f7ff..7b921fc49795 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -52,7 +52,7 @@ datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } datafusion-proto-common = { workspace = true } object_store = { workspace = true } -pbjson = { version = "0.6.0", optional = true } +pbjson = { version = "0.7.0", optional = true } prost = "0.13.0" serde = { version = "1.0", optional = true } serde_json = { workspace = true, optional = true } diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 731f6663b490..8db261c0efe4 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -38,7 +38,7 @@ chrono = { workspace = true } datafusion = { workspace = true, default-features = true } itertools = { workspace = true } object_store = { workspace = true } -pbjson-types = "0.6" +pbjson-types = "0.7" prost = "0.13" substrait = { version = "0.36.0", features = ["serde"] } url = { workspace = true } From a6cadde4efac5dc06110db021653b224ef1349c6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 10 Jul 2024 06:58:37 -0400 Subject: [PATCH 03/36] Update datafusion/substrait/Cargo.toml Co-authored-by: tison --- datafusion/substrait/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 8db261c0efe4..f618f844fea7 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -40,7 +40,7 @@ itertools = { workspace = true } object_store = { workspace = true } pbjson-types = "0.7" prost = "0.13" -substrait = { version = "0.36.0", features = ["serde"] } +substrait = { version = "0.37", features = ["serde"] } url = { workspace = true } [dev-dependencies] From 7900a07e6352211138c982099c6bf402d38339bd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 10 Jul 2024 07:03:07 -0400 Subject: [PATCH 04/36] Update vendored code --- .../src/generated/datafusion_proto_common.rs | 50 +++++++++---------- datafusion/proto/src/generated/pbjson.rs | 26 ++++++++++ datafusion/proto/src/generated/prost.rs | 14 +++--- 3 files changed, 58 insertions(+), 32 deletions(-) diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index b0674ff28d75..f8b1f356a361 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -45,7 +45,7 @@ pub struct ParquetFormat { pub options: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct AvroFormat {} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -83,10 +83,10 @@ pub struct Constraints { pub constraints: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct AvroOptions {} #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct ArrowOptions {} #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -131,7 +131,7 @@ pub struct Timestamp { pub timezone: ::prost::alloc::string::String, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct Decimal { #[prost(uint32, tag = "3")] pub precision: u32, @@ -209,7 +209,7 @@ pub mod scalar_nested_value { } } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct ScalarTime32Value { #[prost(oneof = "scalar_time32_value::Value", tags = "1, 2")] pub value: ::core::option::Option, @@ -217,7 +217,7 @@ pub struct ScalarTime32Value { /// Nested message and enum types in `ScalarTime32Value`. pub mod scalar_time32_value { #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum Value { #[prost(int32, tag = "1")] Time32SecondValue(i32), @@ -226,7 +226,7 @@ pub mod scalar_time32_value { } } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct ScalarTime64Value { #[prost(oneof = "scalar_time64_value::Value", tags = "1, 2")] pub value: ::core::option::Option, @@ -234,7 +234,7 @@ pub struct ScalarTime64Value { /// Nested message and enum types in `ScalarTime64Value`. pub mod scalar_time64_value { #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum Value { #[prost(int64, tag = "1")] Time64MicrosecondValue(i64), @@ -253,7 +253,7 @@ pub struct ScalarTimestampValue { /// Nested message and enum types in `ScalarTimestampValue`. pub mod scalar_timestamp_value { #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum Value { #[prost(int64, tag = "1")] TimeMicrosecondValue(i64), @@ -274,7 +274,7 @@ pub struct ScalarDictionaryValue { pub value: ::core::option::Option<::prost::alloc::boxed::Box>, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct IntervalDayTimeValue { #[prost(int32, tag = "1")] pub days: i32, @@ -282,7 +282,7 @@ pub struct IntervalDayTimeValue { pub milliseconds: i32, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct IntervalMonthDayNanoValue { #[prost(int32, tag = "1")] pub months: i32, @@ -542,10 +542,10 @@ pub mod arrow_type { /// } /// } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct EmptyMessage {} #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct JsonWriterOptions { #[prost(enumeration = "CompressionTypeVariant", tag = "1")] pub compression: i32, @@ -636,7 +636,7 @@ pub struct CsvOptions { } /// Options controlling CSV format #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct JsonOptions { /// Compression type #[prost(enumeration = "CompressionTypeVariant", tag = "1")] @@ -692,7 +692,7 @@ pub struct ColumnOptions { /// Nested message and enum types in `ColumnOptions`. pub mod column_options { #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum BloomFilterEnabledOpt { #[prost(bool, tag = "1")] BloomFilterEnabled(bool), @@ -704,7 +704,7 @@ pub mod column_options { Encoding(::prost::alloc::string::String), } #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum DictionaryEnabledOpt { #[prost(bool, tag = "3")] DictionaryEnabled(bool), @@ -722,19 +722,19 @@ pub mod column_options { StatisticsEnabled(::prost::alloc::string::String), } #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum BloomFilterFppOpt { #[prost(double, tag = "6")] BloomFilterFpp(f64), } #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum BloomFilterNdvOpt { #[prost(uint64, tag = "7")] BloomFilterNdv(u64), } #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum MaxStatisticsSizeOpt { #[prost(uint32, tag = "8")] MaxStatisticsSize(u32), @@ -826,7 +826,7 @@ pub struct ParquetOptions { /// Nested message and enum types in `ParquetOptions`. pub mod parquet_options { #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum MetadataSizeHintOpt { #[prost(uint64, tag = "4")] MetadataSizeHint(u64), @@ -838,7 +838,7 @@ pub mod parquet_options { Compression(::prost::alloc::string::String), } #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum DictionaryEnabledOpt { #[prost(bool, tag = "11")] DictionaryEnabled(bool), @@ -850,13 +850,13 @@ pub mod parquet_options { StatisticsEnabled(::prost::alloc::string::String), } #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum MaxStatisticsSizeOpt { #[prost(uint64, tag = "14")] MaxStatisticsSize(u64), } #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum ColumnIndexTruncateLengthOpt { #[prost(uint64, tag = "17")] ColumnIndexTruncateLength(u64), @@ -868,13 +868,13 @@ pub mod parquet_options { Encoding(::prost::alloc::string::String), } #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum BloomFilterFppOpt { #[prost(double, tag = "21")] BloomFilterFpp(f64), } #[allow(clippy::derive_partial_eq_without_eq)] - #[derive(Clone, PartialEq, ::prost::Oneof)] + #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum BloomFilterNdvOpt { #[prost(uint64, tag = "22")] BloomFilterNdv(u64), diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 905f0d984955..7150e12fd065 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -12,6 +12,7 @@ impl serde::Serialize for AggLimit { let mut struct_ser = serializer.serialize_struct("datafusion.AggLimit", len)?; if self.limit != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("limit", ToString::to_string(&self.limit).as_str())?; } struct_ser.end() @@ -2545,6 +2546,7 @@ impl serde::Serialize for CopyToNode { } if !self.file_type.is_empty() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("fileType", pbjson::private::base64::encode(&self.file_type).as_str())?; } if !self.partition_by.is_empty() { @@ -4132,6 +4134,7 @@ impl serde::Serialize for CustomTableScanNode { } if !self.custom_table_data.is_empty() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("customTableData", pbjson::private::base64::encode(&self.custom_table_data).as_str())?; } struct_ser.end() @@ -5216,10 +5219,12 @@ impl serde::Serialize for FileRange { let mut struct_ser = serializer.serialize_struct("datafusion.FileRange", len)?; if self.start != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("start", ToString::to_string(&self.start).as_str())?; } if self.end != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("end", ToString::to_string(&self.end).as_str())?; } struct_ser.end() @@ -6101,6 +6106,7 @@ impl serde::Serialize for GlobalLimitExecNode { } if self.fetch != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("fetch", ToString::to_string(&self.fetch).as_str())?; } struct_ser.end() @@ -6536,6 +6542,7 @@ impl serde::Serialize for HashRepartition { } if self.partition_count != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("partitionCount", ToString::to_string(&self.partition_count).as_str())?; } struct_ser.end() @@ -8588,10 +8595,12 @@ impl serde::Serialize for LimitNode { } if self.skip != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("skip", ToString::to_string(&self.skip).as_str())?; } if self.fetch != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("fetch", ToString::to_string(&self.fetch).as_str())?; } struct_ser.end() @@ -10053,6 +10062,7 @@ impl serde::Serialize for LogicalExtensionNode { let mut struct_ser = serializer.serialize_struct("datafusion.LogicalExtensionNode", len)?; if !self.node.is_empty() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("node", pbjson::private::base64::encode(&self.node).as_str())?; } if !self.inputs.is_empty() { @@ -12175,14 +12185,17 @@ impl serde::Serialize for PartitionStats { let mut struct_ser = serializer.serialize_struct("datafusion.PartitionStats", len)?; if self.num_rows != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("numRows", ToString::to_string(&self.num_rows).as_str())?; } if self.num_batches != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("numBatches", ToString::to_string(&self.num_batches).as_str())?; } if self.num_bytes != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("numBytes", ToString::to_string(&self.num_bytes).as_str())?; } if !self.column_stats.is_empty() { @@ -12339,10 +12352,12 @@ impl serde::Serialize for PartitionedFile { } if self.size != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("size", ToString::to_string(&self.size).as_str())?; } if self.last_modified_ns != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("lastModifiedNs", ToString::to_string(&self.last_modified_ns).as_str())?; } if !self.partition_values.is_empty() { @@ -12507,6 +12522,7 @@ impl serde::Serialize for Partitioning { match v { partitioning::PartitionMethod::RoundRobin(v) => { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("roundRobin", ToString::to_string(&v).as_str())?; } partitioning::PartitionMethod::Hash(v) => { @@ -12514,6 +12530,7 @@ impl serde::Serialize for Partitioning { } partitioning::PartitionMethod::Unknown(v) => { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("unknown", ToString::to_string(&v).as_str())?; } } @@ -13800,6 +13817,7 @@ impl serde::Serialize for PhysicalExtensionNode { let mut struct_ser = serializer.serialize_struct("datafusion.PhysicalExtensionNode", len)?; if !self.node.is_empty() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("node", pbjson::private::base64::encode(&self.node).as_str())?; } if !self.inputs.is_empty() { @@ -13914,6 +13932,7 @@ impl serde::Serialize for PhysicalHashRepartition { } if self.partition_count != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("partitionCount", ToString::to_string(&self.partition_count).as_str())?; } struct_ser.end() @@ -15130,6 +15149,7 @@ impl serde::Serialize for PhysicalScalarUdfNode { } if let Some(v) = self.fun_definition.as_ref() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; } if let Some(v) = self.return_type.as_ref() { @@ -16940,6 +16960,7 @@ impl serde::Serialize for RepartitionNode { match v { repartition_node::PartitionMethod::RoundRobin(v) => { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("roundRobin", ToString::to_string(&v).as_str())?; } repartition_node::PartitionMethod::Hash(v) => { @@ -17162,6 +17183,7 @@ impl serde::Serialize for ScalarUdfExprNode { } if let Some(v) = self.fun_definition.as_ref() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; } struct_ser.end() @@ -17730,6 +17752,7 @@ impl serde::Serialize for SortExecNode { } if self.fetch != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("fetch", ToString::to_string(&self.fetch).as_str())?; } if self.preserve_partitioning { @@ -17999,6 +18022,7 @@ impl serde::Serialize for SortNode { } if self.fetch != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("fetch", ToString::to_string(&self.fetch).as_str())?; } struct_ser.end() @@ -18127,6 +18151,7 @@ impl serde::Serialize for SortPreservingMergeExecNode { } if self.fetch != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("fetch", ToString::to_string(&self.fetch).as_str())?; } struct_ser.end() @@ -19572,6 +19597,7 @@ impl serde::Serialize for ValuesNode { let mut struct_ser = serializer.serialize_struct("datafusion.ValuesNode", len)?; if self.n_cols != 0 { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("nCols", ToString::to_string(&self.n_cols).as_str())?; } if !self.values_list.is_empty() { diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index b16d26ee6e1e..caf2452e6baa 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -231,7 +231,7 @@ pub struct HashRepartition { pub partition_count: u64, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct EmptyRelationNode { #[prost(bool, tag = "1")] pub produce_one_row: bool, @@ -435,7 +435,7 @@ pub struct UnnestNode { pub options: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct UnnestOptions { #[prost(bool, tag = "1")] pub preserve_nulls: bool, @@ -921,7 +921,7 @@ pub struct WindowFrameBound { pub bound_value: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct FixedSizeBinary { #[prost(int32, tag = "1")] pub length: i32, @@ -1471,7 +1471,7 @@ pub struct FileGroup { pub files: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct ScanLimit { /// wrap into a message to make it optional #[prost(uint32, tag = "1")] @@ -1716,7 +1716,7 @@ pub struct MaybePhysicalSortExprs { pub sort_expr: ::prost::alloc::vec::Vec, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct AggLimit { /// wrap into a message to make it optional #[prost(uint64, tag = "1")] @@ -1870,7 +1870,7 @@ pub struct JoinFilter { pub schema: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct ColumnIndex { #[prost(uint32, tag = "1")] pub index: u32, @@ -1896,7 +1896,7 @@ pub struct PartitionedFile { pub statistics: ::core::option::Option, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct FileRange { #[prost(int64, tag = "1")] pub start: i64, From 5ea5ced2bfee3a36b23c282fc7d6ff6dcafc399d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 10 Jul 2024 07:04:48 -0400 Subject: [PATCH 05/36] revert upgrade in datafusion-examples until arrow-flight is updated --- datafusion-examples/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index bef16ebdbc24..626c365af21c 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -72,14 +72,14 @@ log = { workspace = true } mimalloc = { version = "0.1", default-features = false } num_cpus = { workspace = true } object_store = { workspace = true, features = ["aws", "http"] } -prost = { version = "0.13", default-features = false } +prost = { version = "0.12", default-features = false } prost-derive = { version = "0.13", default-features = false } serde = { version = "1.0.136", features = ["derive"] } serde_json = { workspace = true } tempfile = { workspace = true } test-utils = { path = "../test-utils" } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } -tonic = "0.12" +tonic = "0.11" url = { workspace = true } uuid = "1.7" From 669498364371de08b410fcb3f9691350c429f3fb Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:04:24 -0400 Subject: [PATCH 06/36] Pin to pre-release arrow-rs --- Cargo.toml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 02b1f1ccd92a..23a02ae5090e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -164,3 +164,20 @@ large_futures = "warn" [workspace.lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] } unused_imports = "deny" + + +## Temporary arrow-rs patch until 53.0.0 is released + +[patch.crates-io] +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } From 6cc5db13cbcf51f24995cbfaec49398407226df2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:06:05 -0400 Subject: [PATCH 07/36] update pyo3 --- datafusion/common/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 8435d0632576..e5a657b88730 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -61,7 +61,7 @@ num_cpus = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } paste = "1.0.15" -pyo3 = { version = "0.21.0", optional = true } +pyo3 = { version = "0.22.0", optional = true } sqlparser = { workspace = true } [target.'cfg(target_family = "wasm")'.dependencies] From 68b6e6c12d03f84866850f3f097a7812e9bc543e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:11:34 -0400 Subject: [PATCH 08/36] Update to use new arrow apis --- .../core/src/datasource/file_format/parquet.rs | 2 +- .../physical_plan/parquet/page_filter.rs | 5 ++++- .../physical_plan/parquet/row_group_filter.rs | 16 ++++++++++++++-- datafusion/functions/src/regex/regexpreplace.rs | 3 +-- .../physical-expr-common/src/binary_view_map.rs | 2 +- datafusion/physical-plan/src/coalesce_batches.rs | 5 +++-- 6 files changed, 24 insertions(+), 9 deletions(-) diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index f233f3842c8c..125688a38b00 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -1999,7 +1999,7 @@ mod tests { // test result in int_col let int_col_index = page_index.get(4).unwrap(); - let int_col_offset = offset_index.get(4).unwrap(); + let int_col_offset = offset_index.get(4).unwrap().page_locations(); // 325 pages in int_col assert_eq!(int_col_offset.len(), 325); diff --git a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs index e4d26a460ecd..4e71993b5153 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs @@ -392,13 +392,16 @@ impl<'a> PagesPruningStatistics<'a> { trace!("No page offsets for row group {row_group_index}, skipping"); return None; }; - let Some(page_offsets) = row_group_page_offsets.get(parquet_column_index) else { + let Some(offset_index_metadata) = + row_group_page_offsets.get(parquet_column_index) + else { trace!( "No page offsets for column {:?} in row group {row_group_index}, skipping", converter.arrow_field() ); return None; }; + let page_offsets = offset_index_metadata.page_locations(); Some(Self { row_group_index, diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs index 6a6910748fc8..6def5e6270ad 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs @@ -487,11 +487,23 @@ mod tests { let schema_descr = get_test_schema_descr(vec![field]); let rgm1 = get_row_group_meta_data( &schema_descr, - vec![ParquetStatistics::int32(Some(1), Some(10), None, 0, false)], + vec![ParquetStatistics::int32( + Some(1), + Some(10), + None, + Some(0), + false, + )], ); let rgm2 = get_row_group_meta_data( &schema_descr, - vec![ParquetStatistics::int32(Some(11), Some(20), None, 0, false)], + vec![ParquetStatistics::int32( + Some(11), + Some(20), + None, + Some(0), + false, + )], ); let metrics = parquet_file_metrics(); diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index d28c6cd36d65..0b0f7287e1ec 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -401,8 +401,7 @@ fn _regexp_replace_static_pattern_replace( DataType::Utf8View => { let string_view_array = as_string_view_array(&args[0])?; - let mut builder = StringViewBuilder::with_capacity(string_view_array.len()) - .with_block_size(1024 * 1024 * 2); + let mut builder = StringViewBuilder::with_capacity(string_view_array.len()); for val in string_view_array.iter() { if let Some(val) = val { diff --git a/datafusion/physical-expr-common/src/binary_view_map.rs b/datafusion/physical-expr-common/src/binary_view_map.rs index 18bc6801aa60..bdcf7bbacc69 100644 --- a/datafusion/physical-expr-common/src/binary_view_map.rs +++ b/datafusion/physical-expr-common/src/binary_view_map.rs @@ -149,7 +149,7 @@ where output_type, map: hashbrown::raw::RawTable::with_capacity(INITIAL_MAP_CAPACITY), map_size: 0, - builder: GenericByteViewBuilder::new().with_block_size(2 * 1024 * 1024), + builder: GenericByteViewBuilder::new(), random_state: RandomState::new(), hashes_buffer: vec![], null: None, diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 5589027694fe..34b5a0d997cd 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -535,7 +535,7 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch { // See https://github.com/apache/arrow-rs/issues/6094 for more details. let mut builder = StringViewBuilder::with_capacity(s.len()); if ideal_buffer_size > 0 { - builder = builder.with_block_size(ideal_buffer_size as u32); + builder = builder.with_fixed_block_size(ideal_buffer_size as u32); } for v in s.iter() { @@ -856,7 +856,8 @@ mod tests { impl StringViewTest { /// Create a `StringViewArray` with the parameters specified in this struct fn build(self) -> StringViewArray { - let mut builder = StringViewBuilder::with_capacity(100).with_block_size(8192); + let mut builder = + StringViewBuilder::with_capacity(100).with_fixed_block_size(8192); loop { for &v in self.strings.iter() { builder.append_option(v); From aa310fe4b06c697dbc358ab15e1a2f5fbbc6d299 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:13:58 -0400 Subject: [PATCH 09/36] update for new api --- .../physical_plan/parquet/row_group_filter.rs | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs index 6def5e6270ad..27c2151baf27 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs @@ -532,11 +532,11 @@ mod tests { let schema_descr = get_test_schema_descr(vec![field]); let rgm1 = get_row_group_meta_data( &schema_descr, - vec![ParquetStatistics::int32(None, None, None, 0, false)], + vec![ParquetStatistics::int32(None, None, None, Some(0), false)], ); let rgm2 = get_row_group_meta_data( &schema_descr, - vec![ParquetStatistics::int32(Some(11), Some(20), None, 0, false)], + vec![ParquetStatistics::int32(Some(11), Some(20), None, Some(0), false)], ); let metrics = parquet_file_metrics(); // missing statistics for first row group mean that the result from the predicate expression @@ -572,15 +572,15 @@ mod tests { let rgm1 = get_row_group_meta_data( &schema_descr, vec![ - ParquetStatistics::int32(Some(1), Some(10), None, 0, false), - ParquetStatistics::int32(Some(1), Some(10), None, 0, false), + ParquetStatistics::int32(Some(1), Some(10), None, Some(0), false), + ParquetStatistics::int32(Some(1), Some(10), None, Some(0), false), ], ); let rgm2 = get_row_group_meta_data( &schema_descr, vec![ - ParquetStatistics::int32(Some(11), Some(20), None, 0, false), - ParquetStatistics::int32(Some(11), Some(20), None, 0, false), + ParquetStatistics::int32(Some(11), Some(20), None, Some(0), false), + ParquetStatistics::int32(Some(11), Some(20), None, Some(0), false), ], ); @@ -645,16 +645,16 @@ mod tests { let rgm1 = get_row_group_meta_data( &schema_descr, vec![ - ParquetStatistics::int32(Some(-10), Some(-1), None, 0, false), // c2 - ParquetStatistics::int32(Some(1), Some(10), None, 0, false), + ParquetStatistics::int32(Some(-10), Some(-1), None, Some(0), false), // c2 + ParquetStatistics::int32(Some(1), Some(10), None, Some(0), false), ], ); // rg1 has c2 greater than zero, c1 less than zero let rgm2 = get_row_group_meta_data( &schema_descr, vec![ - ParquetStatistics::int32(Some(1), Some(10), None, 0, false), - ParquetStatistics::int32(Some(-10), Some(-1), None, 0, false), + ParquetStatistics::int32(Some(1), Some(10), None, Some(0), false), + ParquetStatistics::int32(Some(-10), Some(-1), None, Some(0), false), ], ); @@ -681,15 +681,15 @@ mod tests { let rgm1 = get_row_group_meta_data( &schema_descr, vec![ - ParquetStatistics::int32(Some(1), Some(10), None, 0, false), - ParquetStatistics::boolean(Some(false), Some(true), None, 0, false), + ParquetStatistics::int32(Some(1), Some(10), None, Some(0), false), + ParquetStatistics::boolean(Some(false), Some(true), None, Some(0), false), ], ); let rgm2 = get_row_group_meta_data( &schema_descr, vec![ - ParquetStatistics::int32(Some(11), Some(20), None, 0, false), - ParquetStatistics::boolean(Some(false), Some(true), None, 1, false), + ParquetStatistics::int32(Some(11), Some(20), None, Some(0), false), + ParquetStatistics::boolean(Some(false), Some(true), None, Some(1), false), ], ); vec![rgm1, rgm2] @@ -787,7 +787,7 @@ mod tests { Some(100), Some(600), None, - 0, + Some(0), false, )], ); @@ -795,13 +795,13 @@ mod tests { &schema_descr, // [0.1, 0.2] // c1 > 5, this row group will not be included in the results. - vec![ParquetStatistics::int32(Some(10), Some(20), None, 0, false)], + vec![ParquetStatistics::int32(Some(10), Some(20), None, Some(0), false)], ); let rgm3 = get_row_group_meta_data( &schema_descr, // [1, None] // c1 > 5, this row group can not be filtered out, so will be included in the results. - vec![ParquetStatistics::int32(Some(100), None, None, 0, false)], + vec![ParquetStatistics::int32(Some(100), None, None, Some(0), false)], ); let metrics = parquet_file_metrics(); let mut row_groups = RowGroupAccessPlanFilter::new(ParquetAccessPlan::new_all(3)); @@ -849,7 +849,7 @@ mod tests { Some(100), Some(600), None, - 0, + Some(0), false, )], ); @@ -857,19 +857,19 @@ mod tests { &schema_descr, // [10, 20] // c1 > 5, this row group will be included in the results. - vec![ParquetStatistics::int32(Some(10), Some(20), None, 0, false)], + vec![ParquetStatistics::int32(Some(10), Some(20), None, Some(0), false)], ); let rgm3 = get_row_group_meta_data( &schema_descr, // [0, 2] // c1 > 5, this row group will not be included in the results. - vec![ParquetStatistics::int32(Some(0), Some(2), None, 0, false)], + vec![ParquetStatistics::int32(Some(0), Some(2), None, Some(0), false)], ); let rgm4 = get_row_group_meta_data( &schema_descr, // [None, 2] // c1 > 5, this row group can not be filtered out, so will be included in the results. - vec![ParquetStatistics::int32(None, Some(2), None, 0, false)], + vec![ParquetStatistics::int32(None, Some(2), None, Some(0), false)], ); let metrics = parquet_file_metrics(); let mut row_groups = RowGroupAccessPlanFilter::new(ParquetAccessPlan::new_all(4)); @@ -908,19 +908,19 @@ mod tests { Some(600), Some(800), None, - 0, + Some(0), false, )], ); let rgm2 = get_row_group_meta_data( &schema_descr, // [0.1, 0.2] - vec![ParquetStatistics::int64(Some(10), Some(20), None, 0, false)], + vec![ParquetStatistics::int64(Some(10), Some(20), None, Some(0), false)], ); let rgm3 = get_row_group_meta_data( &schema_descr, // [0.1, 0.2] - vec![ParquetStatistics::int64(None, None, None, 0, false)], + vec![ParquetStatistics::int64(None, None, None, Some(0), false)], ); let metrics = parquet_file_metrics(); let mut row_groups = RowGroupAccessPlanFilter::new(ParquetAccessPlan::new_all(3)); @@ -969,7 +969,7 @@ mod tests { 8000i128.to_be_bytes().to_vec(), ))), None, - 0, + Some(0), false, )], ); @@ -985,7 +985,7 @@ mod tests { 20000i128.to_be_bytes().to_vec(), ))), None, - 0, + Some(0), false, )], ); @@ -993,7 +993,7 @@ mod tests { let rgm3 = get_row_group_meta_data( &schema_descr, vec![ParquetStatistics::fixed_len_byte_array( - None, None, None, 0, false, + None, None, None, Some(0), false, )], ); let metrics = parquet_file_metrics(); @@ -1039,7 +1039,7 @@ mod tests { // 80.00 Some(ByteArray::from(8000i128.to_be_bytes().to_vec())), None, - 0, + Some(0), false, )], ); @@ -1051,13 +1051,13 @@ mod tests { // 200.00 Some(ByteArray::from(20000i128.to_be_bytes().to_vec())), None, - 0, + Some(0), false, )], ); let rgm3 = get_row_group_meta_data( &schema_descr, - vec![ParquetStatistics::byte_array(None, None, None, 0, false)], + vec![ParquetStatistics::byte_array(None, None, None, Some(0), false)], ); let metrics = parquet_file_metrics(); let mut row_groups = RowGroupAccessPlanFilter::new(ParquetAccessPlan::new_all(3)); From f454e89613c67aceb0ca577e7ece9bcfac4c5889 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:14:06 -0400 Subject: [PATCH 10/36] Update tonic in examples --- datafusion-examples/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 626c365af21c..18cdb34a6ea7 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -79,7 +79,7 @@ serde_json = { workspace = true } tempfile = { workspace = true } test-utils = { path = "../test-utils" } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } -tonic = "0.11" +tonic = "0.12.1" url = { workspace = true } uuid = "1.7" From 98bb11a2273b171e6d9c13d5e85b43d13402c6c6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:16:43 -0400 Subject: [PATCH 11/36] update prost --- datafusion-examples/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 18cdb34a6ea7..3334ac29da77 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -72,8 +72,8 @@ log = { workspace = true } mimalloc = { version = "0.1", default-features = false } num_cpus = { workspace = true } object_store = { workspace = true, features = ["aws", "http"] } -prost = { version = "0.12", default-features = false } -prost-derive = { version = "0.13", default-features = false } +prost = { version = "0.13.1", default-features = false } +prost-derive = { version = "0.13.1", default-features = false } serde = { version = "1.0.136", features = ["derive"] } serde_json = { workspace = true } tempfile = { workspace = true } From 5b0fa44d31055e61881da234a6fa9bdc33ae012f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:36:22 -0400 Subject: [PATCH 12/36] update datafusion-cli/cargo --- datafusion-cli/Cargo.lock | 142 +++++++++++++++++++------------------- datafusion-cli/Cargo.toml | 18 +++++ 2 files changed, 88 insertions(+), 72 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 90995c1d116a..a03960f02ba8 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -131,8 +131,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "arrow-arith", "arrow-array", @@ -152,8 +151,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "arrow-array", "arrow-buffer", @@ -167,8 +165,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "ahash", "arrow-buffer", @@ -184,8 +181,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c975484888fc95ec4a632cdc98be39c085b1bb518531b0c80c5d462063e5daa1" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "bytes", "half", @@ -195,8 +191,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "arrow-array", "arrow-buffer", @@ -216,8 +211,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "arrow-array", "arrow-buffer", @@ -235,8 +229,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "arrow-buffer", "arrow-schema", @@ -247,8 +240,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "arrow-array", "arrow-buffer", @@ -262,8 +254,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "arrow-array", "arrow-buffer", @@ -272,7 +263,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.3.0", + "indexmap 2.4.0", "lexical-core", "num", "serde", @@ -282,8 +273,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "arrow-array", "arrow-buffer", @@ -297,8 +287,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "ahash", "arrow-array", @@ -311,14 +300,12 @@ dependencies = [ [[package]] name = "arrow-schema" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" [[package]] name = "arrow-select" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "ahash", "arrow-array", @@ -331,8 +318,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "arrow-array", "arrow-buffer", @@ -875,12 +861,13 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.10" +version = "1.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292" +checksum = "72db2f7947ecee9b03b510377e8bb9077afa27176fdbff55c51027e976fdcc48" dependencies = [ "jobserver", "libc", + "shlex", ] [[package]] @@ -1163,7 +1150,7 @@ dependencies = [ "glob", "half", "hashbrown 0.14.5", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.12.1", "log", "num-traits", @@ -1386,7 +1373,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.14.5", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.12.1", "log", "paste", @@ -1415,7 +1402,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "hex", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.12.1", "log", "paste", @@ -1483,7 +1470,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.5", - "indexmap 2.3.0", + "indexmap 2.4.0", "itertools 0.12.1", "log", "once_cell", @@ -1848,7 +1835,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.3.0", + "indexmap 2.4.0", "slab", "tokio", "tokio-util", @@ -1867,7 +1854,7 @@ dependencies = [ "futures-core", "futures-sink", "http 1.1.0", - "indexmap 2.3.0", + "indexmap 2.4.0", "slab", "tokio", "tokio-util", @@ -2159,9 +2146,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.3.0" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" +checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" dependencies = [ "equivalent", "hashbrown 0.14.5", @@ -2226,9 +2213,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ "wasm-bindgen", ] @@ -2305,9 +2292,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.155" +version = "0.2.156" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "a5f43f184355eefb8d17fc948dbecf6c13be3c141f20d834ae842193a448c72a" [[package]] name = "libflate" @@ -2668,8 +2655,7 @@ dependencies = [ [[package]] name = "parquet" version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" dependencies = [ "ahash", "arrow-array", @@ -2729,7 +2715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.3.0", + "indexmap 2.4.0", ] [[package]] @@ -3388,18 +3374,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.207" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5665e14a49a4ea1b91029ba7d3bca9f299e1f7cfa194388ccc20f14743e784f2" +checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.207" +version = "1.0.208" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6aea2634c86b0e8ef2cfdc0c340baede54ec27b1e46febd7f80dffb2aa44a00e" +checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", @@ -3408,9 +3394,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.124" +version = "1.0.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66ad62847a56b3dba58cc891acd13884b9c61138d330c0d7b6181713d4fce38d" +checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed" dependencies = [ "itoa", "memchr", @@ -3441,6 +3427,12 @@ dependencies = [ "digest", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -3847,15 +3839,15 @@ dependencies = [ [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -4057,19 +4049,20 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" dependencies = [ "bumpalo", "log", @@ -4082,9 +4075,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" dependencies = [ "cfg-if", "js-sys", @@ -4094,9 +4087,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4104,9 +4097,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", @@ -4117,9 +4110,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "wasm-streams" @@ -4136,9 +4129,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" dependencies = [ "js-sys", "wasm-bindgen", @@ -4433,10 +4426,15 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", ] + +[[patch.unused]] +name = "arrow-flight" +version = "52.2.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index cbd9ffd0feba..564ad378e034 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -62,3 +62,21 @@ assert_cmd = "2.0" ctor = "0.2.0" predicates = "3.0" rstest = "0.17" + + + +## Temporary arrow-rs patch until 53.0.0 is released + +[patch.crates-io] +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } From 6f501bcd677d22c185b3ef2e65e43a9bc0eda4c5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:40:18 -0400 Subject: [PATCH 13/36] update test output --- datafusion/sqllogictest/test_files/aggregate.slt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 0cda24d6ff5e..8d3d00c9b3cf 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -1826,7 +1826,7 @@ from values (interval '2 month 15 days'), (interval '-2 month') ---- -Interval(MonthDayNano) 0 years -2 mons 0 days 0 hours 0 mins 0.000000000 secs 0 years 2 mons 15 days 0 hours 0 mins 0.000000000 secs +Interval(MonthDayNano) -2 mons 2 mons 15 days # aggregate Interval(DayTime) min/max query T?? @@ -1837,7 +1837,7 @@ from values (arrow_cast('-3 minutes', 'Interval(DayTime)')), (arrow_cast('30 minutes', 'Interval(DayTime)')); ---- -Interval(DayTime) 0 years 0 mons 0 days 0 hours -3 mins 0.000 secs 0 years 0 mons 0 days 1 hours 0 mins 0.000 secs +Interval(DayTime) -3 mins 1 hours # aggregate Interval(YearMonth) min/max query T?? @@ -1848,7 +1848,7 @@ from values (arrow_cast('13 months', 'Interval(YearMonth)')), (arrow_cast('1 year', 'Interval(YearMonth)')); ---- -Interval(YearMonth) -1 years 0 mons 0 days 0 hours 0 mins 0.00 secs 1 years 1 mons 0 days 0 hours 0 mins 0.00 secs +Interval(YearMonth) -1 years 0 mons 1 years 1 mons # aggregate query II From 3542aea36500d85930f80a95b148a459fec8156a Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:41:23 -0400 Subject: [PATCH 14/36] update --- datafusion/sqllogictest/test_files/arrow_typeof.slt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 448706744305..73183b60675a 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -289,22 +289,22 @@ query ? --- select arrow_cast(interval '30 minutes', 'Interval(MonthDayNano)'); ---- -0 years 0 mons 0 days 0 hours 30 mins 0.000000000 secs +30 mins query ? select arrow_cast('30 minutes', 'Interval(DayTime)'); ---- -0 years 0 mons 0 days 0 hours 30 mins 0.000 secs +30 mins query ? select arrow_cast('1 year 5 months', 'Interval(YearMonth)'); ---- -1 years 5 mons 0 days 0 hours 0 mins 0.00 secs +1 years 5 mons query ? select arrow_cast('30 minutes', 'Interval(MonthDayNano)'); ---- -0 years 0 mons 0 days 0 hours 30 mins 0.000000000 secs +30 mins ## Duration From e6416c367d2811e740e45143d073e4de37b14519 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:41:59 -0400 Subject: [PATCH 15/36] updates --- .../sqllogictest/test_files/interval.slt | 102 +++++++++--------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/datafusion/sqllogictest/test_files/interval.slt b/datafusion/sqllogictest/test_files/interval.slt index afb262cf95a5..077f38d5d5bb 100644 --- a/datafusion/sqllogictest/test_files/interval.slt +++ b/datafusion/sqllogictest/test_files/interval.slt @@ -45,250 +45,250 @@ Interval(MonthDayNano) Interval(MonthDayNano) query ? select interval '5' years ---- -0 years 0 mons 0 days 0 hours 0 mins 5.000000000 secs +5.000000000 secs # check all different kinds of intervals query ? select interval '5' year ---- -0 years 60 mons 0 days 0 hours 0 mins 0.000000000 secs +60 mons query ? select interval '5' month ---- -0 years 5 mons 0 days 0 hours 0 mins 0.000000000 secs +5 mons query ? select interval '5' months ---- -0 years 0 mons 0 days 0 hours 0 mins 5.000000000 secs +5.000000000 secs query ? select interval '5' week ---- -0 years 0 mons 35 days 0 hours 0 mins 0.000000000 secs +35 days query ? select interval '5' day ---- -0 years 0 mons 5 days 0 hours 0 mins 0.000000000 secs +5 days query ? select interval '5' hour ---- -0 years 0 mons 0 days 5 hours 0 mins 0.000000000 secs +5 hours ## This seems wrong (5 mons) query ? select interval '5' hours ---- -0 years 0 mons 0 days 0 hours 0 mins 5.000000000 secs +5.000000000 secs query ? select interval '5' minute ---- -0 years 0 mons 0 days 0 hours 5 mins 0.000000000 secs +5 mins query ? select interval '5' second ---- -0 years 0 mons 0 days 0 hours 0 mins 5.000000000 secs +5.000000000 secs query ? select interval '5' millisecond ---- -0 years 0 mons 0 days 0 hours 0 mins 0.005000000 secs +0.005000000 secs query ? select interval '5' milliseconds ---- -0 years 0 mons 0 days 0 hours 0 mins 0.005000000 secs +0.005000000 secs query ? select interval '5' microsecond ---- -0 years 0 mons 0 days 0 hours 0 mins 0.000005000 secs +0.000005000 secs query ? select interval '5' microseconds ---- -0 years 0 mons 0 days 0 hours 0 mins 0.000005000 secs +0.000005000 secs query ? select interval '5' nanosecond ---- -0 years 0 mons 0 days 0 hours 0 mins 0.000000005 secs +0.000000005 secs query ? select interval '5' nanoseconds ---- -0 years 0 mons 0 days 0 hours 0 mins 0.000000005 secs +0.000000005 secs query ? select interval '5 YEAR' ---- -0 years 60 mons 0 days 0 hours 0 mins 0.000000000 secs +60 mons query ? select interval '5 MONTH' ---- -0 years 5 mons 0 days 0 hours 0 mins 0.000000000 secs +5 mons query ? select interval '5 WEEK' ---- -0 years 0 mons 35 days 0 hours 0 mins 0.000000000 secs +35 days query ? select interval '5 DAY' ---- -0 years 0 mons 5 days 0 hours 0 mins 0.000000000 secs +5 days query ? select interval '5 HOUR' ---- -0 years 0 mons 0 days 5 hours 0 mins 0.000000000 secs +5 hours query ? select interval '5 HOURS' ---- -0 years 0 mons 0 days 5 hours 0 mins 0.000000000 secs +5 hours query ? select interval '5 MINUTE' ---- -0 years 0 mons 0 days 0 hours 5 mins 0.000000000 secs +5 mins query ? select interval '5 SECOND' ---- -0 years 0 mons 0 days 0 hours 0 mins 5.000000000 secs +5.000000000 secs query ? select interval '5 SECONDS' ---- -0 years 0 mons 0 days 0 hours 0 mins 5.000000000 secs +5.000000000 secs query ? select interval '5 MILLISECOND' ---- -0 years 0 mons 0 days 0 hours 0 mins 0.005000000 secs +0.005000000 secs query ? select interval '5 MILLISECONDS' ---- -0 years 0 mons 0 days 0 hours 0 mins 0.005000000 secs +0.005000000 secs query ? select interval '5 MICROSECOND' ---- -0 years 0 mons 0 days 0 hours 0 mins 0.000005000 secs +0.000005000 secs query ? select interval '5 MICROSECONDS' ---- -0 years 0 mons 0 days 0 hours 0 mins 0.000005000 secs +0.000005000 secs query ? select interval '5 NANOSECOND' ---- -0 years 0 mons 0 days 0 hours 0 mins 0.000000005 secs +0.000000005 secs query ? select interval '5 NANOSECONDS' ---- -0 years 0 mons 0 days 0 hours 0 mins 0.000000005 secs +0.000000005 secs query ? select interval '5 YEAR 5 MONTH 5 DAY 5 HOUR 5 MINUTE 5 SECOND 5 MILLISECOND 5 MICROSECOND 5 NANOSECOND' ---- -0 years 65 mons 5 days 5 hours 5 mins 5.005005005 secs +65 mons 5 days 5 hours 5 mins 5.005005005 secs # Interval with string literal addition query ? select interval '1 month' + '1 month' ---- -0 years 2 mons 0 days 0 hours 0 mins 0.000000000 secs +2 mons # Interval with string literal addition and leading field query ? select interval '1' + '1' month ---- -0 years 2 mons 0 days 0 hours 0 mins 0.000000000 secs +2 mons # Interval with nested string literal addition query ? select interval '1 month' + '1 month' + '1 month' ---- -0 years 3 mons 0 days 0 hours 0 mins 0.000000000 secs +3 mons # Interval with nested string literal addition and leading field query ? select interval '1' + '1' + '1' month ---- -0 years 3 mons 0 days 0 hours 0 mins 0.000000000 secs +3 mons # Interval mega nested string literal addition query ? select interval '1 year' + '1 month' + '1 day' + '1 hour' + '1 minute' + '1 second' + '1 millisecond' + '1 microsecond' + '1 nanosecond' ---- -0 years 13 mons 1 days 1 hours 1 mins 1.001001001 secs +13 mons 1 days 1 hours 1 mins 1.001001001 secs # Interval with string literal subtraction query ? select interval '1 month' - '1 day'; ---- -0 years 1 mons -1 days 0 hours 0 mins 0.000000000 secs +1 mons -1 days # Interval with string literal subtraction and leading field query ? select interval '5' - '1' - '2' year; ---- -0 years 24 mons 0 days 0 hours 0 mins 0.000000000 secs +24 mons # Interval with nested string literal subtraction query ? select interval '1 month' - '1 day' - '1 hour'; ---- -0 years 1 mons -1 days -1 hours 0 mins 0.000000000 secs +1 mons -1 days -1 hours # Interval with nested string literal subtraction and leading field query ? select interval '10' - '1' - '1' month; ---- -0 years 8 mons 0 days 0 hours 0 mins 0.000000000 secs +8 mons # Interval mega nested string literal subtraction query ? select interval '1 year' - '1 month' - '1 day' - '1 hour' - '1 minute' - '1 second' - '1 millisecond' - '1 microsecond' - '1 nanosecond' ---- -0 years 11 mons -1 days -1 hours -1 mins -1.001001001 secs +11 mons -1 days -1 hours -1 mins -1.001001001 secs # Interval with string literal negation and leading field query ? select -interval '5' - '1' - '2' year; ---- -0 years -96 mons 0 days 0 hours 0 mins 0.000000000 secs +-96 mons # Interval with nested string literal negation query ? select -interval '1 month' + '1 day' + '1 hour'; ---- -0 years -1 mons 1 days 1 hours 0 mins 0.000000000 secs +-1 mons 1 days 1 hours # Interval with nested string literal negation and leading field query ? select -interval '10' - '1' - '1' month; ---- -0 years -12 mons 0 days 0 hours 0 mins 0.000000000 secs +-12 mons # Interval mega nested string literal negation query ? select -interval '1 year' - '1 month' - '1 day' - '1 hour' - '1 minute' - '1 second' - '1 millisecond' - '1 microsecond' - '1 nanosecond' ---- -0 years -13 mons -1 days -1 hours -1 mins -1.001001001 secs +-13 mons -1 days -1 hours -1 mins -1.001001001 secs # Interval string literal + date query D @@ -343,7 +343,7 @@ select arrow_typeof(i) from t; ---- -0 years 0 mons 5 days 0 hours 0 mins 0.000000003 secs Interval(MonthDayNano) +5 days 0.000000003 secs Interval(MonthDayNano) statement ok @@ -359,8 +359,8 @@ insert into t values ('6 days 7 nanoseconds'::interval) query ? rowsort select -i from t order by 1; ---- -0 years 0 mons -5 days 0 hours 0 mins -0.000000003 secs -0 years 0 mons -6 days 0 hours 0 mins -0.000000007 secs +-5 days -0.000000003 secs +-6 days -0.000000007 secs query ?T rowsort select @@ -368,8 +368,8 @@ select arrow_typeof(i) from t; ---- -0 years 0 mons 5 days 0 hours 0 mins 0.000000003 secs Interval(MonthDayNano) -0 years 0 mons 6 days 0 hours 0 mins 0.000000007 secs Interval(MonthDayNano) +5 days 0.000000003 secs Interval(MonthDayNano) +6 days 0.000000007 secs Interval(MonthDayNano) statement ok drop table t; From 3a0648854011410362af53f5c40dc2a5546ce99b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:42:36 -0400 Subject: [PATCH 16/36] updates --- datafusion/sqllogictest/test_files/repartition_scan.slt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index 6b9cb521f5f8..4c86312f9e51 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -61,7 +61,7 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: column1@0 != 42 -03)----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..104], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:104..208], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:208..312], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:312..414]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] +03)----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..87], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:87..174], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:174..261], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:261..347]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] # disable round robin repartitioning statement ok @@ -77,7 +77,7 @@ logical_plan physical_plan 01)CoalesceBatchesExec: target_batch_size=8192 02)--FilterExec: column1@0 != 42 -03)----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..104], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:104..208], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:208..312], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:312..414]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] +03)----ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..87], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:87..174], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:174..261], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:261..347]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] # enable round robin repartitioning again statement ok @@ -102,7 +102,7 @@ physical_plan 02)--SortExec: expr=[column1@0 ASC NULLS LAST], preserve_partitioning=[true] 03)----CoalesceBatchesExec: target_batch_size=8192 04)------FilterExec: column1@0 != 42 -05)--------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..205], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:205..405, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..5], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:5..210], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:210..414]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] +05)--------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..172], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:172..338, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..6], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:6..178], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:178..347]]}, projection=[column1], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] ## Read the files as though they are ordered @@ -138,7 +138,7 @@ physical_plan 01)SortPreservingMergeExec: [column1@0 ASC NULLS LAST] 02)--CoalesceBatchesExec: target_batch_size=8192 03)----FilterExec: column1@0 != 42 -04)------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..202], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..207], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:207..414], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:202..405]]}, projection=[column1], output_ordering=[column1@0 ASC NULLS LAST], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] +04)------ParquetExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:0..169], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:0..173], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/2.parquet:173..347], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/repartition_scan/parquet_table/1.parquet:169..338]]}, projection=[column1], output_ordering=[column1@0 ASC NULLS LAST], predicate=column1@0 != 42, pruning_predicate=CASE WHEN column1_null_count@2 = column1_row_count@3 THEN false ELSE column1_min@0 != 42 OR 42 != column1_max@1 END, required_guarantees=[column1 not in (42)] # Cleanup statement ok From 5d7b0fe3f5d145bc92e9446a8b539d3923e6ecfd Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:45:42 -0400 Subject: [PATCH 17/36] update math --- datafusion/sqllogictest/test_files/math.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/math.slt b/datafusion/sqllogictest/test_files/math.slt index 6884d762612d..eece56942317 100644 --- a/datafusion/sqllogictest/test_files/math.slt +++ b/datafusion/sqllogictest/test_files/math.slt @@ -673,7 +673,7 @@ query error DataFusion error: Arrow error: Compute error: Signed integer overflo select lcm(2, 9223372036854775803); -query error DataFusion error: Arrow error: Compute error: Overflow happened on: 2107754225 \^ 1221660777 +query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 2107754225 \^ 1221660777 select power(2107754225, 1221660777); # factorial overflow From cb623d9f2e011ae3ee5fe30ef455c7c207803d12 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:46:21 -0400 Subject: [PATCH 18/36] update more --- datafusion/sqllogictest/test_files/ddl.slt | 2 +- datafusion/sqllogictest/test_files/expr.slt | 80 +++++++++---------- .../sqllogictest/test_files/timestamps.slt | 16 ++-- 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index a35e688479e7..c6b718d91831 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -707,7 +707,7 @@ create table t (i interval, x int) as values (interval '5 days 3 nanoseconds', C query ?I select * from t; ---- -0 years 0 mons 5 days 0 hours 0 mins 0.000000003 secs 1 +5 days 0.000000003 secs 1 statement ok drop table t; diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 3c3b0631e3ff..f893e38de657 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -122,203 +122,203 @@ SELECT query ? SELECT interval '1' ---- -0 years 0 mons 0 days 0 hours 0 mins 1.000000000 secs +1.000000000 secs query ? SELECT interval '1 second' ---- -0 years 0 mons 0 days 0 hours 0 mins 1.000000000 secs +1.000000000 secs query ? SELECT interval '500 milliseconds' ---- -0 years 0 mons 0 days 0 hours 0 mins 0.500000000 secs +0.500000000 secs query ? SELECT interval '5 second' ---- -0 years 0 mons 0 days 0 hours 0 mins 5.000000000 secs +5.000000000 secs query ? SELECT interval '0.5 minute' ---- -0 years 0 mons 0 days 0 hours 0 mins 30.000000000 secs +30.000000000 secs query ? SELECT interval '.5 minute' ---- -0 years 0 mons 0 days 0 hours 0 mins 30.000000000 secs +30.000000000 secs query ? SELECT interval '5 minute' ---- -0 years 0 mons 0 days 0 hours 5 mins 0.000000000 secs +5 mins query ? SELECT interval '5 minute 1 second' ---- -0 years 0 mons 0 days 0 hours 5 mins 1.000000000 secs +5 mins 1.000000000 secs query ? SELECT interval '1 hour' ---- -0 years 0 mons 0 days 1 hours 0 mins 0.000000000 secs +1 hours query ? SELECT interval '5 hour' ---- -0 years 0 mons 0 days 5 hours 0 mins 0.000000000 secs +5 hours query ? SELECT interval '1 day' ---- -0 years 0 mons 1 days 0 hours 0 mins 0.000000000 secs +1 days query ? SELECT interval '1 week' ---- -0 years 0 mons 7 days 0 hours 0 mins 0.000000000 secs +7 days query ? SELECT interval '2 weeks' ---- -0 years 0 mons 14 days 0 hours 0 mins 0.000000000 secs +14 days query ? SELECT interval '1 day 1' ---- -0 years 0 mons 1 days 0 hours 0 mins 1.000000000 secs +1 days 1.000000000 secs query ? SELECT interval '0.5' ---- -0 years 0 mons 0 days 0 hours 0 mins 0.500000000 secs +0.500000000 secs query ? SELECT interval '0.5 day 1' ---- -0 years 0 mons 0 days 12 hours 0 mins 1.000000000 secs +12 hours 1.000000000 secs query ? SELECT interval '0.49 day' ---- -0 years 0 mons 0 days 11 hours 45 mins 36.000000000 secs +11 hours 45 mins 36.000000000 secs query ? SELECT interval '0.499 day' ---- -0 years 0 mons 0 days 11 hours 58 mins 33.600000000 secs +11 hours 58 mins 33.600000000 secs query ? SELECT interval '0.4999 day' ---- -0 years 0 mons 0 days 11 hours 59 mins 51.360000000 secs +11 hours 59 mins 51.360000000 secs query ? SELECT interval '0.49999 day' ---- -0 years 0 mons 0 days 11 hours 59 mins 59.136000000 secs +11 hours 59 mins 59.136000000 secs query ? SELECT interval '0.49999999999 day' ---- -0 years 0 mons 0 days 11 hours 59 mins 59.999999136 secs +11 hours 59 mins 59.999999136 secs query ? SELECT interval '5 day' ---- -0 years 0 mons 5 days 0 hours 0 mins 0.000000000 secs +5 days # Hour is ignored, this matches PostgreSQL query ? SELECT interval '5 day' hour ---- -0 years 0 mons 5 days 0 hours 0 mins 0.000000000 secs +5 days query ? SELECT interval '5 day 4 hours 3 minutes 2 seconds 100 milliseconds' ---- -0 years 0 mons 5 days 4 hours 3 mins 2.100000000 secs +5 days 4 hours 3 mins 2.100000000 secs query ? SELECT interval '0.5 month' ---- -0 years 0 mons 15 days 0 hours 0 mins 0.000000000 secs +15 days query ? SELECT interval '0.5' month ---- -0 years 0 mons 15 days 0 hours 0 mins 0.000000000 secs +15 days query ? SELECT interval '1 month' ---- -0 years 1 mons 0 days 0 hours 0 mins 0.000000000 secs +1 mons query ? SELECT interval '1' MONTH ---- -0 years 1 mons 0 days 0 hours 0 mins 0.000000000 secs +1 mons query ? SELECT interval '5 month' ---- -0 years 5 mons 0 days 0 hours 0 mins 0.000000000 secs +5 mons query ? SELECT interval '13 month' ---- -0 years 13 mons 0 days 0 hours 0 mins 0.000000000 secs +13 mons query ? SELECT interval '0.5 year' ---- -0 years 6 mons 0 days 0 hours 0 mins 0.000000000 secs +6 mons query ? SELECT interval '1 year' ---- -0 years 12 mons 0 days 0 hours 0 mins 0.000000000 secs +12 mons query ? SELECT interval '1 decade' ---- -0 years 120 mons 0 days 0 hours 0 mins 0.000000000 secs +120 mons query ? SELECT interval '2 decades' ---- -0 years 240 mons 0 days 0 hours 0 mins 0.000000000 secs +240 mons query ? SELECT interval '1 century' ---- -0 years 1200 mons 0 days 0 hours 0 mins 0.000000000 secs +1200 mons query ? SELECT interval '2 year' ---- -0 years 24 mons 0 days 0 hours 0 mins 0.000000000 secs +24 mons query ? SELECT interval '1 year 1 day' ---- -0 years 12 mons 1 days 0 hours 0 mins 0.000000000 secs +12 mons 1 days query ? SELECT interval '1 year 1 day 1 hour' ---- -0 years 12 mons 1 days 1 hours 0 mins 0.000000000 secs +12 mons 1 days 1 hours query ? SELECT interval '1 year 1 day 1 hour 1 minute' ---- -0 years 12 mons 1 days 1 hours 1 mins 0.000000000 secs +12 mons 1 days 1 hours 1 mins query ? SELECT interval '1 year 1 day 1 hour 1 minute 1 second' ---- -0 years 12 mons 1 days 1 hours 1 mins 1.000000000 secs +12 mons 1 days 1 hours 1 mins 1.000000000 secs query I SELECT ascii('') diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index fb0fd8397f2d..4b11e338da70 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -1509,19 +1509,19 @@ SELECT val, ts1 - ts2 FROM foo ORDER BY ts2 - ts1; query ? SELECT i1 - i2 FROM bar; ---- -0 years 0 mons -1 days 0 hours 0 mins 0.000000000 secs -0 years 2 mons -13 days 0 hours 0 mins 0.000000000 secs -0 years 0 mons 1 days 2 hours 56 mins 0.000000000 secs -0 years 0 mons 1 days 0 hours 0 mins -3.999999993 secs +-1 days +2 mons -13 days +1 days 2 hours 56 mins +1 days -3.999999993 secs # Interval + Interval query ? SELECT i1 + i2 FROM bar; ---- -0 years 0 mons 3 days 0 hours 0 mins 0.000000000 secs -0 years 2 mons 13 days 0 hours 0 mins 0.000000000 secs -0 years 0 mons 1 days 3 hours 4 mins 0.000000000 secs -0 years 0 mons 1 days 0 hours 0 mins 4.000000007 secs +3 days +2 mons 13 days +1 days 3 hours 4 mins +1 days 4.000000007 secs # Timestamp - Interval query P From c2a6bf50881b0874774b424df9003a706541b413 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 08:49:05 -0400 Subject: [PATCH 19/36] fix scalar tests --- datafusion/common/src/scalar/mod.rs | 6 +- .../physical_plan/parquet/row_group_filter.rs | 70 ++++++++++++++++--- 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 677685b2c65b..16df8e6f05e0 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -4331,7 +4331,7 @@ mod tests { .strip_backtrace(); assert_eq!( err, - "Arrow error: Compute error: Overflow happened on: 2147483647 - -2147483648" + "Arrow error: Arithmetic overflow: Overflow happened on: 2147483647 - -2147483648" ) } @@ -4352,7 +4352,7 @@ mod tests { .sub_checked(&int_value_2) .unwrap_err() .strip_backtrace(); - assert_eq!(err, "Arrow error: Compute error: Overflow happened on: 9223372036854775807 - -9223372036854775808") + assert_eq!(err, "Arrow error: Arithmetic overflow: Overflow happened on: 9223372036854775807 - -9223372036854775808") } #[test] @@ -5868,7 +5868,7 @@ mod tests { let root_err = err.find_root(); match root_err{ DataFusionError::ArrowError( - ArrowError::ComputeError(_), + ArrowError::ArithmeticOverflow(_), _, ) => {} _ => return Err(err), diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs index 27c2151baf27..d570be8c7495 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs @@ -536,7 +536,13 @@ mod tests { ); let rgm2 = get_row_group_meta_data( &schema_descr, - vec![ParquetStatistics::int32(Some(11), Some(20), None, Some(0), false)], + vec![ParquetStatistics::int32( + Some(11), + Some(20), + None, + Some(0), + false, + )], ); let metrics = parquet_file_metrics(); // missing statistics for first row group mean that the result from the predicate expression @@ -795,13 +801,25 @@ mod tests { &schema_descr, // [0.1, 0.2] // c1 > 5, this row group will not be included in the results. - vec![ParquetStatistics::int32(Some(10), Some(20), None, Some(0), false)], + vec![ParquetStatistics::int32( + Some(10), + Some(20), + None, + Some(0), + false, + )], ); let rgm3 = get_row_group_meta_data( &schema_descr, // [1, None] // c1 > 5, this row group can not be filtered out, so will be included in the results. - vec![ParquetStatistics::int32(Some(100), None, None, Some(0), false)], + vec![ParquetStatistics::int32( + Some(100), + None, + None, + Some(0), + false, + )], ); let metrics = parquet_file_metrics(); let mut row_groups = RowGroupAccessPlanFilter::new(ParquetAccessPlan::new_all(3)); @@ -857,19 +875,37 @@ mod tests { &schema_descr, // [10, 20] // c1 > 5, this row group will be included in the results. - vec![ParquetStatistics::int32(Some(10), Some(20), None, Some(0), false)], + vec![ParquetStatistics::int32( + Some(10), + Some(20), + None, + Some(0), + false, + )], ); let rgm3 = get_row_group_meta_data( &schema_descr, // [0, 2] // c1 > 5, this row group will not be included in the results. - vec![ParquetStatistics::int32(Some(0), Some(2), None, Some(0), false)], + vec![ParquetStatistics::int32( + Some(0), + Some(2), + None, + Some(0), + false, + )], ); let rgm4 = get_row_group_meta_data( &schema_descr, // [None, 2] // c1 > 5, this row group can not be filtered out, so will be included in the results. - vec![ParquetStatistics::int32(None, Some(2), None, Some(0), false)], + vec![ParquetStatistics::int32( + None, + Some(2), + None, + Some(0), + false, + )], ); let metrics = parquet_file_metrics(); let mut row_groups = RowGroupAccessPlanFilter::new(ParquetAccessPlan::new_all(4)); @@ -915,7 +951,13 @@ mod tests { let rgm2 = get_row_group_meta_data( &schema_descr, // [0.1, 0.2] - vec![ParquetStatistics::int64(Some(10), Some(20), None, Some(0), false)], + vec![ParquetStatistics::int64( + Some(10), + Some(20), + None, + Some(0), + false, + )], ); let rgm3 = get_row_group_meta_data( &schema_descr, @@ -993,7 +1035,11 @@ mod tests { let rgm3 = get_row_group_meta_data( &schema_descr, vec![ParquetStatistics::fixed_len_byte_array( - None, None, None, Some(0), false, + None, + None, + None, + Some(0), + false, )], ); let metrics = parquet_file_metrics(); @@ -1057,7 +1103,13 @@ mod tests { ); let rgm3 = get_row_group_meta_data( &schema_descr, - vec![ParquetStatistics::byte_array(None, None, None, Some(0), false)], + vec![ParquetStatistics::byte_array( + None, + None, + None, + Some(0), + false, + )], ); let metrics = parquet_file_metrics(); let mut row_groups = RowGroupAccessPlanFilter::new(ParquetAccessPlan::new_all(3)); From 1ac178722d5071d79ed9504066a5ebfa0bd366cf Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 09:26:56 -0400 Subject: [PATCH 20/36] Port statistics to use new API --- datafusion-cli/src/functions.rs | 99 +++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 41 deletions(-) diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index a85c43f3576f..8cc6cbd53202 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -250,46 +250,68 @@ impl TableProvider for ParquetMetadataTable { fn convert_parquet_statistics( value: &Statistics, converted_type: ConvertedType, -) -> (String, String) { +) -> (Option, Option) { match (value, converted_type) { - (Statistics::Boolean(val), _) => (val.min().to_string(), val.max().to_string()), - (Statistics::Int32(val), _) => (val.min().to_string(), val.max().to_string()), - (Statistics::Int64(val), _) => (val.min().to_string(), val.max().to_string()), - (Statistics::Int96(val), _) => (val.min().to_string(), val.max().to_string()), - (Statistics::Float(val), _) => (val.min().to_string(), val.max().to_string()), - (Statistics::Double(val), _) => (val.min().to_string(), val.max().to_string()), + (Statistics::Boolean(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::Int32(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::Int64(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::Int96(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::Float(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::Double(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), (Statistics::ByteArray(val), ConvertedType::UTF8) => { - let min_bytes = val.min(); - let max_bytes = val.max(); - let min = min_bytes - .as_utf8() - .map(|v| v.to_string()) - .unwrap_or_else(|_| min_bytes.to_string()); - - let max = max_bytes - .as_utf8() - .map(|v| v.to_string()) - .unwrap_or_else(|_| max_bytes.to_string()); + let min = val.min_opt().map(|v| { + v.as_utf8() + .map(|s| s.to_string()) + .unwrap_or_else(|_e| v.to_string()) + }); + let max = val.max_opt().map(|v| { + v.as_utf8() + .map(|s| s.to_string()) + .unwrap_or_else(|_e| v.to_string()) + }); + (min, max) } - (Statistics::ByteArray(val), _) => (val.min().to_string(), val.max().to_string()), + (Statistics::ByteArray(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), (Statistics::FixedLenByteArray(val), ConvertedType::UTF8) => { - let min_bytes = val.min(); - let max_bytes = val.max(); - let min = min_bytes - .as_utf8() - .map(|v| v.to_string()) - .unwrap_or_else(|_| min_bytes.to_string()); - - let max = max_bytes - .as_utf8() - .map(|v| v.to_string()) - .unwrap_or_else(|_| max_bytes.to_string()); + let min = val.min_opt().map(|v| { + v.as_utf8() + .map(|s| s.to_string()) + .unwrap_or_else(|_e| v.to_string()) + }); + let max = val.max_opt().map(|v| { + v.as_utf8() + .map(|s| s.to_string()) + .unwrap_or_else(|_e| v.to_string()) + }); + (min, max) } - (Statistics::FixedLenByteArray(val), _) => { - (val.min().to_string(), val.max().to_string()) - } + (Statistics::FixedLenByteArray(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), } } @@ -376,16 +398,11 @@ impl TableFunctionImpl for ParquetMetadataFunc { let converted_type = column.column_descr().converted_type(); if let Some(s) = column.statistics() { - let (min_val, max_val) = if s.has_min_max_set() { - let (min_val, max_val) = - convert_parquet_statistics(s, converted_type); - (Some(min_val), Some(max_val)) - } else { - (None, None) - }; + let (min_val, max_val) = + convert_parquet_statistics(s, converted_type); stats_min_arr.push(min_val.clone()); stats_max_arr.push(max_val.clone()); - stats_null_count_arr.push(Some(s.null_count() as i64)); + stats_null_count_arr.push(s.null_count_opt().map(|c| c as i64)); stats_distinct_count_arr.push(s.distinct_count().map(|c| c as i64)); stats_min_value_arr.push(min_val); stats_max_value_arr.push(max_val); From 67ad234f224b3b89054eae6f2501cf5711cdcc2c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 09:31:35 -0400 Subject: [PATCH 21/36] factor into a function --- datafusion-cli/src/functions.rs | 55 ++++++++++++++++----------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index 8cc6cbd53202..b3358030e629 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -32,6 +32,7 @@ use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::ExecutionPlan; use datafusion::scalar::ScalarValue; use parquet::basic::ConvertedType; +use parquet::data_type::{ByteArray, FixedLenByteArray}; use parquet::file::reader::FileReader; use parquet::file::serialized_reader::SerializedFileReader; use parquet::file::statistics::Statistics; @@ -276,38 +277,18 @@ fn convert_parquet_statistics( val.min_opt().map(|v| v.to_string()), val.max_opt().map(|v| v.to_string()), ), - (Statistics::ByteArray(val), ConvertedType::UTF8) => { - let min = val.min_opt().map(|v| { - v.as_utf8() - .map(|s| s.to_string()) - .unwrap_or_else(|_e| v.to_string()) - }); - let max = val.max_opt().map(|v| { - v.as_utf8() - .map(|s| s.to_string()) - .unwrap_or_else(|_e| v.to_string()) - }); - - (min, max) - } + (Statistics::ByteArray(val), ConvertedType::UTF8) => ( + byte_array_to_string(val.min_opt()), + byte_array_to_string(val.max_opt()), + ), (Statistics::ByteArray(val), _) => ( val.min_opt().map(|v| v.to_string()), val.max_opt().map(|v| v.to_string()), ), - (Statistics::FixedLenByteArray(val), ConvertedType::UTF8) => { - let min = val.min_opt().map(|v| { - v.as_utf8() - .map(|s| s.to_string()) - .unwrap_or_else(|_e| v.to_string()) - }); - let max = val.max_opt().map(|v| { - v.as_utf8() - .map(|s| s.to_string()) - .unwrap_or_else(|_e| v.to_string()) - }); - - (min, max) - } + (Statistics::FixedLenByteArray(val), ConvertedType::UTF8) => ( + fixed_len_byte_array_to_string(val.min_opt()), + fixed_len_byte_array_to_string(val.max_opt()), + ), (Statistics::FixedLenByteArray(val), _) => ( val.min_opt().map(|v| v.to_string()), val.max_opt().map(|v| v.to_string()), @@ -315,6 +296,24 @@ fn convert_parquet_statistics( } } +/// Convert to a string if it has utf8 encoding, otherwise print bytes directly +fn byte_array_to_string(val: Option<&ByteArray>) -> Option { + val.map(|v| { + v.as_utf8() + .map(|s| s.to_string()) + .unwrap_or_else(|_e| v.to_string()) + }) +} + +/// Convert to a string if it has utf8 encoding, otherwise print bytes directly +fn fixed_len_byte_array_to_string(val: Option<&FixedLenByteArray>) -> Option { + val.map(|v| { + v.as_utf8() + .map(|s| s.to_string()) + .unwrap_or_else(|_e| v.to_string()) + }) +} + pub struct ParquetMetadataFunc {} impl TableFunctionImpl for ParquetMetadataFunc { From 5b6498eb7ed10789f856f57776ddaa240e00aef3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 16 Aug 2024 09:34:44 -0400 Subject: [PATCH 22/36] update generated files --- datafusion/proto/src/generated/datafusion_proto_common.rs | 4 ++-- datafusion/proto/src/generated/pbjson.rs | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index 3f9adbc1f0e9..3d7b1007b04e 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -48,7 +48,7 @@ pub struct ParquetFormat { #[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct AvroFormat {} #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct NdJsonFormat { #[prost(message, optional, tag = "1")] pub options: ::core::option::Option, @@ -145,7 +145,7 @@ pub struct Decimal { pub scale: i32, } #[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] +#[derive(Clone, Copy, PartialEq, ::prost::Message)] pub struct Decimal256Type { #[prost(uint32, tag = "3")] pub precision: u32, diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index c25f973480f7..75f32b8eae9f 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -614,6 +614,7 @@ impl serde::Serialize for AggregateUdfExprNode { } if let Some(v) = self.fun_definition.as_ref() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; } struct_ser.end() @@ -12473,6 +12474,7 @@ impl serde::Serialize for PhysicalAggregateExprNode { } if let Some(v) = self.fun_definition.as_ref() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; } if let Some(v) = self.aggregate_function.as_ref() { @@ -13655,6 +13657,7 @@ impl serde::Serialize for PhysicalExtensionExprNode { let mut struct_ser = serializer.serialize_struct("datafusion.PhysicalExtensionExprNode", len)?; if !self.expr.is_empty() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("expr", pbjson::private::base64::encode(&self.expr).as_str())?; } if !self.inputs.is_empty() { @@ -15701,6 +15704,7 @@ impl serde::Serialize for PhysicalWindowExprNode { } if let Some(v) = self.fun_definition.as_ref() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; } if let Some(v) = self.window_function.as_ref() { @@ -20227,6 +20231,7 @@ impl serde::Serialize for WindowExprNode { } if let Some(v) = self.fun_definition.as_ref() { #[allow(clippy::needless_borrow)] + #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; } if let Some(v) = self.window_function.as_ref() { From e77b7dfe62015f09c6c269d411fa91df99d9ba4d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 19 Aug 2024 07:34:03 -0400 Subject: [PATCH 23/36] Update test --- .../src/datasource/physical_plan/parquet/row_group_filter.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs index d570be8c7495..da16e80fd461 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs @@ -898,7 +898,8 @@ mod tests { let rgm4 = get_row_group_meta_data( &schema_descr, // [None, 2] - // c1 > 5, this row group can not be filtered out, so will be included in the results. + // c1 > 5, this row group will also not be included in the results + // (the min value is unknown, but the max value is 2, so no values can be greater than 5) vec![ParquetStatistics::int32( None, Some(2), @@ -916,7 +917,7 @@ mod tests { &pruning_predicate, &metrics, ); - assert_pruned(row_groups, ExpectedPruning::Some(vec![0, 1, 3])); + assert_pruned(row_groups, ExpectedPruning::Some(vec![0, 1])); } #[test] fn row_group_pruning_predicate_decimal_type3() { From a89fa87d3a867e127c334d741f8de2d11b7b5a93 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 19 Aug 2024 07:41:18 -0400 Subject: [PATCH 24/36] add new test --- .../physical_plan/parquet/row_group_filter.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs index da16e80fd461..ccd77d90be57 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs @@ -908,16 +908,29 @@ mod tests { false, )], ); + let rgm5 = get_row_group_meta_data( + &schema_descr, + // [2, None] + // c1 > 5, this row group must be included + // (the min value is 2, but the max value is unknown, so it may have values greater than 5) + vec![ParquetStatistics::int32( + Some(2), + None, + None, + Some(0), + false, + )], + ); let metrics = parquet_file_metrics(); - let mut row_groups = RowGroupAccessPlanFilter::new(ParquetAccessPlan::new_all(4)); + let mut row_groups = RowGroupAccessPlanFilter::new(ParquetAccessPlan::new_all(5)); row_groups.prune_by_statistics( &schema, &schema_descr, - &[rgm1, rgm2, rgm3, rgm4], + &[rgm1, rgm2, rgm3, rgm4, rgm5], &pruning_predicate, &metrics, ); - assert_pruned(row_groups, ExpectedPruning::Some(vec![0, 1])); + assert_pruned(row_groups, ExpectedPruning::Some(vec![0, 1, 4])); } #[test] fn row_group_pruning_predicate_decimal_type3() { From 3717c25171358f2f3a6b049c45deca646e4bc91d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 19 Aug 2024 09:20:49 -0400 Subject: [PATCH 25/36] update tests --- datafusion/sql/src/unparser/expr.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 9ce627aecc76..4aca5241c16a 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -2105,49 +2105,49 @@ mod tests { "1 YEAR 1 MONTH 1 DAY 3 HOUR 10 MINUTE 20 SECOND", ), IntervalStyle::PostgresVerbose, - r#"INTERVAL '0 YEARS 13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#, + r#"INTERVAL '13 MONS 1 DAYS 3 HOURS 10 MINS 20.000000000 SECS'"#, ), ( interval_month_day_nano_lit("1.5 MONTH"), IntervalStyle::PostgresVerbose, - r#"INTERVAL '0 YEARS 1 MONS 15 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, + r#"INTERVAL '1 MONS 15 DAYS'"#, ), ( interval_month_day_nano_lit("-3 MONTH"), IntervalStyle::PostgresVerbose, - r#"INTERVAL '0 YEARS -3 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS'"#, + r#"INTERVAL '-3 MONS'"#, ), ( interval_month_day_nano_lit("1 MONTH") .add(interval_month_day_nano_lit("1 DAY")), IntervalStyle::PostgresVerbose, - r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' + INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, + r#"(INTERVAL '1 MONS' + INTERVAL '1 DAYS')"#, ), ( interval_month_day_nano_lit("1 MONTH") .sub(interval_month_day_nano_lit("1 DAY")), IntervalStyle::PostgresVerbose, - r#"(INTERVAL '0 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.000000000 SECS' - INTERVAL '0 YEARS 0 MONS 1 DAYS 0 HOURS 0 MINS 0.000000000 SECS')"#, + r#"(INTERVAL '1 MONS' - INTERVAL '1 DAYS')"#, ), ( interval_datetime_lit("10 DAY 1 HOUR 10 MINUTE 20 SECOND"), IntervalStyle::PostgresVerbose, - r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#, + r#"INTERVAL '10 DAYS 1 HOURS 10 MINS 20.000 SECS'"#, ), ( interval_datetime_lit("10 DAY 1.5 HOUR 10 MINUTE 20 SECOND"), IntervalStyle::PostgresVerbose, - r#"INTERVAL '0 YEARS 0 MONS 10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#, + r#"INTERVAL '10 DAYS 1 HOURS 40 MINS 20.000 SECS'"#, ), ( interval_year_month_lit("1 YEAR 1 MONTH"), IntervalStyle::PostgresVerbose, - r#"INTERVAL '1 YEARS 1 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, + r#"INTERVAL '1 YEARS 1 MONS'"#, ), ( interval_year_month_lit("1.5 YEAR 1 MONTH"), IntervalStyle::PostgresVerbose, - r#"INTERVAL '1 YEARS 7 MONS 0 DAYS 0 HOURS 0 MINS 0.00 SECS'"#, + r#"INTERVAL '1 YEARS 7 MONS'"#, ), ( interval_year_month_lit("1 YEAR 1 MONTH"), From 230aeeca183db1c990946784b3c794b81779bad8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 19 Aug 2024 09:22:52 -0400 Subject: [PATCH 26/36] tapelo format --- Cargo.toml | 1 - datafusion-cli/Cargo.toml | 2 -- 2 files changed, 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b5f40acda8bc..e120d1389122 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -167,7 +167,6 @@ large_futures = "warn" unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] } unused_imports = "deny" - ## Temporary arrow-rs patch until 53.0.0 is released [patch.crates-io] diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 564ad378e034..da4dcd13bfdf 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -63,8 +63,6 @@ ctor = "0.2.0" predicates = "3.0" rstest = "0.17" - - ## Temporary arrow-rs patch until 53.0.0 is released [patch.crates-io] From ed2b222fbc76067856989624aff18e50f4381690 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 19 Aug 2024 09:35:42 -0400 Subject: [PATCH 27/36] Update other tests --- datafusion/sql/tests/cases/plan_to_sql.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index ed23fada0cfb..94840ff7bbdd 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -611,7 +611,7 @@ fn sql_round_trip(query: &str, expect: &str) { fn test_interval_lhs_eq() { sql_round_trip( "select interval '2 seconds' = interval '2 seconds'", - "SELECT (INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS' = INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS')", + "SELECT (INTERVAL '2.000000000 SECS' = INTERVAL '2.000000000 SECS')", ); } @@ -619,6 +619,6 @@ fn test_interval_lhs_eq() { fn test_interval_lhs_lt() { sql_round_trip( "select interval '2 seconds' < interval '2 seconds'", - "SELECT (INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS' < INTERVAL '0 YEARS 0 MONS 0 DAYS 0 HOURS 0 MINS 2.000000000 SECS')", + "SELECT (INTERVAL '2.000000000 SECS' < INTERVAL '2.000000000 SECS')", ); } From b666c318128120b0b035f9ae619941cd1bf8a17d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 20 Aug 2024 13:00:14 -0400 Subject: [PATCH 28/36] Update datafusion pin --- Cargo.toml | 26 +++--- datafusion-cli/Cargo.lock | 191 +++++++++++++++++++++++--------------- datafusion-cli/Cargo.toml | 26 +++--- 3 files changed, 140 insertions(+), 103 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e120d1389122..bbdf1d29a289 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -124,7 +124,7 @@ indexmap = "2.0.0" itertools = "0.12" log = "^0.4" num_cpus = "1.13.0" -object_store = { version = "0.10.2", default-features = false } +object_store = { version = "0.11.0", default-features = false } parking_lot = "0.12" parquet = { version = "52.2.0", default-features = false, features = [ "arrow", @@ -170,15 +170,15 @@ unused_imports = "deny" ## Temporary arrow-rs patch until 53.0.0 is released [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 2a0ea28c6aed..24017b9f17dd 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -17,6 +17,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + [[package]] name = "adler32" version = "1.2.0" @@ -124,14 +130,14 @@ checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" [[package]] name = "arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "arrow-arith", "arrow-array", @@ -151,7 +157,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "arrow-array", "arrow-buffer", @@ -165,7 +171,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "ahash", "arrow-buffer", @@ -181,7 +187,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "bytes", "half", @@ -191,7 +197,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "arrow-array", "arrow-buffer", @@ -211,7 +217,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "arrow-array", "arrow-buffer", @@ -229,7 +235,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "arrow-buffer", "arrow-schema", @@ -240,7 +246,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "arrow-array", "arrow-buffer", @@ -254,7 +260,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "arrow-array", "arrow-buffer", @@ -273,7 +279,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "arrow-array", "arrow-buffer", @@ -287,7 +293,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "ahash", "arrow-array", @@ -300,12 +306,12 @@ dependencies = [ [[package]] name = "arrow-schema" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" [[package]] name = "arrow-select" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "ahash", "arrow-array", @@ -318,7 +324,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "arrow-array", "arrow-buffer", @@ -373,7 +379,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -708,7 +714,7 @@ dependencies = [ "cc", "cfg-if", "libc", - "miniz_oxide", + "miniz_oxide 0.7.4", "object", "rustc-demangle", ] @@ -758,9 +764,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.3" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9ec96fe9a81b5e365f9db71fe00edc4fe4ca2cc7dcb7861f0603012a7caa210" +checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" dependencies = [ "arrayref", "arrayvec", @@ -1091,7 +1097,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -1676,12 +1682,12 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.31" +version = "1.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +checksum = "9c0596c1eac1f9e04ed902702e9878208b336edc9d6fddc8a48387349bab3666" dependencies = [ "crc32fast", - "miniz_oxide", + "miniz_oxide 0.8.0", ] [[package]] @@ -1764,7 +1770,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -1857,9 +1863,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" dependencies = [ "atomic-waker", "bytes", @@ -2050,7 +2056,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.5", + "h2 0.4.6", "http 1.1.0", "http-body 1.0.1", "httparse", @@ -2087,7 +2093,7 @@ dependencies = [ "hyper 1.4.1", "hyper-util", "rustls 0.23.12", - "rustls-native-certs 0.7.1", + "rustls-native-certs 0.7.2", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -2305,9 +2311,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.156" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5f43f184355eefb8d17fc948dbecf6c13be3c141f20d834ae842193a448c72a" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libflate" @@ -2441,6 +2447,15 @@ dependencies = [ "adler", ] +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + [[package]] name = "mio" version = "1.0.2" @@ -2580,9 +2595,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.10.2" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6da452820c715ce78221e8202ccc599b4a52f3e1eb3eedb487b680c81a8e3f3" +checksum = "25a0c4b3a0e31f8b66f71ad8064521efa773910196e2cde791436f13409f3b45" dependencies = [ "async-trait", "base64 0.22.1", @@ -2668,7 +2683,7 @@ dependencies = [ [[package]] name = "parquet" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" dependencies = [ "ahash", "arrow-array", @@ -2786,7 +2801,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3009,9 +3024,9 @@ dependencies = [ [[package]] name = "redox_users" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom", "libredox", @@ -3055,15 +3070,15 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" dependencies = [ "base64 0.22.1", "bytes", "futures-core", "futures-util", - "h2 0.4.5", + "h2 0.4.6", "http 1.1.0", "http-body 1.0.1", "http-body-util", @@ -3079,7 +3094,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls 0.23.12", - "rustls-native-certs 0.7.1", + "rustls-native-certs 0.7.2", "rustls-pemfile 2.1.3", "rustls-pki-types", "serde", @@ -3095,7 +3110,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "winreg", + "windows-registry", ] [[package]] @@ -3234,9 +3249,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a88d6d420651b496bdd98684116959239430022a115c1240e6c3993be0b15fba" +checksum = "04182dffc9091a404e0fc069ea5cd60e5b866c3adf881eff99a32d048242dffa" dependencies = [ "openssl-probe", "rustls-pemfile 2.1.3", @@ -3402,7 +3417,7 @@ checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3478,24 +3493,23 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "snafu" -version = "0.7.5" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" +checksum = "2b835cb902660db3415a672d862905e791e54d306c6e8189168c7f3d9ae1c79d" dependencies = [ - "doc-comment", "snafu-derive", ] [[package]] name = "snafu-derive" -version = "0.7.5" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" +checksum = "38d1e02fca405f6280643174a50c942219f0bbf4dbf7d480f1dd864d6f211ae5" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.75", ] [[package]] @@ -3544,7 +3558,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3590,7 +3604,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3603,7 +3617,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3625,9 +3639,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.74" +version = "2.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fceb41e3d546d0bd83421d3409b1460cc7444cd389341a4c880fe7a042cb3d7" +checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9" dependencies = [ "proc-macro2", "quote", @@ -3639,6 +3653,9 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "tempfile" @@ -3691,7 +3708,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3761,9 +3778,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.39.2" +version = "1.39.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daa4fb1bc778bd6f04cbfc4bb2d06a7396a8f299dc33ea1900cedaa316f467b1" +checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" dependencies = [ "backtrace", "bytes", @@ -3785,7 +3802,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3882,7 +3899,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -3927,7 +3944,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -4082,7 +4099,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", "wasm-bindgen-shared", ] @@ -4116,7 +4133,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4200,6 +4217,36 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -4348,16 +4395,6 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "xmlparser" version = "0.13.6" @@ -4391,7 +4428,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] @@ -4450,4 +4487,4 @@ dependencies = [ [[patch.unused]] name = "arrow-flight" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=042d725888358c73cd2a0d58868ea5c4bad778f7#042d725888358c73cd2a0d58868ea5c4bad778f7" +source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 29efa7dee9f0..e41987636717 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -49,7 +49,7 @@ dirs = "4.0.0" env_logger = "0.9" futures = "0.3" mimalloc = { version = "0.1", default-features = false } -object_store = { version = "0.10.1", features = ["aws", "gcp", "http"] } +object_store = { version = "0.11.0", features = ["aws", "gcp", "http"] } parking_lot = { version = "0.12" } parquet = { version = "52.2.0", default-features = false } regex = "1.8" @@ -66,15 +66,15 @@ rstest = "0.17" ## Temporary arrow-rs patch until 53.0.0 is released [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "042d725888358c73cd2a0d58868ea5c4bad778f7" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } From 2062a32cbaf20078690d1ef68ba0efcef7f94165 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 21 Aug 2024 13:28:54 -0400 Subject: [PATCH 29/36] Update for API change --- datafusion-cli/Cargo.lock | 2 +- datafusion-cli/src/functions.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index e5305d85a31a..9ca04f702241 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -991,7 +991,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.74", + "syn 2.0.75", ] [[package]] diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index b3358030e629..99511e969386 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -402,7 +402,8 @@ impl TableFunctionImpl for ParquetMetadataFunc { stats_min_arr.push(min_val.clone()); stats_max_arr.push(max_val.clone()); stats_null_count_arr.push(s.null_count_opt().map(|c| c as i64)); - stats_distinct_count_arr.push(s.distinct_count().map(|c| c as i64)); + stats_distinct_count_arr + .push(s.distinct_count_opt().map(|c| c as i64)); stats_min_value_arr.push(min_val); stats_max_value_arr.push(max_val); } else { From 13bb1462b0862ddba38a27fd1348ed7b1f698bc0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 1 Sep 2024 08:25:00 -0400 Subject: [PATCH 30/36] Update to arrow 53.0.0 sha --- Cargo.toml | 42 +- datafusion-cli/Cargo.lock | 966 ++++++++++++++++++++++---------------- datafusion-cli/Cargo.toml | 24 +- 3 files changed, 605 insertions(+), 427 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1a9171c19a84..1eb222819c0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,22 +69,22 @@ version = "41.0.0" ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } -arrow = { version = "52.2.0", features = [ +arrow = { version = "53.0.0", features = [ "prettyprint", ] } -arrow-array = { version = "52.2.0", default-features = false, features = [ +arrow-array = { version = "53.0.0", default-features = false, features = [ "chrono-tz", ] } -arrow-buffer = { version = "52.2.0", default-features = false } -arrow-flight = { version = "52.2.0", features = [ +arrow-buffer = { version = "53.0.0", default-features = false } +arrow-flight = { version = "53.0.0", features = [ "flight-sql-experimental", ] } -arrow-ipc = { version = "52.2.0", default-features = false, features = [ +arrow-ipc = { version = "53.0.0", default-features = false, features = [ "lz4", ] } -arrow-ord = { version = "52.2.0", default-features = false } -arrow-schema = { version = "52.2.0", default-features = false } -arrow-string = { version = "52.2.0", default-features = false } +arrow-ord = { version = "53.0.0", default-features = false } +arrow-schema = { version = "53.0.0", default-features = false } +arrow-string = { version = "53.0.0", default-features = false } async-trait = "0.1.73" bigdecimal = "=0.4.1" bytes = "1.4" @@ -124,7 +124,7 @@ log = "^0.4" num_cpus = "1.13.0" object_store = { version = "0.11.0", default-features = false } parking_lot = "0.12" -parquet = { version = "52.2.0", default-features = false, features = [ +parquet = { version = "53.0.0", default-features = false, features = [ "arrow", "async", "object_store", @@ -168,15 +168,15 @@ unused_imports = "deny" ## Temporary arrow-rs patch until 53.0.0 is released [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 352f2243aef6..17d6a03d8201 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -180,32 +180,68 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" +dependencies = [ + "arrow-arith 52.2.0", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-csv 52.2.0", + "arrow-data 52.2.0", + "arrow-ipc 52.2.0", + "arrow-json 52.2.0", + "arrow-ord 52.2.0", + "arrow-row 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", + "arrow-string 52.2.0", +] + +[[package]] +name = "arrow" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 53.0.0", + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-cast 53.0.0", + "arrow-csv 53.0.0", + "arrow-data 53.0.0", + "arrow-ipc 53.0.0", + "arrow-json 53.0.0", + "arrow-ord 53.0.0", + "arrow-row 53.0.0", + "arrow-schema 53.0.0", + "arrow-select 53.0.0", + "arrow-string 53.0.0", ] [[package]] name = "arrow-arith" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-arith" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-data 53.0.0", + "arrow-schema 53.0.0", "chrono", "half", "num", @@ -214,12 +250,28 @@ dependencies = [ [[package]] name = "arrow-array" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" +dependencies = [ + "ahash", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "chrono", + "half", + "hashbrown", + "num", +] + +[[package]] +name = "arrow-array" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 53.0.0", + "arrow-data 53.0.0", + "arrow-schema 53.0.0", "chrono", "chrono-tz", "half", @@ -230,7 +282,18 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c975484888fc95ec4a632cdc98be39c085b1bb518531b0c80c5d462063e5daa1" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ "bytes", "half", @@ -240,13 +303,33 @@ dependencies = [ [[package]] name = "arrow-cast" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", + "atoi", + "base64 0.22.1", + "chrono", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-cast" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-data 53.0.0", + "arrow-schema 53.0.0", + "arrow-select 53.0.0", "atoi", "base64 0.22.1", "chrono", @@ -260,13 +343,32 @@ dependencies = [ [[package]] name = "arrow-csv" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-csv" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-cast 53.0.0", + "arrow-data 53.0.0", + "arrow-schema 53.0.0", "chrono", "csv", "csv-core", @@ -278,10 +380,22 @@ dependencies = [ [[package]] name = "arrow-data" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 52.2.0", + "arrow-schema 52.2.0", + "half", + "num", +] + +[[package]] +name = "arrow-data" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "arrow-buffer 53.0.0", + "arrow-schema 53.0.0", "half", "num", ] @@ -289,13 +403,27 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "flatbuffers", +] + +[[package]] +name = "arrow-ipc" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-cast 53.0.0", + "arrow-data 53.0.0", + "arrow-schema 53.0.0", "flatbuffers", "lz4_flex", ] @@ -303,13 +431,33 @@ dependencies = [ [[package]] name = "arrow-json" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-cast 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-json" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-cast 53.0.0", + "arrow-data 53.0.0", + "arrow-schema 53.0.0", "chrono", "half", "indexmap", @@ -322,13 +470,28 @@ dependencies = [ [[package]] name = "arrow-ord" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", + "half", + "num", +] + +[[package]] +name = "arrow-ord" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-data 53.0.0", + "arrow-schema 53.0.0", + "arrow-select 53.0.0", "half", "num", ] @@ -336,44 +499,95 @@ dependencies = [ [[package]] name = "arrow-row" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "half", +] + +[[package]] +name = "arrow-row" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "ahash", + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-data 53.0.0", + "arrow-schema 53.0.0", "half", ] [[package]] name = "arrow-schema" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" + +[[package]] +name = "arrow-schema" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" [[package]] name = "arrow-select" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "num", +] + +[[package]] +name = "arrow-select" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "ahash", + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-data 53.0.0", + "arrow-schema 53.0.0", "num", ] [[package]] name = "arrow-string" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 52.2.0", + "arrow-buffer 52.2.0", + "arrow-data 52.2.0", + "arrow-schema 52.2.0", + "arrow-select 52.2.0", + "memchr", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "arrow-string" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-data 53.0.0", + "arrow-schema 53.0.0", + "arrow-select 53.0.0", "memchr", "num", "regex", @@ -422,7 +636,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -459,160 +673,156 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "aws-config" -version = "0.55.3" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcdcf0d683fe9c23d32cf5b53c9918ea0a500375a9fb20109802552658e576c9" +checksum = "4e95816a168520d72c0e7680c405a5a8c1fb6a035b4bc4b9d7b0de8e1a941697" dependencies = [ "aws-credential-types", - "aws-http", + "aws-runtime", "aws-sdk-sso", + "aws-sdk-ssooidc", "aws-sdk-sts", "aws-smithy-async", - "aws-smithy-client", "aws-smithy-http", - "aws-smithy-http-tower", "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", "bytes", - "fastrand 1.9.0", + "fastrand", "hex", "http 0.2.12", - "hyper 0.14.30", - "ring 0.16.20", + "ring", "time", "tokio", - "tower", "tracing", + "url", "zeroize", ] [[package]] name = "aws-credential-types" -version = "0.55.3" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fcdb2f7acbc076ff5ad05e7864bdb191ca70a6fd07668dc3a1a8bcd051de5ae" +checksum = "60e8f6b615cb5fc60a98132268508ad104310f0cfb25a1c22eee76efdf9154da" dependencies = [ "aws-smithy-async", + "aws-smithy-runtime-api", "aws-smithy-types", - "fastrand 1.9.0", - "tokio", - "tracing", "zeroize", ] [[package]] -name = "aws-endpoint" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cce1c41a6cfaa726adee9ebb9a56fcd2bbfd8be49fd8a04c5e20fd968330b04" -dependencies = [ - "aws-smithy-http", - "aws-smithy-types", - "aws-types", - "http 0.2.12", - "regex", - "tracing", -] - -[[package]] -name = "aws-http" -version = "0.55.3" +name = "aws-runtime" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aadbc44e7a8f3e71c8b374e03ecd972869eb91dd2bc89ed018954a52ba84bc44" +checksum = "2424565416eef55906f9f8cece2072b6b6a76075e3ff81483ebe938a89a4c05f" dependencies = [ "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", "aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", "bytes", + "fastrand", "http 0.2.12", "http-body 0.4.6", - "lazy_static", + "once_cell", "percent-encoding", "pin-project-lite", "tracing", + "uuid", ] [[package]] name = "aws-sdk-sso" -version = "0.28.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8b812340d86d4a766b2ca73f740dfd47a97c2dff0c06c8517a16d88241957e4" +checksum = "e5879bec6e74b648ce12f6085e7245417bc5f6d672781028384d2e494be3eb6d" dependencies = [ "aws-credential-types", - "aws-endpoint", - "aws-http", - "aws-sig-auth", + "aws-runtime", "aws-smithy-async", - "aws-smithy-client", "aws-smithy-http", - "aws-smithy-http-tower", "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", "aws-smithy-types", "aws-types", "bytes", "http 0.2.12", - "regex", - "tokio-stream", - "tower", + "once_cell", + "regex-lite", "tracing", ] [[package]] -name = "aws-sdk-sts" -version = "0.28.0" +name = "aws-sdk-ssooidc" +version = "1.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "265fac131fbfc188e5c3d96652ea90ecc676a934e3174eaaee523c6cec040b3b" +checksum = "4ef4cd9362f638c22a3b959fd8df292e7e47fdf170270f86246b97109b5f2f7d" dependencies = [ "aws-credential-types", - "aws-endpoint", - "aws-http", - "aws-sig-auth", + "aws-runtime", "aws-smithy-async", - "aws-smithy-client", "aws-smithy-http", - "aws-smithy-http-tower", "aws-smithy-json", - "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", "aws-smithy-types", - "aws-smithy-xml", "aws-types", "bytes", "http 0.2.12", - "regex", - "tower", + "once_cell", + "regex-lite", "tracing", ] [[package]] -name = "aws-sig-auth" -version = "0.55.3" +name = "aws-sdk-sts" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b94acb10af0c879ecd5c7bdf51cda6679a0a4f4643ce630905a77673bfa3c61" +checksum = "0b1e2735d2ab28b35ecbb5496c9d41857f52a0d6a0075bbf6a8af306045ea6f6" dependencies = [ "aws-credential-types", - "aws-sigv4", + "aws-runtime", + "aws-smithy-async", "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", "aws-types", "http 0.2.12", + "once_cell", + "regex-lite", "tracing", ] [[package]] name = "aws-sigv4" -version = "0.55.3" +version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d2ce6f507be68e968a33485ced670111d1cbad161ddbbab1e313c03d37d8f4c" +checksum = "5df1b0fa6be58efe9d4ccc257df0a53b89cd8909e86591a13ca54817c87517be" dependencies = [ + "aws-credential-types", "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", "form_urlencoded", "hex", "hmac", "http 0.2.12", + "http 1.1.0", "once_cell", "percent-encoding", - "regex", "sha2", "time", "tracing", @@ -620,53 +830,28 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "0.55.3" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13bda3996044c202d75b91afeb11a9afae9db9a721c6a7a427410018e286b880" +checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c" dependencies = [ "futures-util", "pin-project-lite", "tokio", - "tokio-stream", -] - -[[package]] -name = "aws-smithy-client" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a86aa6e21e86c4252ad6a0e3e74da9617295d8d6e374d552be7d3059c41cedd" -dependencies = [ - "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-http-tower", - "aws-smithy-types", - "bytes", - "fastrand 1.9.0", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.30", - "hyper-rustls 0.23.2", - "lazy_static", - "pin-project-lite", - "rustls 0.20.9", - "tokio", - "tower", - "tracing", ] [[package]] name = "aws-smithy-http" -version = "0.55.3" +version = "0.60.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b3b693869133551f135e1f2c77cb0b8277d9e3e17feaf2213f735857c4f0d28" +checksum = "01dbcb6e2588fd64cfb6d7529661b06466419e4c54ed1c62d6510d2d0350a728" dependencies = [ + "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", "once_cell", "percent-encoding", "pin-project-lite", @@ -675,74 +860,110 @@ dependencies = [ ] [[package]] -name = "aws-smithy-http-tower" -version = "0.55.3" +name = "aws-smithy-json" +version = "0.60.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae4f6c5798a247fac98a867698197d9ac22643596dc3777f0c76b91917616b9" +checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6" dependencies = [ - "aws-smithy-http", "aws-smithy-types", - "bytes", - "http 0.2.12", - "http-body 0.4.6", - "pin-project-lite", - "tower", - "tracing", ] [[package]] -name = "aws-smithy-json" -version = "0.55.3" +name = "aws-smithy-query" +version = "0.60.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23f9f42fbfa96d095194a632fbac19f60077748eba536eb0b9fecc28659807f8" +checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" dependencies = [ "aws-smithy-types", + "urlencoding", ] [[package]] -name = "aws-smithy-query" -version = "0.55.3" +name = "aws-smithy-runtime" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98819eb0b04020a1c791903533b638534ae6c12e2aceda3e6e6fba015608d51d" +checksum = "d1ce695746394772e7000b39fe073095db6d45a862d0767dd5ad0ac0d7f8eb87" dependencies = [ + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime-api", "aws-smithy-types", - "urlencoding", + "bytes", + "fastrand", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "http-body 1.0.1", + "httparse", + "hyper 0.14.30", + "hyper-rustls 0.24.2", + "once_cell", + "pin-project-lite", + "pin-utils", + "rustls 0.21.12", + "tokio", + "tracing", +] + +[[package]] +name = "aws-smithy-runtime-api" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e086682a53d3aa241192aa110fa8dfce98f2f5ac2ead0de84d41582c7e8fdb96" +dependencies = [ + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.1.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", ] [[package]] name = "aws-smithy-types" -version = "0.55.3" +version = "1.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16a3d0bf4f324f4ef9793b86a1701d9700fbcdbd12a846da45eed104c634c6e8" +checksum = "273dcdfd762fae3e1650b8024624e7cd50e484e37abdab73a7a706188ad34543" dependencies = [ "base64-simd", + "bytes", + "bytes-utils", + "http 0.2.12", + "http 1.1.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", "itoa", "num-integer", + "pin-project-lite", + "pin-utils", "ryu", + "serde", "time", ] [[package]] name = "aws-smithy-xml" -version = "0.55.3" +version = "0.60.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1b9d12875731bd07e767be7baad95700c3137b56730ec9ddeedb52a5e5ca63b" +checksum = "d123fbc2a4adc3c301652ba8e149bf4bc1d1725affb9784eb20c953ace06bf55" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "0.55.3" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd209616cc8d7bfb82f87811a5c655dc97537f592689b18743bddf5dc5c4829" +checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef" dependencies = [ "aws-credential-types", "aws-smithy-async", - "aws-smithy-client", - "aws-smithy-http", + "aws-smithy-runtime-api", "aws-smithy-types", - "http 0.2.12", "rustc_version", "tracing", ] @@ -910,9 +1131,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.13" +version = "1.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72db2f7947ecee9b03b510377e8bb9077afa27176fdbff55c51027e976fdcc48" +checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6" dependencies = [ "jobserver", "libc", @@ -991,7 +1212,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -1050,9 +1271,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "core-foundation" @@ -1147,7 +1368,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edb49164822f3ee45b17acd4a208cfc1251410cf0cad9a833234c9890774dd9f" dependencies = [ "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -1176,10 +1397,10 @@ version = "41.0.0" dependencies = [ "ahash", "apache-avro", - "arrow", - "arrow-array", - "arrow-ipc", - "arrow-schema", + "arrow 53.0.0", + "arrow-array 53.0.0", + "arrow-ipc 53.0.0", + "arrow-schema 53.0.0", "async-compression", "async-trait", "bytes", @@ -1198,7 +1419,6 @@ dependencies = [ "datafusion-optimizer", "datafusion-physical-expr", "datafusion-physical-expr-common", - "datafusion-physical-expr-functions-aggregate", "datafusion-physical-optimizer", "datafusion-physical-plan", "datafusion-sql", @@ -1214,7 +1434,7 @@ dependencies = [ "num_cpus", "object_store", "parking_lot", - "parquet", + "parquet 53.0.0", "paste", "pin-project-lite", "rand", @@ -1232,7 +1452,7 @@ dependencies = [ name = "datafusion-catalog" version = "41.0.0" dependencies = [ - "arrow-schema", + "arrow-schema 53.0.0", "async-trait", "datafusion-common", "datafusion-execution", @@ -1244,7 +1464,7 @@ dependencies = [ name = "datafusion-cli" version = "41.0.0" dependencies = [ - "arrow", + "arrow 52.2.0", "assert_cmd", "async-trait", "aws-config", @@ -1258,7 +1478,7 @@ dependencies = [ "mimalloc", "object_store", "parking_lot", - "parquet", + "parquet 52.2.0", "predicates", "regex", "rstest", @@ -1273,10 +1493,10 @@ version = "41.0.0" dependencies = [ "ahash", "apache-avro", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-schema", + "arrow 53.0.0", + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-schema 53.0.0", "chrono", "half", "hashbrown", @@ -1284,15 +1504,17 @@ dependencies = [ "libc", "num_cpus", "object_store", - "parquet", + "parquet 53.0.0", "paste", "sqlparser", + "tokio", ] [[package]] name = "datafusion-common-runtime" version = "41.0.0" dependencies = [ + "log", "tokio", ] @@ -1300,7 +1522,7 @@ dependencies = [ name = "datafusion-execution" version = "41.0.0" dependencies = [ - "arrow", + "arrow 53.0.0", "chrono", "dashmap", "datafusion-common", @@ -1320,9 +1542,9 @@ name = "datafusion-expr" version = "41.0.0" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-buffer", + "arrow 53.0.0", + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", "chrono", "datafusion-common", "datafusion-expr-common", @@ -1339,7 +1561,7 @@ dependencies = [ name = "datafusion-expr-common" version = "41.0.0" dependencies = [ - "arrow", + "arrow 53.0.0", "datafusion-common", "paste", ] @@ -1348,8 +1570,8 @@ dependencies = [ name = "datafusion-functions" version = "41.0.0" dependencies = [ - "arrow", - "arrow-buffer", + "arrow 53.0.0", + "arrow-buffer 53.0.0", "base64 0.22.1", "blake2", "blake3", @@ -1374,8 +1596,8 @@ name = "datafusion-functions-aggregate" version = "41.0.0" dependencies = [ "ahash", - "arrow", - "arrow-schema", + "arrow 53.0.0", + "arrow-schema 53.0.0", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1393,7 +1615,7 @@ name = "datafusion-functions-aggregate-common" version = "41.0.0" dependencies = [ "ahash", - "arrow", + "arrow 53.0.0", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", @@ -1404,16 +1626,17 @@ dependencies = [ name = "datafusion-functions-nested" version = "41.0.0" dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", + "arrow 53.0.0", + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-ord 53.0.0", + "arrow-schema 53.0.0", "datafusion-common", "datafusion-execution", "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-physical-expr-common", "itertools", "log", "paste", @@ -1434,7 +1657,7 @@ dependencies = [ name = "datafusion-optimizer" version = "41.0.0" dependencies = [ - "arrow", + "arrow 53.0.0", "async-trait", "chrono", "datafusion-common", @@ -1453,12 +1676,12 @@ name = "datafusion-physical-expr" version = "41.0.0" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "arrow-string", + "arrow 53.0.0", + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-ord 53.0.0", + "arrow-schema 53.0.0", + "arrow-string 53.0.0", "base64 0.22.1", "chrono", "datafusion-common", @@ -1483,27 +1706,13 @@ name = "datafusion-physical-expr-common" version = "41.0.0" dependencies = [ "ahash", - "arrow", + "arrow 53.0.0", "datafusion-common", "datafusion-expr-common", "hashbrown", "rand", ] -[[package]] -name = "datafusion-physical-expr-functions-aggregate" -version = "41.0.0" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", - "rand", -] - [[package]] name = "datafusion-physical-optimizer" version = "41.0.0" @@ -1520,11 +1729,11 @@ name = "datafusion-physical-plan" version = "41.0.0" dependencies = [ "ahash", - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", + "arrow 53.0.0", + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-ord 53.0.0", + "arrow-schema 53.0.0", "async-trait", "chrono", "datafusion-common", @@ -1535,7 +1744,6 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr", "datafusion-physical-expr-common", - "datafusion-physical-expr-functions-aggregate", "futures", "half", "hashbrown", @@ -1553,9 +1761,9 @@ dependencies = [ name = "datafusion-sql" version = "41.0.0" dependencies = [ - "arrow", - "arrow-array", - "arrow-schema", + "arrow 53.0.0", + "arrow-array 53.0.0", + "arrow-schema 53.0.0", "datafusion-common", "datafusion-expr", "log", @@ -1690,18 +1898,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "1.9.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" -dependencies = [ - "instant", -] - -[[package]] -name = "fastrand" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "fd-lock" @@ -1732,9 +1931,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.32" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c0596c1eac1f9e04ed902702e9878208b336edc9d6fddc8a48387349bab3666" +checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" dependencies = [ "crc32fast", "miniz_oxide 0.8.0", @@ -1820,7 +2019,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -2113,17 +2312,18 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.23.2" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ + "futures-util", "http 0.2.12", "hyper 0.14.30", "log", - "rustls 0.20.9", + "rustls 0.21.12", "rustls-native-certs 0.6.3", "tokio", - "tokio-rustls 0.23.4", + "tokio-rustls 0.24.1", ] [[package]] @@ -2137,7 +2337,7 @@ dependencies = [ "hyper 1.4.1", "hyper-util", "rustls 0.23.12", - "rustls-native-certs 0.7.2", + "rustls-native-certs 0.7.3", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -2199,9 +2399,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "equivalent", "hashbrown", @@ -2617,9 +2817,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.3" +version = "0.36.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" dependencies = [ "memchr", ] @@ -2644,7 +2844,7 @@ dependencies = [ "quick-xml", "rand", "reqwest", - "ring 0.17.8", + "ring", "rustls-pemfile 2.1.3", "serde", "serde_json", @@ -2708,16 +2908,35 @@ dependencies = [ [[package]] name = "parquet" version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "bytes", + "chrono", + "half", + "hashbrown", + "num", + "num-bigint", + "paste", + "seq-macro", + "thrift", + "twox-hash", +] + +[[package]] +name = "parquet" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +dependencies = [ + "ahash", + "arrow-array 53.0.0", + "arrow-buffer 53.0.0", + "arrow-cast 53.0.0", + "arrow-data 53.0.0", + "arrow-ipc 53.0.0", + "arrow-schema 53.0.0", + "arrow-select 53.0.0", "base64 0.22.1", "brotli", "bytes", @@ -2826,7 +3045,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -2943,7 +3162,7 @@ checksum = "ba92fb39ec7ad06ca2582c0ca834dfeadcaf06ddfc8e635c80aa7e1c05315fdd" dependencies = [ "bytes", "rand", - "ring 0.17.8", + "ring", "rustc-hash", "rustls 0.23.12", "slab", @@ -2967,9 +3186,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -3095,7 +3314,7 @@ dependencies = [ "pin-project-lite", "quinn", "rustls 0.23.12", - "rustls-native-certs 0.7.2", + "rustls-native-certs 0.7.3", "rustls-pemfile 2.1.3", "rustls-pki-types", "serde", @@ -3114,21 +3333,6 @@ dependencies = [ "windows-registry", ] -[[package]] -name = "ring" -version = "0.16.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted 0.7.1", - "web-sys", - "winapi", -] - [[package]] name = "ring" version = "0.17.8" @@ -3139,8 +3343,8 @@ dependencies = [ "cfg-if", "getrandom", "libc", - "spin 0.9.8", - "untrusted 0.9.0", + "spin", + "untrusted", "windows-sys 0.52.0", ] @@ -3190,18 +3394,18 @@ checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "a85d50532239da68e9addb745ba38ff4612a242c1c7ceea689c4bc7c2f43c36f" dependencies = [ "bitflags 2.6.0", "errno", @@ -3212,14 +3416,14 @@ dependencies = [ [[package]] name = "rustls" -version = "0.20.9" +version = "0.21.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" dependencies = [ "log", - "ring 0.16.20", + "ring", + "rustls-webpki 0.101.7", "sct", - "webpki", ] [[package]] @@ -3229,9 +3433,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" dependencies = [ "once_cell", - "ring 0.17.8", + "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.102.7", "subtle", "zeroize", ] @@ -3250,9 +3454,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04182dffc9091a404e0fc069ea5cd60e5b866c3adf881eff99a32d048242dffa" +checksum = "e5bfb394eeed242e909609f56089eecfe5fda225042e8b171791b9c95f5931e5" dependencies = [ "openssl-probe", "rustls-pemfile 2.1.3", @@ -3288,13 +3492,23 @@ checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" [[package]] name = "rustls-webpki" -version = "0.102.6" +version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.8", + "ring", + "untrusted", +] + +[[package]] +name = "rustls-webpki" +version = "0.102.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56" +dependencies = [ + "ring", "rustls-pki-types", - "untrusted 0.9.0", + "untrusted", ] [[package]] @@ -3362,8 +3576,8 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.17.8", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -3403,29 +3617,29 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.208" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff085d2cb684faa248efb494c39b68e522822ac0de72ccf08109abde717cfb2" +checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.208" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24008e81ff7613ed8e5ba0cfaf24e2c2f1e5b8a0495711e44fcd4882fca62bcf" +checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] name = "serde_json" -version = "1.0.125" +version = "1.0.127" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83c8e735a073ccf5be70aa8066aa984eaf2fa000db6c8d0100ae605b366d31ed" +checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" dependencies = [ "itoa", "memchr", @@ -3510,7 +3724,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -3529,12 +3743,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.9.8" @@ -3559,7 +3767,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -3605,7 +3813,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -3618,7 +3826,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -3640,9 +3848,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.75" +version = "2.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6af063034fc1935ede7be0122941bafa9bacb949334d090b77ca98b5817c7d9" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" dependencies = [ "proc-macro2", "quote", @@ -3665,7 +3873,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ "cfg-if", - "fastrand 2.1.0", + "fastrand", "once_cell", "rustix", "windows-sys 0.59.0", @@ -3703,7 +3911,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -3773,9 +3981,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.39.3" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babc99b9923bfa4804bd74722ff02c0381021eafa4db9949217e3be8e84fff5" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" dependencies = [ "backtrace", "bytes", @@ -3797,18 +4005,17 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] name = "tokio-rustls" -version = "0.23.4" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.20.9", + "rustls 0.21.12", "tokio", - "webpki", ] [[package]] @@ -3822,17 +4029,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-stream" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - [[package]] name = "tokio-util" version = "0.7.11" @@ -3859,7 +4055,6 @@ dependencies = [ "tokio", "tower-layer", "tower-service", - "tracing", ] [[package]] @@ -3880,7 +4075,6 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -3894,7 +4088,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -3939,7 +4133,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -3981,12 +4175,6 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - [[package]] name = "untrusted" version = "0.9.0" @@ -4094,7 +4282,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", "wasm-bindgen-shared", ] @@ -4128,7 +4316,7 @@ checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4162,16 +4350,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki" -version = "0.22.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" -dependencies = [ - "ring 0.17.8", - "untrusted 0.9.0", -] - [[package]] name = "winapi" version = "0.3.9" @@ -4423,7 +4601,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.75", + "syn 2.0.77", ] [[package]] @@ -4481,5 +4659,5 @@ dependencies = [ [[patch.unused]] name = "arrow-flight" -version = "52.2.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=344ba1dd870cb8c7327a08a7020a0350d4afd687#344ba1dd870cb8c7327a08a7020a0350d4afd687" +version = "53.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 3621e94c2052..dccaa6639079 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -66,15 +66,15 @@ rstest = "0.17" ## Temporary arrow-rs patch until 53.0.0 is released [patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "344ba1dd870cb8c7327a08a7020a0350d4afd687" } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } From 03378ed910a4024bf942452737d4d5f4c72b8dc4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 1 Sep 2024 09:19:31 -0400 Subject: [PATCH 31/36] Update cli deps --- datafusion-cli/Cargo.lock | 460 +++++++++----------------------------- datafusion-cli/Cargo.toml | 5 +- 2 files changed, 113 insertions(+), 352 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 17d6a03d8201..c7e39cdae4f7 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -177,60 +177,24 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" -[[package]] -name = "arrow" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05048a8932648b63f21c37d88b552ccc8a65afb6dfe9fc9f30ce79174c2e7a85" -dependencies = [ - "arrow-arith 52.2.0", - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-cast 52.2.0", - "arrow-csv 52.2.0", - "arrow-data 52.2.0", - "arrow-ipc 52.2.0", - "arrow-json 52.2.0", - "arrow-ord 52.2.0", - "arrow-row 52.2.0", - "arrow-schema 52.2.0", - "arrow-select 52.2.0", - "arrow-string 52.2.0", -] - [[package]] name = "arrow" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ - "arrow-arith 53.0.0", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-csv 53.0.0", - "arrow-data 53.0.0", - "arrow-ipc 53.0.0", - "arrow-json 53.0.0", - "arrow-ord 53.0.0", - "arrow-row 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", - "arrow-string 53.0.0", -] - -[[package]] -name = "arrow-arith" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d8a57966e43bfe9a3277984a14c24ec617ad874e4c0e1d2a1b083a39cfbf22c" -dependencies = [ - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "chrono", - "half", - "num", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", ] [[package]] @@ -238,40 +202,24 @@ name = "arrow-arith" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "chrono", "half", "num", ] -[[package]] -name = "arrow-array" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f4a9468c882dc66862cef4e1fd8423d47e67972377d85d80e022786427768c" -dependencies = [ - "ahash", - "arrow-buffer 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "chrono", - "half", - "hashbrown", - "num", -] - [[package]] name = "arrow-array" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ "ahash", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-buffer", + "arrow-data", + "arrow-schema", "chrono", "chrono-tz", "half", @@ -279,17 +227,6 @@ dependencies = [ "num", ] -[[package]] -name = "arrow-buffer" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c975484888fc95ec4a632cdc98be39c085b1bb518531b0c80c5d462063e5daa1" -dependencies = [ - "bytes", - "half", - "num", -] - [[package]] name = "arrow-buffer" version = "53.0.0" @@ -300,36 +237,16 @@ dependencies = [ "num", ] -[[package]] -name = "arrow-cast" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da26719e76b81d8bc3faad1d4dbdc1bcc10d14704e63dc17fc9f3e7e1e567c8e" -dependencies = [ - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "arrow-select 52.2.0", - "atoi", - "base64 0.22.1", - "chrono", - "half", - "lexical-core", - "num", - "ryu", -] - [[package]] name = "arrow-cast" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", "atoi", "base64 0.22.1", "chrono", @@ -340,35 +257,16 @@ dependencies = [ "ryu", ] -[[package]] -name = "arrow-csv" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c13c36dc5ddf8c128df19bab27898eea64bf9da2b555ec1cd17a8ff57fba9ec2" -dependencies = [ - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-cast 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "chrono", - "csv", - "csv-core", - "lazy_static", - "lexical-core", - "regex", -] - [[package]] name = "arrow-csv" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "chrono", "csv", "csv-core", @@ -377,87 +275,41 @@ dependencies = [ "regex", ] -[[package]] -name = "arrow-data" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd9d6f18c65ef7a2573ab498c374d8ae364b4a4edf67105357491c031f716ca5" -dependencies = [ - "arrow-buffer 52.2.0", - "arrow-schema 52.2.0", - "half", - "num", -] - [[package]] name = "arrow-data" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ - "arrow-buffer 53.0.0", - "arrow-schema 53.0.0", + "arrow-buffer", + "arrow-schema", "half", "num", ] -[[package]] -name = "arrow-ipc" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e786e1cdd952205d9a8afc69397b317cfbb6e0095e445c69cda7e8da5c1eeb0f" -dependencies = [ - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-cast 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "flatbuffers", -] - [[package]] name = "arrow-ipc" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "flatbuffers", "lz4_flex", ] -[[package]] -name = "arrow-json" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb22284c5a2a01d73cebfd88a33511a3234ab45d66086b2ca2d1228c3498e445" -dependencies = [ - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-cast 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "chrono", - "half", - "indexmap", - "lexical-core", - "num", - "serde", - "serde_json", -] - [[package]] name = "arrow-json" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", "chrono", "half", "indexmap", @@ -467,115 +319,49 @@ dependencies = [ "serde_json", ] -[[package]] -name = "arrow-ord" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42745f86b1ab99ef96d1c0bcf49180848a64fe2c7a7a0d945bc64fa2b21ba9bc" -dependencies = [ - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "arrow-select 52.2.0", - "half", - "num", -] - [[package]] name = "arrow-ord" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", "half", "num", ] -[[package]] -name = "arrow-row" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd09a518c602a55bd406bcc291a967b284cfa7a63edfbf8b897ea4748aad23c" -dependencies = [ - "ahash", - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "half", -] - [[package]] name = "arrow-row" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ "ahash", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "half", ] -[[package]] -name = "arrow-schema" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e972cd1ff4a4ccd22f86d3e53e835c2ed92e0eea6a3e8eadb72b4f1ac802cf8" - [[package]] name = "arrow-schema" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" -[[package]] -name = "arrow-select" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "600bae05d43483d216fb3494f8c32fdbefd8aa4e1de237e790dbb3d9f44690a3" -dependencies = [ - "ahash", - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "num", -] - [[package]] name = "arrow-select" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ "ahash", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", - "num", -] - -[[package]] -name = "arrow-string" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dc1985b67cb45f6606a248ac2b4a288849f196bab8c657ea5589f47cdd55e6" -dependencies = [ - "arrow-array 52.2.0", - "arrow-buffer 52.2.0", - "arrow-data 52.2.0", - "arrow-schema 52.2.0", - "arrow-select 52.2.0", - "memchr", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", "num", - "regex", - "regex-syntax", ] [[package]] @@ -583,11 +369,11 @@ name = "arrow-string" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-data 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", "memchr", "num", "regex", @@ -1397,10 +1183,10 @@ version = "41.0.0" dependencies = [ "ahash", "apache-avro", - "arrow 53.0.0", - "arrow-array 53.0.0", - "arrow-ipc 53.0.0", - "arrow-schema 53.0.0", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", "async-compression", "async-trait", "bytes", @@ -1434,7 +1220,7 @@ dependencies = [ "num_cpus", "object_store", "parking_lot", - "parquet 53.0.0", + "parquet", "paste", "pin-project-lite", "rand", @@ -1452,7 +1238,7 @@ dependencies = [ name = "datafusion-catalog" version = "41.0.0" dependencies = [ - "arrow-schema 53.0.0", + "arrow-schema", "async-trait", "datafusion-common", "datafusion-execution", @@ -1464,7 +1250,7 @@ dependencies = [ name = "datafusion-cli" version = "41.0.0" dependencies = [ - "arrow 52.2.0", + "arrow", "assert_cmd", "async-trait", "aws-config", @@ -1478,7 +1264,7 @@ dependencies = [ "mimalloc", "object_store", "parking_lot", - "parquet 52.2.0", + "parquet", "predicates", "regex", "rstest", @@ -1493,10 +1279,10 @@ version = "41.0.0" dependencies = [ "ahash", "apache-avro", - "arrow 53.0.0", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-schema 53.0.0", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", "chrono", "half", "hashbrown", @@ -1504,7 +1290,7 @@ dependencies = [ "libc", "num_cpus", "object_store", - "parquet 53.0.0", + "parquet", "paste", "sqlparser", "tokio", @@ -1522,7 +1308,7 @@ dependencies = [ name = "datafusion-execution" version = "41.0.0" dependencies = [ - "arrow 53.0.0", + "arrow", "chrono", "dashmap", "datafusion-common", @@ -1542,9 +1328,9 @@ name = "datafusion-expr" version = "41.0.0" dependencies = [ "ahash", - "arrow 53.0.0", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", + "arrow", + "arrow-array", + "arrow-buffer", "chrono", "datafusion-common", "datafusion-expr-common", @@ -1561,7 +1347,7 @@ dependencies = [ name = "datafusion-expr-common" version = "41.0.0" dependencies = [ - "arrow 53.0.0", + "arrow", "datafusion-common", "paste", ] @@ -1570,8 +1356,8 @@ dependencies = [ name = "datafusion-functions" version = "41.0.0" dependencies = [ - "arrow 53.0.0", - "arrow-buffer 53.0.0", + "arrow", + "arrow-buffer", "base64 0.22.1", "blake2", "blake3", @@ -1596,8 +1382,8 @@ name = "datafusion-functions-aggregate" version = "41.0.0" dependencies = [ "ahash", - "arrow 53.0.0", - "arrow-schema 53.0.0", + "arrow", + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1615,7 +1401,7 @@ name = "datafusion-functions-aggregate-common" version = "41.0.0" dependencies = [ "ahash", - "arrow 53.0.0", + "arrow", "datafusion-common", "datafusion-expr-common", "datafusion-physical-expr-common", @@ -1626,11 +1412,11 @@ dependencies = [ name = "datafusion-functions-nested" version = "41.0.0" dependencies = [ - "arrow 53.0.0", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-ord 53.0.0", - "arrow-schema 53.0.0", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr", @@ -1657,7 +1443,7 @@ dependencies = [ name = "datafusion-optimizer" version = "41.0.0" dependencies = [ - "arrow 53.0.0", + "arrow", "async-trait", "chrono", "datafusion-common", @@ -1676,12 +1462,12 @@ name = "datafusion-physical-expr" version = "41.0.0" dependencies = [ "ahash", - "arrow 53.0.0", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-ord 53.0.0", - "arrow-schema 53.0.0", - "arrow-string 53.0.0", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "arrow-string", "base64 0.22.1", "chrono", "datafusion-common", @@ -1706,7 +1492,7 @@ name = "datafusion-physical-expr-common" version = "41.0.0" dependencies = [ "ahash", - "arrow 53.0.0", + "arrow", "datafusion-common", "datafusion-expr-common", "hashbrown", @@ -1729,11 +1515,11 @@ name = "datafusion-physical-plan" version = "41.0.0" dependencies = [ "ahash", - "arrow 53.0.0", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-ord 53.0.0", - "arrow-schema 53.0.0", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", "async-trait", "chrono", "datafusion-common", @@ -1761,9 +1547,9 @@ dependencies = [ name = "datafusion-sql" version = "41.0.0" dependencies = [ - "arrow 53.0.0", - "arrow-array 53.0.0", - "arrow-schema 53.0.0", + "arrow", + "arrow-array", + "arrow-schema", "datafusion-common", "datafusion-expr", "log", @@ -2905,38 +2691,19 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "parquet" -version = "52.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e977b9066b4d3b03555c22bdc442f3fadebd96a39111249113087d0edb2691cd" -dependencies = [ - "ahash", - "bytes", - "chrono", - "half", - "hashbrown", - "num", - "num-bigint", - "paste", - "seq-macro", - "thrift", - "twox-hash", -] - [[package]] name = "parquet" version = "53.0.0" source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" dependencies = [ "ahash", - "arrow-array 53.0.0", - "arrow-buffer 53.0.0", - "arrow-cast 53.0.0", - "arrow-data 53.0.0", - "arrow-ipc 53.0.0", - "arrow-schema 53.0.0", - "arrow-select 53.0.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", "base64 0.22.1", "brotli", "bytes", @@ -4656,8 +4423,3 @@ dependencies = [ "cc", "pkg-config", ] - -[[patch.unused]] -name = "arrow-flight" -version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index dccaa6639079..37886a7e8856 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -30,7 +30,7 @@ rust-version = "1.76" readme = "README.md" [dependencies] -arrow = { version = "52.2.0" } +arrow = { version = "53.0.0" } async-trait = "0.1.73" aws-config = "1.5.5" aws-credential-types = "1.2.0" @@ -51,7 +51,7 @@ futures = "0.3" mimalloc = { version = "0.1", default-features = false } object_store = { version = "0.11.0", features = ["aws", "gcp", "http"] } parking_lot = { version = "0.12" } -parquet = { version = "52.2.0", default-features = false } +parquet = { version = "53.0.0", default-features = false } regex = "1.8" rustyline = "11.0" tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] } @@ -76,5 +76,4 @@ arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d5 arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } From 15f4c5fced0a40b3a55f08c4aec26973b92c4c27 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 2 Sep 2024 07:26:04 -0400 Subject: [PATCH 32/36] update cargo.lock --- datafusion-cli/Cargo.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index c7e39cdae4f7..3970201c6d66 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -416,9 +416,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.81" +version = "0.1.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" +checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" dependencies = [ "proc-macro2", "quote", @@ -2905,9 +2905,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.3" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b22d8e7369034b9a7132bc2008cac12f2013c8132b45e0554e6e20e2617f2156" +checksum = "a2d2fb862b7ba45e615c1429def928f2e15f815bdf933b27a2d3824e224c1f46" dependencies = [ "bytes", "pin-project-lite", @@ -2923,9 +2923,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.6" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba92fb39ec7ad06ca2582c0ca834dfeadcaf06ddfc8e635c80aa7e1c05315fdd" +checksum = "ea0a9b3a42929fad8a7c3de7f86ce0814cfa893328157672680e9fb1145549c5" dependencies = [ "bytes", "rand", From 5a35f3c0567d457589bb3269605da77bca9e87c5 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 2 Sep 2024 08:09:39 -0400 Subject: [PATCH 33/36] Update expected output --- datafusion/sqllogictest/test_files/arrow_typeof.slt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 4a24154e080e..77b10b41ccb3 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -432,5 +432,7 @@ MyAwesomeString Utf8View # Fails until we update to use the arrow-cast release with support for casting utf8 types to BinaryView # refer to merge commit https://github.com/apache/arrow-rs/commit/4bd737dab2aa17aca200259347909d48ed793ba1 -query error DataFusion error: This feature is not implemented: Unsupported CAST from Utf8 to BinaryView +query ?T select arrow_cast('MyAwesomeString', 'BinaryView'), arrow_typeof(arrow_cast('MyAwesomeString', 'BinaryView')) +---- +4d79417765736f6d65537472696e67 BinaryView From c534a2927f729e757da0da82d348c25160042644 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Sep 2024 16:10:56 -0400 Subject: [PATCH 34/36] Remove patch --- Cargo.toml | 16 ---------------- datafusion-cli/Cargo.toml | 15 --------------- 2 files changed, 31 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9a082d903b27..c155e475a026 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -168,19 +168,3 @@ large_futures = "warn" [workspace.lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] } unused_imports = "deny" - -## Temporary arrow-rs patch until 53.0.0 is released - -[patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 37886a7e8856..5daebdf06640 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -62,18 +62,3 @@ assert_cmd = "2.0" ctor = "0.2.0" predicates = "3.0" rstest = "0.17" - -## Temporary arrow-rs patch until 53.0.0 is released - -[patch.crates-io] -arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } -parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" } From 1dfd7130e49b7ac0908979ebc49e541199d48363 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Sep 2024 16:26:06 -0400 Subject: [PATCH 35/36] update datafusion-cli cargo --- datafusion-cli/Cargo.lock | 59 ++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 3970201c6d66..ac8887b86ad2 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -180,7 +180,8 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45aef0d9cf9a039bf6cd1acc451b137aca819977b0928dece52bd92811b640ba" dependencies = [ "arrow-arith", "arrow-array", @@ -200,7 +201,8 @@ dependencies = [ [[package]] name = "arrow-arith" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03675e42d1560790f3524800e41403b40d0da1c793fe9528929fde06d8c7649a" dependencies = [ "arrow-array", "arrow-buffer", @@ -214,7 +216,8 @@ dependencies = [ [[package]] name = "arrow-array" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd2bf348cf9f02a5975c5962c7fa6dee107a2009a7b41ac5fb1a027e12dc033f" dependencies = [ "ahash", "arrow-buffer", @@ -230,7 +233,8 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3092e37715f168976012ce52273c3989b5793b0db5f06cbaa246be25e5f0924d" dependencies = [ "bytes", "half", @@ -240,7 +244,8 @@ dependencies = [ [[package]] name = "arrow-cast" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ce1018bb710d502f9db06af026ed3561552e493e989a79d0d0f5d9cf267a785" dependencies = [ "arrow-array", "arrow-buffer", @@ -260,7 +265,8 @@ dependencies = [ [[package]] name = "arrow-csv" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd178575f45624d045e4ebee714e246a05d9652e41363ee3f57ec18cca97f740" dependencies = [ "arrow-array", "arrow-buffer", @@ -278,7 +284,8 @@ dependencies = [ [[package]] name = "arrow-data" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4ac0c4ee79150afe067dc4857154b3ee9c1cd52b5f40d59a77306d0ed18d65" dependencies = [ "arrow-buffer", "arrow-schema", @@ -289,7 +296,8 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb307482348a1267f91b0912e962cd53440e5de0f7fb24c5f7b10da70b38c94a" dependencies = [ "arrow-array", "arrow-buffer", @@ -303,7 +311,8 @@ dependencies = [ [[package]] name = "arrow-json" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24805ba326758effdd6f2cbdd482fcfab749544f21b134701add25b33f474e6" dependencies = [ "arrow-array", "arrow-buffer", @@ -322,7 +331,8 @@ dependencies = [ [[package]] name = "arrow-ord" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "644046c479d80ae8ed02a7f1e1399072ea344ca6a7b0e293ab2d5d9ed924aa3b" dependencies = [ "arrow-array", "arrow-buffer", @@ -336,7 +346,8 @@ dependencies = [ [[package]] name = "arrow-row" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a29791f8eb13b340ce35525b723f5f0df17ecb955599e11f65c2a94ab34e2efb" dependencies = [ "ahash", "arrow-array", @@ -349,12 +360,14 @@ dependencies = [ [[package]] name = "arrow-schema" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c85320a3a2facf2b2822b57aa9d6d9d55edb8aee0b6b5d3b8df158e503d10858" [[package]] name = "arrow-select" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cc7e6b582e23855fd1625ce46e51647aa440c20ea2e71b1d748e0839dd73cba" dependencies = [ "ahash", "arrow-array", @@ -367,7 +380,8 @@ dependencies = [ [[package]] name = "arrow-string" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0775b6567c66e56ded19b87a954b6b1beffbdd784ef95a3a2b03f59570c1d230" dependencies = [ "arrow-array", "arrow-buffer", @@ -2694,7 +2708,8 @@ dependencies = [ [[package]] name = "parquet" version = "53.0.0" -source = "git+https://github.com/apache/arrow-rs.git?rev=ffd216d57469a33303c0d2ec9b974fd25cc0e0f9#ffd216d57469a33303c0d2ec9b974fd25cc0e0f9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0fbf928021131daaa57d334ca8e3904fe9ae22f73c56244fc7db9b04eedc3d8" dependencies = [ "ahash", "arrow-array", @@ -2905,9 +2920,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2d2fb862b7ba45e615c1429def928f2e15f815bdf933b27a2d3824e224c1f46" +checksum = "8c7c5fdde3cdae7203427dc4f0a68fe0ed09833edc525a03456b153b79828684" dependencies = [ "bytes", "pin-project-lite", @@ -2923,9 +2938,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.7" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea0a9b3a42929fad8a7c3de7f86ce0814cfa893328157672680e9fb1145549c5" +checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" dependencies = [ "bytes", "rand", @@ -2940,15 +2955,15 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.4" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bffec3605b73c6f1754535084a85229fa8a30f86014e6c81aeec4abb68b0285" +checksum = "4fe68c2e9e1a1234e218683dbdf9f9dfcb094113c5ac2b938dfcb9bab4c4140b" dependencies = [ "libc", "once_cell", "socket2", "tracing", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] From a2613a6149c3c17c8b9e03b48d379359d5b1b2b3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 3 Sep 2024 16:50:10 -0400 Subject: [PATCH 36/36] Pin some aws sdks whose update caused CI failures --- datafusion-cli/Cargo.lock | 18 ++++++++++++------ datafusion-cli/Cargo.toml | 7 +++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index ac8887b86ad2..039f3fb9a6aa 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -540,9 +540,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.40.0" +version = "1.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5879bec6e74b648ce12f6085e7245417bc5f6d672781028384d2e494be3eb6d" +checksum = "11822090cf501c316c6f75711d77b96fba30658e3867a7762e5e2f5d32d31e81" dependencies = [ "aws-credential-types", "aws-runtime", @@ -562,9 +562,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.41.0" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ef4cd9362f638c22a3b959fd8df292e7e47fdf170270f86246b97109b5f2f7d" +checksum = "78a2a06ff89176123945d1bbe865603c4d7101bea216a550bb4d2e4e9ba74d74" dependencies = [ "aws-credential-types", "aws-runtime", @@ -584,9 +584,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.40.0" +version = "1.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b1e2735d2ab28b35ecbb5496c9d41857f52a0d6a0075bbf6a8af306045ea6f6" +checksum = "a20a91795850826a6f456f4a48eff1dfa59a0e69bdbf5b8c50518fd372106574" dependencies = [ "aws-credential-types", "aws-runtime", @@ -731,6 +731,7 @@ dependencies = [ "base64-simd", "bytes", "bytes-utils", + "futures-core", "http 0.2.12", "http 1.1.0", "http-body 0.4.6", @@ -743,6 +744,8 @@ dependencies = [ "ryu", "serde", "time", + "tokio", + "tokio-util", ] [[package]] @@ -1269,6 +1272,9 @@ dependencies = [ "async-trait", "aws-config", "aws-credential-types", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", "clap", "ctor", "datafusion", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 5daebdf06640..f2f52846ab54 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -33,6 +33,13 @@ readme = "README.md" arrow = { version = "53.0.0" } async-trait = "0.1.73" aws-config = "1.5.5" +# begin pin aws-sdk crates otherwise CI MSRV check fails +# We can't update these libraries yet as it requires Rust 1.78, which is not available until Nov 2024 +# per https://github.com/apache/datafusion?tab=readme-ov-file#rust-version-compatibility-policy +aws-sdk-sso = "=1.39.0" +aws-sdk-ssooidc = "=1.40.0" +aws-sdk-sts = "=1.39.0" +# end pin aws-sdk crates aws-credential-types = "1.2.0" clap = { version = "4.5.16", features = ["derive", "cargo"] } datafusion = { path = "../datafusion/core", version = "41.0.0", features = [