diff --git a/Cargo.lock b/Cargo.lock index fb4a860498..b2664c71ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,15 +58,6 @@ dependencies = [ "alloc-no-stdlib", ] -[[package]] -name = "ansi_term" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" -dependencies = [ - "winapi", -] - [[package]] name = "ansi_term" version = "0.12.1" @@ -96,11 +87,11 @@ checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" [[package]] name = "arrow" -version = "4.2.0" +version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93811be1c0f60f4b29d80b34dad4e59fdc397a9e580f849df9e2635701498663" +checksum = "06d2bd50fddbdcecd832742b388228eec02eb3aabd33144ef46f6bc5420bf662" dependencies = [ - "cfg_aliases", + "bitflags", "chrono", "csv", "flatbuffers", @@ -111,7 +102,7 @@ dependencies = [ "multiversion", "num", "prettytable-rs", - "rand 0.7.3", + "rand 0.8.3", "regex", "serde", "serde_derive", @@ -351,12 +342,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "cfg_aliases" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" - [[package]] name = "chrono" version = "0.4.19" @@ -371,21 +356,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "clap" -version = "2.33.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" -dependencies = [ - "ansi_term 0.11.0", - "atty", - "bitflags", - "strsim 0.8.0", - "textwrap 0.11.0", - "unicode-width", - "vec_map", -] - [[package]] name = "clap" version = "3.0.0-beta.2" @@ -398,9 +368,9 @@ dependencies = [ "indexmap", "lazy_static", "os_str_bytes", - "strsim 0.10.0", + "strsim", "termcolor", - "textwrap 0.12.1", + "textwrap", "unicode-width", "vec_map", ] @@ -525,15 +495,13 @@ dependencies = [ [[package]] name = "datafusion" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d28e885465d9c4ce7154b52511a3d3263c1a484010145dc84eb686abc56c2d" +version = "4.0.0-SNAPSHOT" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=2a4f94e622a9c3db88eeb5c5ec7dd12efb98b546#2a4f94e622a9c3db88eeb5c5ec7dd12efb98b546" dependencies = [ "ahash", "arrow", "async-trait", "chrono", - "clap 2.33.3", "futures", "hashbrown 0.11.2", "lazy_static", @@ -544,8 +512,8 @@ dependencies = [ "parquet", "paste 1.0.5", "pin-project-lite", + "rand 0.8.3", "regex", - "rustyline", "sha2", "smallvec", "sqlparser", @@ -566,7 +534,7 @@ dependencies = [ "bytes", "cfg-if", "chrono", - "clap 3.0.0-beta.2", + "clap", "datafusion", "env_logger", "errno", @@ -752,9 +720,9 @@ dependencies = [ [[package]] name = "flatbuffers" -version = "0.8.4" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c502342b7d6d73beb1b8bab39dc01deba0c8ef66f4e6f1eba7c69ee6b38069" +checksum = "ef4c5738bcd7fad10315029c50026f83c9da5e4a21f8ed66826f43e0e2bde5f6" dependencies = [ "bitflags", "smallvec", @@ -804,16 +772,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "fuchsia-cprng" version = "0.1.1" @@ -950,17 +908,6 @@ dependencies = [ "wasi 0.10.2+wasi-snapshot-preview1", ] -[[package]] -name = "ghost" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a5bcf1bbeab73aa4cf2fde60a846858dc036163c7c33bec309f8d17de785479" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "gimli" version = "0.23.0" @@ -1207,28 +1154,6 @@ version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" -[[package]] -name = "inventory" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f0f7efb804ec95e33db9ad49e4252f049e37e8b0a4652e3cd61f7999f2eff7f" -dependencies = [ - "ctor", - "ghost", - "inventory-impl", -] - -[[package]] -name = "inventory-impl" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75c094e94816723ab936484666968f5b58060492e880f3c8d00489a1e244fa51" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "ipnet" version = "2.3.0" @@ -1475,18 +1400,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "nix" -version = "0.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2ccba0cfe4fdf15982d1674c69b1fd80bad427d293849982668dfe454bd61f2" -dependencies = [ - "bitflags", - "cc", - "cfg-if", - "libc", -] - [[package]] name = "ntapi" version = "0.3.6" @@ -1721,9 +1634,9 @@ dependencies = [ [[package]] name = "parquet" -version = "4.2.0" +version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9275a7f8eab04e6ab6918b4fdd50e00aeba3c288e0f91bdc5da87a2c8ff288a6" +checksum = "9297cb17ef7287f6105685d230abbb2b37247657edf2b4a99271088e7d5b0ddd" dependencies = [ "arrow", "base64 0.13.0", @@ -1734,6 +1647,7 @@ dependencies = [ "lz4", "num-bigint", "parquet-format", + "rand 0.8.3", "snap", "thrift", "zstd", @@ -1829,7 +1743,7 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cab0e7c02cf376875e9335e0ba1da535775beb5450d21e1dffca068818ed98b" dependencies = [ - "ansi_term 0.12.1", + "ansi_term", "ctor", "diff", "output_vt100", @@ -1906,26 +1820,34 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.13.2" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4837b8e8e18a102c23f79d1e9a110b597ea3b684c95e874eb1ad88f8683109c3" +checksum = "338f7f3701e11fd7f76508c91fbcaabc982564bcaf4d1ca7e1574ff2b4778aec" dependencies = [ "cfg-if", - "ctor", "indoc", - "inventory", "libc", "parking_lot", "paste 0.1.18", + "pyo3-build-config", "pyo3-macros", "unindent", ] +[[package]] +name = "pyo3-build-config" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcb2e98cc9ccc83d4f7115c8f925e0057e88c8d324b1bc4c2db4a7270c06ac9d" +dependencies = [ + "once_cell", +] + [[package]] name = "pyo3-macros" -version = "0.13.2" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47f2c300ceec3e58064fd5f8f5b61230f2ffd64bde4970c81fdd0563a2db1bb" +checksum = "cfb8671a42d0ecc4bec8cc107ae96d49292ca20cd1968e09b98af4aafd516adf" dependencies = [ "pyo3-macros-backend", "quote", @@ -1934,11 +1856,12 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.13.2" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87b097e5d84fcbe3e167f400fbedd657820a375b034c78bd852050749a575d66" +checksum = "9addf6dc422f05d4949cc0990195ee74fa43e3c3780cc9a1972fe9e7b68a9f48" dependencies = [ "proc-macro2", + "pyo3-build-config", "quote", "syn", ] @@ -2363,27 +2286,6 @@ dependencies = [ "security-framework", ] -[[package]] -name = "rustyline" -version = "7.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8227301bfc717136f0ecbd3d064ba8199e44497a0bdd46bb01ede4387cfd2cec" -dependencies = [ - "bitflags", - "cfg-if", - "dirs-next", - "fs2", - "libc", - "log", - "memchr", - "nix", - "scopeguard", - "unicode-segmentation", - "unicode-width", - "utf8parse", - "winapi", -] - [[package]] name = "rutie" version = "0.8.2" @@ -2707,12 +2609,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0" -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - [[package]] name = "strsim" version = "0.10.0" @@ -2792,15 +2688,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - [[package]] name = "textwrap" version = "0.12.1" @@ -3133,12 +3020,6 @@ dependencies = [ "serde", ] -[[package]] -name = "utf8parse" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936e4b492acfd135421d8dca4b1aa80a7bfc26e702ef3af710e0752684df5372" - [[package]] name = "utime" version = "0.3.1" @@ -3350,18 +3231,18 @@ checksum = "81a974bcdd357f0dca4d41677db03436324d45a4c9ed2d0b873a5a360ce41c36" [[package]] name = "zstd" -version = "0.8.2+zstd.1.5.0" +version = "0.9.0+zstd.1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c83508bcbbdc9c3abcf77e8e56773d3ffcd2479e0933caab2e7d6b5a9e183aae" +checksum = "07749a5dc2cb6b36661290245e350f15ec3bbb304e493db54a1d354480522ccd" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "4.1.0+zstd.1.5.0" +version = "4.1.1+zstd.1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d30375f78e185ca4c91930f42ea2c0162f9aa29737032501f93b79266d985ae7" +checksum = "c91c90f2c593b003603e5e0493c837088df4469da25aafff8bce42ba48caf079" dependencies = [ "libc", "zstd-sys", @@ -3369,9 +3250,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "1.6.0+zstd.1.5.0" +version = "1.6.1+zstd.1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2141bed8922b427761470e6bbfeff255da94fa20b0bbeab0d9297fcaf71e3aa7" +checksum = "615120c7a2431d16cf1cf979e7fc31ba7a5b5e5707b29c8a99e5dbf8a8392a33" dependencies = [ "cc", "libc", diff --git a/python/Cargo.toml b/python/Cargo.toml index 19da927fb8..ddf63c5dba 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -20,10 +20,10 @@ env_logger = "0" # for binary wheel best practice, statically link openssl reqwest = { version = "*", features = ["native-tls-vendored"] } serde_json = "1" -arrow = { version = "4" } +arrow = { version = "5" } [dependencies.pyo3] -version = "0.13" +version = "0.14" features = ["extension-module", "abi3", "abi3-py36"] [dependencies.deltalake] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 9f511880c1..ac3d584c02 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -43,9 +43,9 @@ maplit = { version = "1", optional = true } # High-level writer parquet-format = "~2.6.1" -arrow = { version = "4" } -datafusion = { version = "4", optional = true } -parquet = { version = "4" } +arrow = { version = "5" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev="2a4f94e622a9c3db88eeb5c5ec7dd12efb98b546", optional = true } +parquet = { version = "5" } cfg-if = "1" async-trait = "0.1" # NOTE: disable rust-dataframe integration since it currently doesn't have a diff --git a/rust/src/delta_datafusion.rs b/rust/src/delta_datafusion.rs index 6140e647e1..d9dfe16d65 100644 --- a/rust/src/delta_datafusion.rs +++ b/rust/src/delta_datafusion.rs @@ -28,7 +28,8 @@ use arrow::datatypes::Schema as ArrowSchema; use datafusion::datasource::datasource::{ColumnStatistics, Statistics}; use datafusion::datasource::TableProvider; use datafusion::logical_plan::{combine_filters, Expr}; -use datafusion::physical_plan::parquet::{ParquetExec, ParquetPartition, RowGroupPredicateBuilder}; +use datafusion::physical_optimizer::pruning::PruningPredicate; +use datafusion::physical_plan::parquet::{ParquetExec, ParquetExecMetrics, ParquetPartition}; use datafusion::physical_plan::ExecutionPlan; use datafusion::scalar::ScalarValue; @@ -52,9 +53,9 @@ impl TableProvider for delta::DeltaTable { filters: &[Expr], limit: Option, ) -> datafusion::error::Result> { - let schema = >::try_from( + let schema = Arc::new(>::try_from( delta::DeltaTable::schema(&self).unwrap(), - )?; + )?); let filenames = self.get_file_uris(); let partitions = filenames @@ -95,15 +96,15 @@ impl TableProvider for delta::DeltaTable { Ok(ParquetPartition::new(vec![fname], statistics)) }) .collect::>()?; - let predicate_builder = combine_filters(filters).and_then(|predicate_expr| { - RowGroupPredicateBuilder::try_new(&predicate_expr, schema.clone()).ok() + PruningPredicate::try_new(&predicate_expr, schema.clone()).ok() }); Ok(Arc::new(ParquetExec::new( partitions, schema, projection.clone(), + ParquetExecMetrics::new(), predicate_builder, batch_size, limit,