diff --git a/Cargo.lock b/Cargo.lock index 01c1aa7ae62d..e6a49c90baac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -118,21 +118,21 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow-format" -version = "0.4.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2333f8ccf0d597ba779863c57a0b61f635721187fb2fdeabae92691d7d582fe5" +checksum = "216249afef413d7e9e9b4b543e73b3e371ace3a812380af98f1c871521572cdd" dependencies = [ "planus", - "prost 0.9.0", - "prost-derive 0.9.0", + "prost 0.10.1", + "prost-derive 0.10.1", "serde", - "tonic", + "tonic 0.7.2", ] [[package]] name = "arrow2" version = "0.11.2" -source = "git+https://github.com/datafuse-extras/arrow2?rev=826a2b8#826a2b8ed8598a614c5df9115ea657d1e3c40184" +source = "git+https://github.com/datafuse-extras/arrow2?rev=48a3087#48a308772fcc18fce890447b0bfa30611fc813e6" dependencies = [ "ahash", "arrow-format", @@ -300,6 +300,49 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "axum" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4af7447fc1214c1f3a1ace861d0216a6c8bb13965b64bbad9650f375b67689a" +dependencies = [ + "async-trait", + "axum-core", + "bitflags", + "bytes 1.1.0", + "futures-util", + "http", + "http-body", + "hyper", + "itoa 1.0.1", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde", + "sync_wrapper", + "tokio", + "tower", + "tower-http", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bdc19781b16e32f8a7200368a336fa4509d4b72ef15dd4e41df5290855ee1e6" +dependencies = [ + "async-trait", + "bytes 1.1.0", + "futures-util", + "http", + "http-body", + "mime", +] + [[package]] name = "backoff" version = "0.4.0" @@ -973,6 +1016,8 @@ dependencies = [ "common-io", "pretty_assertions", "regex", + "serde", + "serde_json", ] [[package]] @@ -1012,13 +1057,13 @@ dependencies = [ "common-arrow", "octocrab", "paste", - "prost 0.9.0", + "prost 0.10.1", "serde", "serde_json", "sqlparser", "thiserror", "time 0.3.9", - "tonic", + "tonic 0.7.2", ] [[package]] @@ -1078,7 +1123,7 @@ dependencies = [ "once_cell", "serde", "thiserror", - "tonic", + "tonic 0.7.2", "trust-dns-resolver", ] @@ -1155,7 +1200,7 @@ dependencies = [ "serde", "serde_json", "thiserror", - "tonic", + "tonic 0.7.2", ] [[package]] @@ -1192,13 +1237,13 @@ dependencies = [ "derive_more", "futures", "once_cell", - "prost 0.9.0", + "prost 0.10.1", "rand 0.8.5", "semver", "serde", "serde_json", "thiserror", - "tonic", + "tonic 0.7.2", ] [[package]] @@ -1264,7 +1309,8 @@ dependencies = [ "num-traits", "once_cell", "openraft", - "prost 0.9.0", + "prost 0.10.1", + "prost-build 0.10.1", "regex", "serde", "serde_json", @@ -1272,8 +1318,8 @@ dependencies = [ "sha2 0.10.2", "sled", "thiserror", - "tonic", - "tonic-build 0.6.2", + "tonic 0.7.2", + "tonic-build 0.7.2", ] [[package]] @@ -1332,8 +1378,8 @@ dependencies = [ "num-traits", "prost 0.10.1", "prost-build 0.10.1", - "tonic", - "tonic-build 0.7.0", + "tonic 0.7.2", + "tonic-build 0.7.2", ] [[package]] @@ -1367,7 +1413,7 @@ dependencies = [ "once_cell", "opentelemetry", "opentelemetry-jaeger", - "tonic", + "tonic 0.7.2", "tracing", "tracing-appender", "tracing-bunyan-formatter", @@ -1394,7 +1440,7 @@ checksum = "cc347c19eb5b940f396ac155822caee6662f850d97306890ac3773ed76c90c5a" dependencies = [ "prost 0.9.0", "prost-types 0.9.0", - "tonic", + "tonic 0.6.2", "tonic-build 0.6.2", "tracing-core", ] @@ -1417,7 +1463,7 @@ dependencies = [ "thread_local", "tokio", "tokio-stream", - "tonic", + "tonic 0.6.2", "tracing", "tracing-core", "tracing-subscriber", @@ -1805,7 +1851,7 @@ dependencies = [ "poem", "pretty_assertions", "prometheus", - "prost 0.9.0", + "prost 0.10.1", "regex", "reqwest", "semver", @@ -1817,7 +1863,7 @@ dependencies = [ "temp-env", "tempfile", "tokio-stream", - "tonic", + "tonic 0.7.2", "tonic-reflection", ] @@ -1838,7 +1884,7 @@ dependencies = [ "serde", "serde_json", "tokio-stream", - "tonic", + "tonic 0.7.2", ] [[package]] @@ -1913,7 +1959,7 @@ dependencies = [ "petgraph", "poem", "pretty_assertions", - "prost 0.9.0", + "prost 0.10.1", "rand 0.8.5", "regex", "reqwest", @@ -1933,10 +1979,10 @@ dependencies = [ "threadpool", "thrift", "time 0.3.9", - "tokio-rustls 0.23.3", + "tokio-rustls", "tokio-stream", "toml", - "tonic", + "tonic 0.7.2", "twox-hash", "typetag", "url", @@ -2910,6 +2956,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-range-header" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfe8eed0a9285ef776bb792479ea3834e8b94e13d615c2f66d03dd50a435a29" + [[package]] name = "http-types" version = "2.12.0" @@ -3550,6 +3602,12 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" +[[package]] +name = "matchit" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73cbba799671b762df5a175adf59ce145165747bb891505c43d09aefbbf38beb" + [[package]] name = "md5" version = "0.7.0" @@ -4717,14 +4775,14 @@ dependencies = [ "pin-project-lite", "poem-derive", "regex", - "rustls-pemfile", + "rustls-pemfile 0.3.0", "serde", "serde_json", "serde_urlencoded", "smallvec", "thiserror", "tokio", - "tokio-rustls 0.23.3", + "tokio-rustls", "tokio-stream", "tokio-util 0.7.0", "tracing", @@ -5544,19 +5602,6 @@ dependencies = [ "semver", ] -[[package]] -name = "rustls" -version = "0.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7" -dependencies = [ - "base64 0.13.0", - "log", - "ring", - "sct 0.6.1", - "webpki 0.21.4", -] - [[package]] name = "rustls" version = "0.20.2" @@ -5565,18 +5610,18 @@ checksum = "d37e5e2290f3e040b594b1a9e04377c2c671f1a1cfd9bfdef82106ac1c113f84" dependencies = [ "log", "ring", - "sct 0.7.0", - "webpki 0.22.0", + "sct", + "webpki", ] [[package]] name = "rustls-native-certs" -version = "0.5.0" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a07b7c1885bd8ed3831c289b7870b13ef46fe0e856d288c30d9cc17d75a2092" +checksum = "0167bac7a9f490495f3c33013e7722b53cb087ecbe082fb0c6387c96f634ea50" dependencies = [ "openssl-probe", - "rustls 0.19.1", + "rustls-pemfile 1.0.0", "schannel", "security-framework", ] @@ -5590,6 +5635,15 @@ dependencies = [ "base64 0.13.0", ] +[[package]] +name = "rustls-pemfile" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7522c9de787ff061458fe9a829dc790a3f5b22dc571694fc5883f448b94d9a9" +dependencies = [ + "base64 0.13.0", +] + [[package]] name = "rustversion" version = "1.0.6" @@ -5633,16 +5687,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "sct" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b362b83898e0e69f38515b82ee15aa80636befe47c3b6d3d89a911e78fc228ce" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "sct" version = "0.7.0" @@ -6202,6 +6246,12 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "sync_wrapper" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20518fe4a4c9acf048008599e464deb21beeae3d3578418951a189c235a7a9a8" + [[package]] name = "synstructure" version = "0.12.6" @@ -6467,26 +6517,15 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-rustls" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc6844de72e57df1980054b38be3a9f4702aba4858be64dd700181a8a6d0e1b6" -dependencies = [ - "rustls 0.19.1", - "tokio", - "webpki 0.21.4", -] - [[package]] name = "tokio-rustls" version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4151fda0cf2798550ad0b34bcfc9b9dcc2a9d2471c895c68f3a8818e54f2389e" dependencies = [ - "rustls 0.20.2", + "rustls", "tokio", - "webpki 0.22.0", + "webpki", ] [[package]] @@ -6558,9 +6597,7 @@ dependencies = [ "pin-project", "prost 0.9.0", "prost-derive 0.9.0", - "rustls-native-certs", "tokio", - "tokio-rustls 0.22.0", "tokio-stream", "tokio-util 0.6.9", "tower", @@ -6570,6 +6607,41 @@ dependencies = [ "tracing-futures", ] +[[package]] +name = "tonic" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be9d60db39854b30b835107500cf0aca0b0d14d6e1c3de124217c23a29c2ddb" +dependencies = [ + "async-stream", + "async-trait", + "axum", + "base64 0.13.0", + "bytes 1.1.0", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-timeout", + "percent-encoding", + "pin-project", + "prost 0.10.1", + "prost-derive 0.10.1", + "rustls-native-certs", + "rustls-pemfile 1.0.0", + "tokio", + "tokio-rustls", + "tokio-stream", + "tokio-util 0.7.0", + "tower", + "tower-layer", + "tower-service", + "tracing", + "tracing-futures", +] + [[package]] name = "tonic-build" version = "0.6.2" @@ -6584,9 +6656,9 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.7.0" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d17087af5c80e5d5fc8ba9878e60258065a0a757e35efe7a05b7904bece1943" +checksum = "d9263bf4c9bfaae7317c1c2faf7f18491d2fe476f70c414b73bf5d445b00ffa1" dependencies = [ "prettyplease", "proc-macro2", @@ -6597,17 +6669,17 @@ dependencies = [ [[package]] name = "tonic-reflection" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "228cc5aa5d3e6e0624b5f756a7558038ee86428d1d58d8c6e551b389b12cf355" +checksum = "d1d786fcf313b48f1aac280142eae249f3c03495355c7906aa49872a41955015" dependencies = [ "bytes 1.1.0", - "prost 0.9.0", - "prost-types 0.9.0", + "prost 0.10.1", + "prost-types 0.10.1", "tokio", "tokio-stream", - "tonic", - "tonic-build 0.6.2", + "tonic 0.7.2", + "tonic-build 0.7.2", ] [[package]] @@ -6630,6 +6702,25 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower-http" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d342c6d58709c0a6d48d48dabbb62d4ef955cf5f0f3bbfd845838e7ae88dbae" +dependencies = [ + "bitflags", + "bytes 1.1.0", + "futures-core", + "futures-util", + "http", + "http-body", + "http-range-header", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" version = "0.3.1" @@ -7118,16 +7209,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki" -version = "0.21.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e38c0608262c46d4a56202ebabdeb094cef7e560ca7a226c6bf055188aa4ea" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "webpki" version = "0.22.0" diff --git a/common/arrow/Cargo.toml b/common/arrow/Cargo.toml index 41f755bcd3f1..400745744202 100644 --- a/common/arrow/Cargo.toml +++ b/common/arrow/Cargo.toml @@ -31,10 +31,10 @@ simd = ["arrow/simd"] arrow = { package = "arrow2", git = "https://github.com/datafuse-extras/arrow2", default-features = false, features = [ "io_parquet", "io_parquet_compression", -], rev = "826a2b8" } +], rev = "48a3087" } # Crates.io dependencies -arrow-format = { version = "0.4.0", features = ["flight-data", "flight-service", "ipc"] } +arrow-format = { version = "0.6.0", features = ["flight-data", "flight-service", "ipc"] } futures = "0.3.21" parquet2 = { version = "0.12", default_features = false } diff --git a/common/datablocks/Cargo.toml b/common/datablocks/Cargo.toml index b81556795dc1..d933f7c5729c 100644 --- a/common/datablocks/Cargo.toml +++ b/common/datablocks/Cargo.toml @@ -24,6 +24,8 @@ common-io = { path = "../io" } ahash = "0.7.6" comfy-table = "5.0.1" regex = "1.5.5" +serde = { version = "1.0.136", features = ["derive"] } +serde_json = "1.0.79" [dev-dependencies] pretty_assertions = "1.2.1" diff --git a/common/datablocks/src/kernels/data_block_sort.rs b/common/datablocks/src/kernels/data_block_sort.rs index 3f7b66541471..c38d7965d53a 100644 --- a/common/datablocks/src/kernels/data_block_sort.rs +++ b/common/datablocks/src/kernels/data_block_sort.rs @@ -16,10 +16,15 @@ use std::iter::once; use std::sync::Arc; use common_arrow::arrow::array::growable::make_growable; +use common_arrow::arrow::array::ord as arrow_ord; +use common_arrow::arrow::array::ord::DynComparator; use common_arrow::arrow::array::Array; use common_arrow::arrow::array::ArrayRef; use common_arrow::arrow::compute::merge_sort::*; use common_arrow::arrow::compute::sort as arrow_sort; +use common_arrow::arrow::datatypes::DataType as ArrowType; +use common_arrow::arrow::error::Error as ArrowError; +use common_arrow::arrow::error::Result as ArrowResult; use common_datavalues::prelude::*; use common_exception::ErrorCode; use common_exception::Result; @@ -58,7 +63,7 @@ impl DataBlock { }) .collect::>>()?; - let indices = arrow_sort::lexsort_to_indices(&order_arrays, limit)?; + let indices = arrow_sort::lexsort_to_indices_impl(&order_arrays, limit, &build_compare)?; DataBlock::block_take_by_indices(block, indices.values()) } @@ -111,7 +116,7 @@ impl DataBlock { }) .collect::>(); - let comparator = build_comparator(&sort_options_with_array)?; + let comparator = build_comparator_impl(&sort_options_with_array, &build_compare)?; let lhs_indices = (0, 0, lhs.num_rows()); let rhs_indices = (1, 0, rhs.num_rows()); let slices = merge_sort_slices(once(&lhs_indices), once(&rhs_indices), &comparator); @@ -203,3 +208,38 @@ impl DataBlock { } } } + +fn compare_variant(left: &dyn Array, right: &dyn Array) -> ArrowResult { + let left = VariantColumn::from_arrow_array(left); + let right = VariantColumn::from_arrow_array(right); + + Ok(Box::new(move |i, j| { + left.get_data(i).cmp(right.get_data(j)) + })) +} + +fn compare_array(left: &dyn Array, right: &dyn Array) -> ArrowResult { + let left = ArrayColumn::from_arrow_array(left); + let right = ArrayColumn::from_arrow_array(right); + + Ok(Box::new(move |i, j| { + left.get_data(i).cmp(&right.get_data(j)) + })) +} + +fn build_compare(left: &dyn Array, right: &dyn Array) -> ArrowResult { + match left.data_type() { + ArrowType::LargeList(_) => compare_array(left, right), + ArrowType::Extension(name, _, _) => { + if name == "Variant" || name == "VariantArray" || name == "VariantObject" { + compare_variant(left, right) + } else { + return Err(ArrowError::NotYetImplemented(format!( + "Sort not supported for data type {:?}", + left.data_type() + ))); + } + } + _ => arrow_ord::build_compare(left, right), + } +} diff --git a/common/datablocks/tests/it/kernels/data_block_sort.rs b/common/datablocks/tests/it/kernels/data_block_sort.rs index 768e6b6bcc9e..029171a0721c 100644 --- a/common/datablocks/tests/it/kernels/data_block_sort.rs +++ b/common/datablocks/tests/it/kernels/data_block_sort.rs @@ -15,6 +15,7 @@ use common_datablocks::*; use common_datavalues::prelude::*; use common_exception::Result; +use serde_json::json; #[test] fn test_data_block_sort() -> Result<()> { @@ -69,6 +70,158 @@ fn test_data_block_sort() -> Result<()> { ]; common_datablocks::assert_blocks_eq(expected, &[results]); } + + { + let options = vec![SortColumnDescription { + column_name: "b".to_owned(), + asc: true, + nulls_first: false, + }]; + let results = DataBlock::sort_block(&raw, &options, Some(3))?; + assert_eq!(raw.schema(), results.schema()); + + let expected = vec![ + "+---+----+", + "| a | b |", + "+---+----+", + "| 6 | b1 |", + "| 4 | b2 |", + "| 3 | b3 |", + "+---+----+", + ]; + common_datablocks::assert_blocks_eq(expected, &[results]); + } + + { + let options = vec![SortColumnDescription { + column_name: "b".to_owned(), + asc: false, + nulls_first: false, + }]; + let results = DataBlock::sort_block(&raw, &options, Some(3))?; + assert_eq!(raw.schema(), results.schema()); + + let expected = vec![ + "+---+----+", + "| a | b |", + "+---+----+", + "| 7 | b6 |", + "| 1 | b5 |", + "| 2 | b4 |", + "+---+----+", + ]; + common_datablocks::assert_blocks_eq(expected, &[results]); + } + + let schema = DataSchemaRefExt::create(vec![ + DataField::new("c", VariantValue::to_data_type()), + DataField::new("d", ArrayType::new_impl(i64::to_data_type())), + ]); + + let raw = DataBlock::create(schema, vec![ + Series::from_data(vec![ + VariantValue::from(json!(true)), + VariantValue::from(json!(123)), + VariantValue::from(json!(12.34)), + VariantValue::from(json!("abc")), + VariantValue::from(json!([1, 2, 3])), + VariantValue::from(json!({"a":"b"})), + ]), + Series::from_data(vec![ + ArrayValue::new(vec![1_i64.into(), 2_i64.into()]), + ArrayValue::new(vec![1_i64.into(), 2_i64.into(), 3_i64.into()]), + ArrayValue::new(vec![1_i64.into(), 2_i64.into(), 4_i64.into()]), + ArrayValue::new(vec![4_i64.into(), 5_i64.into(), 6_i64.into()]), + ArrayValue::new(vec![6_i64.into()]), + ArrayValue::new(vec![7_i64.into(), 8_i64.into(), 9_i64.into()]), + ]), + ]); + + { + let options = vec![SortColumnDescription { + column_name: "c".to_owned(), + asc: true, + nulls_first: false, + }]; + let results = DataBlock::sort_block(&raw, &options, Some(3))?; + assert_eq!(raw.schema(), results.schema()); + + let expected = vec![ + "+-------+-----------+", + "| c | d |", + "+-------+-----------+", + "| true | [1, 2] |", + "| 12.34 | [1, 2, 4] |", + "| 123 | [1, 2, 3] |", + "+-------+-----------+", + ]; + common_datablocks::assert_blocks_eq(expected, &[results]); + } + + { + let options = vec![SortColumnDescription { + column_name: "c".to_owned(), + asc: false, + nulls_first: false, + }]; + let results = DataBlock::sort_block(&raw, &options, Some(3))?; + assert_eq!(raw.schema(), results.schema()); + + let expected = vec![ + "+-----------+-----------+", + "| c | d |", + "+-----------+-----------+", + "| [1,2,3] | [6] |", + "| {\"a\":\"b\"} | [7, 8, 9] |", + "| \"abc\" | [4, 5, 6] |", + "+-----------+-----------+", + ]; + common_datablocks::assert_blocks_eq(expected, &[results]); + } + + { + let options = vec![SortColumnDescription { + column_name: "d".to_owned(), + asc: true, + nulls_first: false, + }]; + println!("raw={:?}", raw); + let results = DataBlock::sort_block(&raw, &options, Some(3))?; + assert_eq!(raw.schema(), results.schema()); + + let expected = vec![ + "+-------+-----------+", + "| c | d |", + "+-------+-----------+", + "| true | [1, 2] |", + "| 123 | [1, 2, 3] |", + "| 12.34 | [1, 2, 4] |", + "+-------+-----------+", + ]; + common_datablocks::assert_blocks_eq(expected, &[results]); + } + + { + let options = vec![SortColumnDescription { + column_name: "d".to_owned(), + asc: false, + nulls_first: false, + }]; + let results = DataBlock::sort_block(&raw, &options, Some(3))?; + assert_eq!(raw.schema(), results.schema()); + + let expected = vec![ + "+-----------+-----------+", + "| c | d |", + "+-----------+-----------+", + "| {\"a\":\"b\"} | [7, 8, 9] |", + "| [1,2,3] | [6] |", + "| \"abc\" | [4, 5, 6] |", + "+-----------+-----------+", + ]; + common_datablocks::assert_blocks_eq(expected, &[results]); + } + Ok(()) } @@ -114,5 +267,111 @@ fn test_data_block_merge_sort() -> Result<()> { common_datablocks::assert_blocks_eq(expected, &[results]); } + { + let options = vec![SortColumnDescription { + column_name: "b".to_owned(), + asc: true, + nulls_first: false, + }]; + let results = DataBlock::merge_sort_block(&raw1, &raw2, &options, None)?; + + assert_eq!(raw1.schema(), results.schema()); + + let expected = vec![ + "+---+----+", + "| a | b |", + "+---+----+", + "| 3 | b1 |", + "| 5 | b2 |", + "| 7 | b3 |", + "| 2 | b4 |", + "| 4 | b5 |", + "| 6 | b6 |", + "+---+----+", + ]; + common_datablocks::assert_blocks_eq(expected, &[results]); + } + + let schema = DataSchemaRefExt::create(vec![ + DataField::new("c", VariantValue::to_data_type()), + DataField::new("d", ArrayType::new_impl(i64::to_data_type())), + ]); + + let raw1 = DataBlock::create(schema.clone(), vec![ + Series::from_data(vec![ + VariantValue::from(json!(true)), + VariantValue::from(json!("abc")), + VariantValue::from(json!([1, 2, 3])), + ]), + Series::from_data(vec![ + ArrayValue::new(vec![1_i64.into(), 2_i64.into()]), + ArrayValue::new(vec![1_i64.into(), 2_i64.into(), 3_i64.into()]), + ArrayValue::new(vec![6_i64.into()]), + ]), + ]); + + let raw2 = DataBlock::create(schema, vec![ + Series::from_data(vec![ + VariantValue::from(json!(12.34)), + VariantValue::from(json!(123)), + VariantValue::from(json!({"a":"b"})), + ]), + Series::from_data(vec![ + ArrayValue::new(vec![1_i64.into(), 2_i64.into(), 4_i64.into()]), + ArrayValue::new(vec![4_i64.into(), 5_i64.into(), 6_i64.into()]), + ArrayValue::new(vec![7_i64.into(), 8_i64.into(), 9_i64.into()]), + ]), + ]); + + { + let options = vec![SortColumnDescription { + column_name: "c".to_owned(), + asc: true, + nulls_first: false, + }]; + let results = DataBlock::merge_sort_block(&raw1, &raw2, &options, None)?; + + assert_eq!(raw1.schema(), results.schema()); + + let expected = vec![ + "+-----------+-----------+", + "| c | d |", + "+-----------+-----------+", + "| true | [1, 2] |", + "| 12.34 | [1, 2, 4] |", + "| 123 | [4, 5, 6] |", + "| \"abc\" | [1, 2, 3] |", + "| {\"a\":\"b\"} | [7, 8, 9] |", + "| [1,2,3] | [6] |", + "+-----------+-----------+", + ]; + common_datablocks::assert_blocks_eq(expected, &[results]); + } + + { + let options = vec![SortColumnDescription { + column_name: "d".to_owned(), + asc: true, + nulls_first: false, + }]; + let results = DataBlock::merge_sort_block(&raw1, &raw2, &options, None)?; + + assert_eq!(raw1.schema(), results.schema()); + + let expected = vec![ + "+-----------+-----------+", + "| c | d |", + "+-----------+-----------+", + "| true | [1, 2] |", + "| \"abc\" | [1, 2, 3] |", + "| 12.34 | [1, 2, 4] |", + "| 123 | [4, 5, 6] |", + "| [1,2,3] | [6] |", + "| {\"a\":\"b\"} | [7, 8, 9] |", + "+-----------+-----------+", + ]; + common_datablocks::assert_blocks_eq(expected, &[results]); + } + Ok(()) } diff --git a/common/exception/Cargo.toml b/common/exception/Cargo.toml index caab9aff1d4a..546386d058ea 100644 --- a/common/exception/Cargo.toml +++ b/common/exception/Cargo.toml @@ -16,12 +16,12 @@ common-arrow = { path = "../arrow" } anyhow = "1.0.56" octocrab = "0.15.4" paste = "1.0.7" -prost = "=0.9.0" +prost = "=0.10.1" serde = { version = "1.0.136", features = ["derive"] } serde_json = "1.0.79" thiserror = "1.0.30" time = "0.3.9" -tonic = "=0.6.2" +tonic = "=0.7.2" # Github dependencies bincode = { git = "https://github.com/datafuse-extras/bincode", rev = "fd3f9ff" } diff --git a/common/exception/src/exception_into.rs b/common/exception/src/exception_into.rs index da2ee5f5d327..c7391f921482 100644 --- a/common/exception/src/exception_into.rs +++ b/common/exception/src/exception_into.rs @@ -88,8 +88,8 @@ impl From for ErrorCode { } } -impl From for ErrorCode { - fn from(error: common_arrow::arrow::error::ArrowError) -> Self { +impl From for ErrorCode { + fn from(error: common_arrow::arrow::error::Error) -> Self { ErrorCode::from_std_error(error) } } diff --git a/common/grpc/Cargo.toml b/common/grpc/Cargo.toml index 607cf678145e..441c5d16b4f3 100644 --- a/common/grpc/Cargo.toml +++ b/common/grpc/Cargo.toml @@ -25,7 +25,7 @@ jwt-simple = "0.10.9" once_cell = "1.10.0" serde = { version = "1.0.136", features = ["derive"] } thiserror = "1.0.30" -tonic = { version = "=0.6.2", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } +tonic = { version = "=0.7.2", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } trust-dns-resolver = { version = "0.21.2", features = ["system-config"] } [build-dependencies] diff --git a/common/meta/api/Cargo.toml b/common/meta/api/Cargo.toml index 3010993ff878..e5b6fa2dd97b 100644 --- a/common/meta/api/Cargo.toml +++ b/common/meta/api/Cargo.toml @@ -28,4 +28,4 @@ maplit = "1.0.2" serde = { version = "1.0.136", features = ["derive"] } serde_json = "1.0.79" thiserror = "1.0.30" -tonic = { version = "=0.6.2", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } +tonic = { version = "=0.7.2", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } diff --git a/common/meta/grpc/Cargo.toml b/common/meta/grpc/Cargo.toml index 2c1d60322e84..29792cd9eebd 100644 --- a/common/meta/grpc/Cargo.toml +++ b/common/meta/grpc/Cargo.toml @@ -27,13 +27,13 @@ common-tracing = { path = "../../tracing" } derive_more = "0.99.17" futures = "0.3.21" once_cell = "1.10.0" -prost = "=0.9.0" +prost = "=0.10.1" rand = "0.8.5" semver = "1.0.9" serde = { version = "1.0.136", features = ["derive"] } serde_json = "1.0.79" thiserror = "1.0.30" -tonic = { version = "=0.6.2", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } +tonic = { version = "=0.7.2", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } [dev-dependencies] diff --git a/common/meta/types/Cargo.toml b/common/meta/types/Cargo.toml index 76d4c38d12f1..09749cc9a7f1 100644 --- a/common/meta/types/Cargo.toml +++ b/common/meta/types/Cargo.toml @@ -27,17 +27,18 @@ num = "0.4" num-derive = "0.3" num-traits = "0.2" once_cell = "1.10.0" -prost = "=0.9.0" +prost = "=0.10.1" serde = { version = "1.0.136", features = ["derive"] } serde_json = "1.0.79" sha1 = "0.10.1" sha2 = "0.10.2" thiserror = "1.0.30" -tonic = { version = "=0.6.2", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } +tonic = { version = "=0.7.2", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } [build-dependencies] common-building = { path = "../../building" } -tonic-build = "=0.6.2" +prost-build = "=0.10.1" +tonic-build = "=0.7.2" [dev-dependencies] anyhow = "1.0.56" diff --git a/common/meta/types/build.rs b/common/meta/types/build.rs index 3c66cf11156b..df3625de651a 100644 --- a/common/meta/types/build.rs +++ b/common/meta/types/build.rs @@ -37,6 +37,9 @@ fn build_proto() { println!("cargo:rerun-if-changed={}", proto.to_str().unwrap()); } + let mut config = prost_build::Config::new(); + config.protoc_arg("--experimental_allow_proto3_optional"); + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); tonic_build::configure() .file_descriptor_set_path(out_dir.join("meta_descriptor.bin")) @@ -116,6 +119,6 @@ fn build_proto() { "Event", "#[derive(Eq, serde::Serialize, serde::Deserialize)]", ) - .compile(&protos, &[&proto_dir]) + .compile_with_config(config, &protos, &[&proto_dir]) .unwrap(); } diff --git a/common/protos/Cargo.toml b/common/protos/Cargo.toml index a1dfb47ee5a0..c664d1c9262f 100644 --- a/common/protos/Cargo.toml +++ b/common/protos/Cargo.toml @@ -14,10 +14,10 @@ test = false num-derive = "0.3" num-traits = "0.2" prost = "0.10.1" -tonic = { version = "=0.6.2", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } +tonic = { version = "=0.7.2", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } [build-dependencies] prost-build = "0.10.1" -tonic-build = "0.7.0" +tonic-build = "0.7.2" [dev-dependencies] diff --git a/common/streams/tests/it/sources/source_parquet.rs b/common/streams/tests/it/sources/source_parquet.rs index 98b0e068713a..5fec6d7dd24e 100644 --- a/common/streams/tests/it/sources/source_parquet.rs +++ b/common/streams/tests/it/sources/source_parquet.rs @@ -53,7 +53,7 @@ async fn test_source_parquet() -> Result<()> { use common_arrow::arrow::chunk::Chunk; let batch = Chunk::try_from(sample_block)?; use common_arrow::parquet::encoding::Encoding; - let encodings = std::iter::repeat(Encoding::Plain) + let encodings = std::iter::repeat(vec![Encoding::Plain]) .take(arrow_schema.fields.len()) .collect::>(); diff --git a/common/tracing/Cargo.toml b/common/tracing/Cargo.toml index 7bef2f5d5fee..8552cfb41506 100644 --- a/common/tracing/Cargo.toml +++ b/common/tracing/Cargo.toml @@ -18,7 +18,7 @@ console-subscriber = { version = "0.1.3", optional = true } once_cell = "1.10.0" opentelemetry = { version = "0.17.0", default-features = false, features = ["trace", "rt-tokio"] } opentelemetry-jaeger = { version = "0.16.0", features = ["rt-tokio"] } -tonic = "=0.6.2" +tonic = "=0.7.2" tracing = "0.1.32" tracing-appender = "0.2.2" tracing-bunyan-formatter = "0.3.2" diff --git a/common/tracing/src/tracing_to_jaeger.rs b/common/tracing/src/tracing_to_jaeger.rs index e2701d842856..2837444f2148 100644 --- a/common/tracing/src/tracing_to_jaeger.rs +++ b/common/tracing/src/tracing_to_jaeger.rs @@ -24,7 +24,7 @@ impl<'a> Injector for MetadataMapInjector<'a> { /// Set a key and value in the MetadataMap. Does nothing if the key or value are not valid inputs fn set(&mut self, key: &str, value: String) { if let Ok(key) = tonic::metadata::MetadataKey::from_bytes(key.as_bytes()) { - if let Ok(val) = tonic::metadata::MetadataValue::from_str(&value) { + if let Ok(val) = tonic::metadata::AsciiMetadataValue::try_from(&value) { self.0.insert(key, val); } } diff --git a/metasrv/Cargo.toml b/metasrv/Cargo.toml index 2fcffd17800e..c3db48df087a 100644 --- a/metasrv/Cargo.toml +++ b/metasrv/Cargo.toml @@ -50,7 +50,7 @@ num = "0.4.0" once_cell = "1.10.0" poem = { version = "=1.3.16", features = ["rustls"] } prometheus = { version = "0.13.0", features = ["process"] } -prost = "=0.9.0" +prost = "=0.10.1" semver = "1.0.9" serde = { version = "1.0.136", features = ["derive"] } serde-bridge = "0.0.3" @@ -58,8 +58,8 @@ serde_json = "1.0.79" serfig = "0.0.2" tempfile = "3.3.0" tokio-stream = "0.1.8" -tonic = { version = "=0.6.2", features = ["tls"] } -tonic-reflection = "=0.3.0" +tonic = { version = "=0.7.2", features = ["tls"] } +tonic-reflection = "=0.4.0" [dev-dependencies] common-meta-api = { path = "../common/meta/api" } diff --git a/query/Cargo.toml b/query/Cargo.toml index 1bd188e338ea..95e83c878a9d 100644 --- a/query/Cargo.toml +++ b/query/Cargo.toml @@ -93,7 +93,7 @@ openssl = { version = "0.10", features = ["vendored"] } paste = "1.0.7" petgraph = "0.6.0" poem = { version = "=1.3.16", features = ["rustls", "multipart", "compression"] } -prost = "=0.9.0" +prost = "=0.10.1" rand = "0.8.5" regex = "1.5.5" reqwest = "0.11.10" @@ -113,7 +113,7 @@ thrift = { version = "0.15", optional = true } time = "0.3.9" tokio-rustls = "0.23.3" tokio-stream = { version = "0.1.8", features = ["net"] } -tonic = "=0.6.2" +tonic = "=0.7.2" twox-hash = "1.6.2" typetag = "0.1.8" uuid = { version = "0.8.2", features = ["serde", "v4"] } diff --git a/query/src/storages/fuse/io/write/block_writer.rs b/query/src/storages/fuse/io/write/block_writer.rs index fdfb93edb4e0..505f5f8e78a9 100644 --- a/query/src/storages/fuse/io/write/block_writer.rs +++ b/query/src/storages/fuse/io/write/block_writer.rs @@ -63,10 +63,13 @@ pub fn serialize_data_blocks( .map(|b| Chunk::try_from(b.clone())) .collect::>>()?; - let encodings: Vec<_> = arrow_schema + let encodings: Vec> = arrow_schema .fields .iter() - .map(|f| col_encoding(&f.data_type)) + .map(|f| match f.data_type() { + ArrowDataType::Dictionary(..) => vec![Encoding::RleDictionary], + _ => vec![col_encoding(f.data_type())], + }) .collect(); let row_groups = RowGroupIterator::try_new( diff --git a/tests/suites/0_stateless/03_dml/03_0004_select_order_by.result b/tests/suites/0_stateless/03_dml/03_0004_select_order_by.result index 606a515803c6..7e6a73dc5e1e 100644 --- a/tests/suites/0_stateless/03_dml/03_0004_select_order_by.result +++ b/tests/suites/0_stateless/03_dml/03_0004_select_order_by.result @@ -34,3 +34,29 @@ Projection: (number % 3) as c1:UInt8, (number % 2) as c2:UInt8 2 NULL 1 NULL 0 NULL +==Variant== +6 true +4 10 +2 "abcd" +1 {"k":"v"} +3 [1,2,3] +5 null +5 null +3 [1,2,3] +1 {"k":"v"} +2 "abcd" +4 10 +6 true +==Array(Int32)== +3 [] +1 [1, 2, 3] +2 [1, 2, 4] +4 [3, 4, 5] +5 [4] +6 [4, 5] +6 [4, 5] +5 [4] +4 [3, 4, 5] +2 [1, 2, 4] +1 [1, 2, 3] +3 [] diff --git a/tests/suites/0_stateless/03_dml/03_0004_select_order_by.sql b/tests/suites/0_stateless/03_dml/03_0004_select_order_by.sql index 7aac588fa65d..a306f0cc7a40 100644 --- a/tests/suites/0_stateless/03_dml/03_0004_select_order_by.sql +++ b/tests/suites/0_stateless/03_dml/03_0004_select_order_by.sql @@ -12,3 +12,34 @@ drop table t1; -- sort with null SELECT number, null from numbers(3) order by number desc; + +SELECT '==Variant=='; +CREATE TABLE IF NOT EXISTS t2(id Int null, var Variant null) Engine = Fuse; + +INSERT INTO t2 VALUES(1, parse_json('{"k":"v"}')), + (2, parse_json('"abcd"')), + (3, parse_json('[1,2,3]')), + (4, parse_json('10')), + (5, parse_json('null')), + (6, parse_json('true')); + +SELECT id, var FROM t2 ORDER BY var ASC; +SELECT id, var FROM t2 ORDER BY var DESC; + +DROP TABLE t2; + +SELECT '==Array(Int32)=='; + +CREATE TABLE IF NOT EXISTS t3(id Int null, arr Array(Int32) null) Engine = Fuse; + +INSERT INTO t3 VALUES(1, [1,2,3]), + (2, [1,2,4]), + (3, []), + (4, [3,4,5]), + (5, [4]), + (6, [4,5]); + +SELECT id, arr FROM t3 ORDER BY arr ASC; +SELECT id, arr FROM t3 ORDER BY arr DESC; + +DROP TABLE t3; diff --git a/tests/suites/0_stateless/03_dml/03_0004_select_order_by_cluster.result b/tests/suites/0_stateless/03_dml/03_0004_select_order_by_cluster.result index efbcef678202..ba5ac547f033 100644 --- a/tests/suites/0_stateless/03_dml/03_0004_select_order_by_cluster.result +++ b/tests/suites/0_stateless/03_dml/03_0004_select_order_by_cluster.result @@ -35,3 +35,29 @@ Projection: (number % 3) as c1:UInt8, (number % 2) as c2:UInt8 2 NULL 1 NULL 0 NULL +==Variant== +6 true +4 10 +2 "abcd" +1 {"k":"v"} +3 [1,2,3] +5 null +5 null +3 [1,2,3] +1 {"k":"v"} +2 "abcd" +4 10 +6 true +==Array(Int32)== +3 [] +1 [1, 2, 3] +2 [1, 2, 4] +4 [3, 4, 5] +5 [4] +6 [4, 5] +6 [4, 5] +5 [4] +4 [3, 4, 5] +2 [1, 2, 4] +1 [1, 2, 3] +3 [] diff --git a/tools/metactl/Cargo.toml b/tools/metactl/Cargo.toml index 9c70032620f5..d9882130d614 100644 --- a/tools/metactl/Cargo.toml +++ b/tools/metactl/Cargo.toml @@ -29,4 +29,4 @@ clap = { version = "3.1.8", features = ["derive", "env"] } serde = { version = "1.0.136", features = ["derive"] } serde_json = "1.0.79" tokio-stream = "0.1.8" -tonic = "=0.6.2" +tonic = "=0.7.2"