From 541880788f0a0b39f3fab97235b90d7bee7776d3 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Mon, 23 Sep 2024 20:56:40 +0800 Subject: [PATCH] [Fix](inverted index) fix wrong opt for count_on_index #41127 (#41153) cherry pick from #41127 --- .../implementation/AggregateStrategies.java | 11 +- .../inverted_index_p0/count-on-index.json | 100 ++++++++++++++++++ .../inverted_index_p0/test_count_on_index.out | 3 + .../test_count_on_index.groovy | 41 ++++++- 4 files changed, 150 insertions(+), 5 deletions(-) create mode 100644 regression-test/data/inverted_index_p0/count-on-index.json diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java index ad1ad4f52676a2..7bbbc7841e8235 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java @@ -114,7 +114,10 @@ public List buildRules() { .when(agg -> { Set funcs = agg.getAggregateFunctions(); return !funcs.isEmpty() && funcs.stream() - .allMatch(f -> f instanceof Count && !f.isDistinct()); + .allMatch(f -> f instanceof Count && !f.isDistinct() && (((Count) f).isStar() + || f.children().isEmpty() + || (f.children().size() == 1 && f.child(0) instanceof Literal) + || f.child(0) instanceof Slot)); }) .thenApply(ctx -> { LogicalAggregate> agg = ctx.root; @@ -133,7 +136,11 @@ public List buildRules() { .when(agg -> agg.getGroupByExpressions().isEmpty()) .when(agg -> { Set funcs = agg.getAggregateFunctions(); - return !funcs.isEmpty() && funcs.stream().allMatch(f -> f instanceof Count && !f.isDistinct()); + return !funcs.isEmpty() && funcs.stream() + .allMatch(f -> f instanceof Count && !f.isDistinct() && (((Count) f).isStar() + || f.children().isEmpty() + || (f.children().size() == 1 && f.child(0) instanceof Literal) + || f.child(0) instanceof Slot)); }) .thenApply(ctx -> { LogicalAggregate>> agg = ctx.root; diff --git a/regression-test/data/inverted_index_p0/count-on-index.json b/regression-test/data/inverted_index_p0/count-on-index.json new file mode 100644 index 00000000000000..6ef96d081c657a --- /dev/null +++ b/regression-test/data/inverted_index_p0/count-on-index.json @@ -0,0 +1,100 @@ +{"a": "2024-03-25", "b": "ISZHfv2OQ4", "c": "c1", "d": "d1", "e": "e1"} +{"a": "2023-02-20", "b": "0MgsB3vcIf", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2023-11-13", "b": "tczLqYSRhY", "c": "c1", "d": "d2", "e": "e1"} +{"a": "2023-08-13", "b": "f86oV0P4s8", "c": "c2", "d": "d2", "e": "e2"} +{"a": "2023-07-13", "b": "YHD5LihHpK", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2024-07-26", "b": "iSJxD3yKvH", "c": "c1", "d": "d2", "e": "e1"} +{"a": "2023-12-25", "b": "aNiLbwF1vg", "c": "c1", "d": "d2", "e": "e2"} +{"a": "2024-04-04", "b": "DGih88jW0H", "c": "c3", "d": "d3", "e": "e2"} +{"a": "2024-03-30", "b": "kxqqsrD1RH", "c": "c2", "d": "d3", "e": "e1"} +{"a": "2023-10-01", "b": "eySjxEnJvW", "c": "c2", "d": "d3", "e": "e1"} +{"a": "2023-09-19", "b": "lHAJZpcky5", "c": "c2", "d": "d2", "e": "e1"} +{"a": "2023-11-25", "b": "PB4l4NsNAn", "c": "c1", "d": "d3", "e": "e2"} +{"a": "2023-05-10", "b": "TP5M3xQDCj", "c": "c1", "d": "d2", "e": "e1"} +{"a": "2024-08-28", "b": "b1D8xLX4DN", "c": "c3", "d": "d2", "e": "e1"} +{"a": "2023-05-31", "b": "9gTsOoFITb", "c": "c2", "d": "d3", "e": "e2"} +{"a": "2023-01-14", "b": "rsv96RalgR", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2024-08-31", "b": "Uz67DnurlH", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2023-04-05", "b": "kPlM5F56kj", "c": "c2", "d": "d3", "e": "e2"} +{"a": "2023-09-18", "b": "wSGVCB6s3I", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2023-01-28", "b": "2ZpPp1y5G5", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2023-03-27", "b": "VwdMxBUnrc", "c": "c3", "d": "d1", "e": "e1"} +{"a": "2024-03-17", "b": "QPV71OyuZ2", "c": "c1", "d": "d2", "e": "e1"} +{"a": "2023-08-10", "b": "pyhphs1Mj4", "c": "c1", "d": "d1", "e": "e1"} +{"a": "2024-01-26", "b": "wxRO18q0EY", "c": "c2", "d": "d3", "e": "e1"} +{"a": "2023-11-04", "b": "vfVK2TsjTl", "c": "c2", "d": "d1", "e": "e2"} +{"a": "2023-04-27", "b": "eS6vCuQAXP", "c": "c3", "d": "d2", "e": "e1"} +{"a": "2024-02-25", "b": "6dx8DMPK9f", "c": "c2", "d": "d3", "e": "e2"} +{"a": "2023-04-16", "b": "3aJhtwXa7E", "c": "c2", "d": "d3", "e": "e1"} +{"a": "2024-07-08", "b": "Ue9xroXnHI", "c": "c3", "d": "d3", "e": "e2"} +{"a": "2023-08-19", "b": "gVPYCdN2eY", "c": "c2", "d": "d3", "e": "e2"} +{"a": "2024-02-24", "b": "qAaaKQpvd3", "c": "c2", "d": "d2", "e": "e1"} +{"a": "2024-01-10", "b": "XsNcGPnvvC", "c": "c2", "d": "d1", "e": "e2"} +{"a": "2023-03-04", "b": "BD7CEdzIhP", "c": "c2", "d": "d1", "e": "e1"} +{"a": "2023-03-18", "b": "sXESaouuHE", "c": "c2", "d": "d3", "e": "e1"} +{"a": "2023-06-28", "b": "DD3RE2pufi", "c": "c2", "d": "d3", "e": "e2"} +{"a": "2024-04-07", "b": "RdEFKIz8QW", "c": "c2", "d": "d1", "e": "e2"} +{"a": "2024-05-10", "b": "u3OhzAL2LH", "c": "c3", "d": "d1", "e": "e1"} +{"a": "2024-07-20", "b": "U0n5EVKjPm", "c": "c1", "d": "d1", "e": "e1"} +{"a": "2024-08-07", "b": "TXypE2ItVh", "c": "c3", "d": "d2", "e": "e1"} +{"a": "2024-08-08", "b": "8g3hPyCB2B", "c": "c3", "d": "d2", "e": "e1"} +{"a": "2023-01-05", "b": "aT6WlbObnZ", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2023-06-05", "b": "mVqMi8Rzfi", "c": "c1", "d": "d1", "e": "e2"} +{"a": "2024-02-12", "b": "HxpmQ0draG", "c": "c1", "d": "d1", "e": "e1"} +{"a": "2024-05-20", "b": "R5a7gA61KY", "c": "c1", "d": "d2", "e": "e1"} +{"a": "2023-04-19", "b": "QacPa5V0Fj", "c": "c3", "d": "d3", "e": "e1"} +{"a": "2023-07-29", "b": "dwT8GxkWDA", "c": "c1", "d": "d1", "e": "e1"} +{"a": "2024-06-29", "b": "UCRkZWVEhK", "c": "c3", "d": "d1", "e": "e2"} +{"a": "2023-02-22", "b": "yMSAdFkaq9", "c": "c3", "d": "d1", "e": "e1"} +{"a": "2024-04-18", "b": "6Aa4VUyj7b", "c": "c2", "d": "d1", "e": "e2"} +{"a": "2023-09-20", "b": "xet5tOBGLy", "c": "c1", "d": "d1", "e": "e2"} +{"a": "2024-07-09", "b": "kyE5wM71uC", "c": "c1", "d": "d1", "e": "e2"} +{"a": "2024-03-05", "b": "J9UtyRClVj", "c": "c2", "d": "d2", "e": "e2"} +{"a": "2024-08-02", "b": "BnNajVStTq", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2024-03-20", "b": "YFyjBh6JeE", "c": "c1", "d": "d3", "e": "e1"} +{"a": "2024-01-25", "b": "kF462Dpave", "c": "c1", "d": "d1", "e": "e2"} +{"a": "2023-11-20", "b": "uUsipxur13", "c": "c1", "d": "d3", "e": "e2"} +{"a": "2024-08-06", "b": "g4i1sEGImS", "c": "c2", "d": "d3", "e": "e1"} +{"a": "2024-02-09", "b": "NUz4tjvKt5", "c": "c3", "d": "d1", "e": "e2"} +{"a": "2024-04-20", "b": "p72Gn18tWd", "c": "c3", "d": "d3", "e": "e2"} +{"a": "2024-01-22", "b": "3DriXIqvSg", "c": "c2", "d": "d3", "e": "e2"} +{"a": "2024-07-26", "b": "rorCsbghiO", "c": "c1", "d": "d1", "e": "e2"} +{"a": "2023-10-26", "b": "XAWPiEQVmE", "c": "c3", "d": "d1", "e": "e1"} +{"a": "2023-10-30", "b": "L3FWcbrzen", "c": "c1", "d": "d2", "e": "e2"} +{"a": "2024-06-25", "b": "Lj6SZ26GJN", "c": "c3", "d": "d3", "e": "e1"} +{"a": "2023-07-20", "b": "U6nYzFhfwM", "c": "c2", "d": "d2", "e": "e2"} +{"a": "2023-06-26", "b": "J7jWtTmtZT", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2024-05-27", "b": "hDWYIRDHV4", "c": "c2", "d": "d2", "e": "e2"} +{"a": "2023-05-23", "b": "kvjedf4zF8", "c": "c3", "d": "d3", "e": "e1"} +{"a": "2023-02-06", "b": "RsPN2cGb2L", "c": "c2", "d": "d3", "e": "e1"} +{"a": "2024-06-04", "b": "belvas0y6p", "c": "c3", "d": "d1", "e": "e2"} +{"a": "2024-06-17", "b": "J6vYAcFuGZ", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2024-04-18", "b": "qHuHh0Y29i", "c": "c1", "d": "d2", "e": "e1"} +{"a": "2023-08-20", "b": "1GS5UtXMdz", "c": "c2", "d": "d3", "e": "e1"} +{"a": "2024-01-23", "b": "gnjJ4TZ6A6", "c": "c1", "d": "d1", "e": "e1"} +{"a": "2023-02-10", "b": "LX6ddQvIX2", "c": "c2", "d": "d3", "e": "e1"} +{"a": "2024-03-02", "b": "MkImkgiAfm", "c": "c3", "d": "d1", "e": "e2"} +{"a": "2023-11-30", "b": "tGmu0DD8W0", "c": "c3", "d": "d3", "e": "e1"} +{"a": "2023-02-01", "b": "NgzjCOPAku", "c": "c1", "d": "d3", "e": "e2"} +{"a": "2023-01-03", "b": "5mWMiuYwbi", "c": "c3", "d": "d1", "e": "e2"} +{"a": "2023-02-12", "b": "9324TZqLjh", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2023-06-24", "b": "7fAYU4XSma", "c": "c1", "d": "d3", "e": "e1"} +{"a": "2024-08-28", "b": "iNvBMy8AB8", "c": "c1", "d": "d1", "e": "e1"} +{"a": "2024-06-08", "b": "nwJjFx21my", "c": "c3", "d": "d1", "e": "e1"} +{"a": "2023-03-15", "b": "Oonv8eGNIF", "c": "c3", "d": "d2", "e": "e1"} +{"a": "2024-04-22", "b": "6UYDEqQxxf", "c": "c2", "d": "d2", "e": "e2"} +{"a": "2024-05-08", "b": "aVPqpTufJf", "c": "c1", "d": "d2", "e": "e1"} +{"a": "2024-08-02", "b": "z1baLbjnTj", "c": "c3", "d": "d3", "e": "e2"} +{"a": "2023-07-05", "b": "vMLMALySMJ", "c": "c3", "d": "d3", "e": "e1"} +{"a": "2023-08-30", "b": "s7VZKlOG27", "c": "c3", "d": "d3", "e": "e2"} +{"a": "2023-03-06", "b": "gWAuqLvHpJ", "c": "c2", "d": "d1", "e": "e2"} +{"a": "2024-02-24", "b": "2IQL6pazn3", "c": "c1", "d": "d2", "e": "e1"} +{"a": "2024-08-15", "b": "4YbxyhwaRF", "c": "c3", "d": "d2", "e": "e1"} +{"a": "2023-02-28", "b": "wAOOLl8Kqj", "c": "c1", "d": "d2", "e": "e2"} +{"a": "2024-05-31", "b": "FTQKB8rURb", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2023-02-20", "b": "knNNhnMXLN", "c": "c1", "d": "d3", "e": "e2"} +{"a": "2024-07-01", "b": "cxA5xpl6NM", "c": "c2", "d": "d1", "e": "e2"} +{"a": "2023-05-10", "b": "5FnmdQtOA0", "c": "c1", "d": "d2", "e": "e1"} +{"a": "2024-08-21", "b": "ldggIaWfYF", "c": "c3", "d": "d2", "e": "e2"} +{"a": "2024-01-05", "b": "7lwOfCQs5o", "c": "c2", "d": "d3", "e": "e1"} +{"a": "2024-08-21", "b": "2zvnCMAkZG", "c": "c1", "d": "d1", "e": "e2"} diff --git a/regression-test/data/inverted_index_p0/test_count_on_index.out b/regression-test/data/inverted_index_p0/test_count_on_index.out index 59910b7fb5d6a8..3c0f47e7f8baf9 100644 --- a/regression-test/data/inverted_index_p0/test_count_on_index.out +++ b/regression-test/data/inverted_index_p0/test_count_on_index.out @@ -74,3 +74,6 @@ -- !sql -- 0 +-- !sql_bad -- +0 1 + diff --git a/regression-test/suites/inverted_index_p0/test_count_on_index.groovy b/regression-test/suites/inverted_index_p0/test_count_on_index.groovy index 8fb98221ef542d..490e998ed58cef 100644 --- a/regression-test/suites/inverted_index_p0/test_count_on_index.groovy +++ b/regression-test/suites/inverted_index_p0/test_count_on_index.groovy @@ -93,7 +93,7 @@ suite("test_count_on_index_httplogs", "p0") { """ } - def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, + def stream_load_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, expected_succ_rows = -1, load_to_single_tablet = 'true' -> // load the json data @@ -137,8 +137,8 @@ suite("test_count_on_index_httplogs", "p0") { create_httplogs_dup_table.call(testTable_dup) create_httplogs_unique_table.call(testTable_unique) - load_httplogs_data.call(testTable_dup, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json') - load_httplogs_data.call(testTable_unique, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json') + stream_load_data.call(testTable_dup, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json') + stream_load_data.call(testTable_unique, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json') sql "sync" sql """set experimental_enable_nereids_planner=true;""" @@ -272,6 +272,41 @@ suite("test_count_on_index_httplogs", "p0") { // case4: test compound query when inverted_index_query disable qt_sql "SELECT COUNT() from ${testTable_dup} where request = 'images' or (size = 0 and status > 400)" qt_sql "SELECT /*+SET_VAR(enable_inverted_index_query=false) */ COUNT() from ${testTable_dup} where request = 'images' or (size = 0 and status > 400)" + + // case5: test complex count to testify bad case + def tableName5 = 'test_count_on_index_bad_case' + sql "DROP TABLE IF EXISTS ${tableName5}" + sql """ + CREATE TABLE `${tableName5}` ( + `a` DATE NOT NULL COMMENT '', + `b` VARCHAR(4096) NULL COMMENT '', + `c` VARCHAR(4096) NULL COMMENT '', + `d` VARCHAR(4096) NULL COMMENT '', + `e` VARCHAR(4096) NULL COMMENT '', + INDEX idx_a(`a`) USING INVERTED COMMENT '', + INDEX idx_e(`e`) USING INVERTED COMMENT '' + ) ENGINE=OLAP + UNIQUE KEY(`a`, `b`) + COMMENT '' + DISTRIBUTED BY HASH(`a`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + stream_load_data.call(tableName5, 'test_count_on_index_bad_case', 'true', 'json', 'count-on-index.json') + def bad_sql = """ + SELECT + COUNT(CASE WHEN c IN ('c1', 'c2', 'c3') AND d = 'd1' THEN b END) AS num1, + COUNT(CASE WHEN e = 'e1' AND c IN ('c1', 'c2', 'c3') THEN b END) AS num2 + FROM ${tableName5} + WHERE a = '2024-07-26' + AND e = 'e1'; + """ + explain { + sql("${bad_sql}") + contains "pushAggOp=NONE" + } + qt_sql_bad "${bad_sql}" } finally { //try_sql("DROP TABLE IF EXISTS ${testTable}") }