Skip to content

Commit

Permalink
[Fix](inverted index) fix wrong opt for count_on_index (apache#41127)
Browse files Browse the repository at this point in the history
## Proposed changes

The pushdownCountOnIndex optimization rule in AggregateStrategies was
incorrectly applied to COUNT functions with complex child expressions,
such as COUNT(CASE WHEN ...). This led to incorrect query results.
  • Loading branch information
airborne12 committed Sep 23, 2024
1 parent e1057ac commit 4ccb606
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ public List<Rule> buildRules() {
.when(agg -> {
Set<AggregateFunction> funcs = agg.getAggregateFunctions();
return !funcs.isEmpty() && funcs.stream()
.allMatch(f -> f instanceof Count && !f.isDistinct());
.allMatch(f -> f instanceof Count && !f.isDistinct() && (((Count) f).isCountStar()
|| f.child(0) instanceof Slot));
})
.thenApply(ctx -> {
LogicalAggregate<LogicalFilter<LogicalOlapScan>> agg = ctx.root;
Expand All @@ -136,7 +137,8 @@ public List<Rule> buildRules() {
.when(agg -> agg.getGroupByExpressions().isEmpty())
.when(agg -> {
Set<AggregateFunction> funcs = agg.getAggregateFunctions();
return !funcs.isEmpty() && funcs.stream().allMatch(f -> f instanceof Count && !f.isDistinct());
return !funcs.isEmpty() && funcs.stream().allMatch(f -> f instanceof Count && !f.isDistinct()
&& (((Count) f).isCountStar() || f.child(0) instanceof Slot));
})
.thenApply(ctx -> {
LogicalAggregate<LogicalProject<LogicalFilter<LogicalOlapScan>>> agg = ctx.root;
Expand Down
100 changes: 100 additions & 0 deletions regression-test/data/inverted_index_p0/count-on-index.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{"a": "2024-03-25", "b": "ISZHfv2OQ4", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2023-02-20", "b": "0MgsB3vcIf", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-11-13", "b": "tczLqYSRhY", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2023-08-13", "b": "f86oV0P4s8", "c": "c2", "d": "d2", "e": "e2"}
{"a": "2023-07-13", "b": "YHD5LihHpK", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-07-26", "b": "iSJxD3yKvH", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2023-12-25", "b": "aNiLbwF1vg", "c": "c1", "d": "d2", "e": "e2"}
{"a": "2024-04-04", "b": "DGih88jW0H", "c": "c3", "d": "d3", "e": "e2"}
{"a": "2024-03-30", "b": "kxqqsrD1RH", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2023-10-01", "b": "eySjxEnJvW", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2023-09-19", "b": "lHAJZpcky5", "c": "c2", "d": "d2", "e": "e1"}
{"a": "2023-11-25", "b": "PB4l4NsNAn", "c": "c1", "d": "d3", "e": "e2"}
{"a": "2023-05-10", "b": "TP5M3xQDCj", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2024-08-28", "b": "b1D8xLX4DN", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2023-05-31", "b": "9gTsOoFITb", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2023-01-14", "b": "rsv96RalgR", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-08-31", "b": "Uz67DnurlH", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-04-05", "b": "kPlM5F56kj", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2023-09-18", "b": "wSGVCB6s3I", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-01-28", "b": "2ZpPp1y5G5", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-03-27", "b": "VwdMxBUnrc", "c": "c3", "d": "d1", "e": "e1"}
{"a": "2024-03-17", "b": "QPV71OyuZ2", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2023-08-10", "b": "pyhphs1Mj4", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2024-01-26", "b": "wxRO18q0EY", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2023-11-04", "b": "vfVK2TsjTl", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2023-04-27", "b": "eS6vCuQAXP", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2024-02-25", "b": "6dx8DMPK9f", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2023-04-16", "b": "3aJhtwXa7E", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-07-08", "b": "Ue9xroXnHI", "c": "c3", "d": "d3", "e": "e2"}
{"a": "2023-08-19", "b": "gVPYCdN2eY", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2024-02-24", "b": "qAaaKQpvd3", "c": "c2", "d": "d2", "e": "e1"}
{"a": "2024-01-10", "b": "XsNcGPnvvC", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2023-03-04", "b": "BD7CEdzIhP", "c": "c2", "d": "d1", "e": "e1"}
{"a": "2023-03-18", "b": "sXESaouuHE", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2023-06-28", "b": "DD3RE2pufi", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2024-04-07", "b": "RdEFKIz8QW", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2024-05-10", "b": "u3OhzAL2LH", "c": "c3", "d": "d1", "e": "e1"}
{"a": "2024-07-20", "b": "U0n5EVKjPm", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2024-08-07", "b": "TXypE2ItVh", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2024-08-08", "b": "8g3hPyCB2B", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2023-01-05", "b": "aT6WlbObnZ", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-06-05", "b": "mVqMi8Rzfi", "c": "c1", "d": "d1", "e": "e2"}
{"a": "2024-02-12", "b": "HxpmQ0draG", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2024-05-20", "b": "R5a7gA61KY", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2023-04-19", "b": "QacPa5V0Fj", "c": "c3", "d": "d3", "e": "e1"}
{"a": "2023-07-29", "b": "dwT8GxkWDA", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2024-06-29", "b": "UCRkZWVEhK", "c": "c3", "d": "d1", "e": "e2"}
{"a": "2023-02-22", "b": "yMSAdFkaq9", "c": "c3", "d": "d1", "e": "e1"}
{"a": "2024-04-18", "b": "6Aa4VUyj7b", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2023-09-20", "b": "xet5tOBGLy", "c": "c1", "d": "d1", "e": "e2"}
{"a": "2024-07-09", "b": "kyE5wM71uC", "c": "c1", "d": "d1", "e": "e2"}
{"a": "2024-03-05", "b": "J9UtyRClVj", "c": "c2", "d": "d2", "e": "e2"}
{"a": "2024-08-02", "b": "BnNajVStTq", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-03-20", "b": "YFyjBh6JeE", "c": "c1", "d": "d3", "e": "e1"}
{"a": "2024-01-25", "b": "kF462Dpave", "c": "c1", "d": "d1", "e": "e2"}
{"a": "2023-11-20", "b": "uUsipxur13", "c": "c1", "d": "d3", "e": "e2"}
{"a": "2024-08-06", "b": "g4i1sEGImS", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-02-09", "b": "NUz4tjvKt5", "c": "c3", "d": "d1", "e": "e2"}
{"a": "2024-04-20", "b": "p72Gn18tWd", "c": "c3", "d": "d3", "e": "e2"}
{"a": "2024-01-22", "b": "3DriXIqvSg", "c": "c2", "d": "d3", "e": "e2"}
{"a": "2024-07-26", "b": "rorCsbghiO", "c": "c1", "d": "d1", "e": "e2"}
{"a": "2023-10-26", "b": "XAWPiEQVmE", "c": "c3", "d": "d1", "e": "e1"}
{"a": "2023-10-30", "b": "L3FWcbrzen", "c": "c1", "d": "d2", "e": "e2"}
{"a": "2024-06-25", "b": "Lj6SZ26GJN", "c": "c3", "d": "d3", "e": "e1"}
{"a": "2023-07-20", "b": "U6nYzFhfwM", "c": "c2", "d": "d2", "e": "e2"}
{"a": "2023-06-26", "b": "J7jWtTmtZT", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-05-27", "b": "hDWYIRDHV4", "c": "c2", "d": "d2", "e": "e2"}
{"a": "2023-05-23", "b": "kvjedf4zF8", "c": "c3", "d": "d3", "e": "e1"}
{"a": "2023-02-06", "b": "RsPN2cGb2L", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-06-04", "b": "belvas0y6p", "c": "c3", "d": "d1", "e": "e2"}
{"a": "2024-06-17", "b": "J6vYAcFuGZ", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-04-18", "b": "qHuHh0Y29i", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2023-08-20", "b": "1GS5UtXMdz", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-01-23", "b": "gnjJ4TZ6A6", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2023-02-10", "b": "LX6ddQvIX2", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-03-02", "b": "MkImkgiAfm", "c": "c3", "d": "d1", "e": "e2"}
{"a": "2023-11-30", "b": "tGmu0DD8W0", "c": "c3", "d": "d3", "e": "e1"}
{"a": "2023-02-01", "b": "NgzjCOPAku", "c": "c1", "d": "d3", "e": "e2"}
{"a": "2023-01-03", "b": "5mWMiuYwbi", "c": "c3", "d": "d1", "e": "e2"}
{"a": "2023-02-12", "b": "9324TZqLjh", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-06-24", "b": "7fAYU4XSma", "c": "c1", "d": "d3", "e": "e1"}
{"a": "2024-08-28", "b": "iNvBMy8AB8", "c": "c1", "d": "d1", "e": "e1"}
{"a": "2024-06-08", "b": "nwJjFx21my", "c": "c3", "d": "d1", "e": "e1"}
{"a": "2023-03-15", "b": "Oonv8eGNIF", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2024-04-22", "b": "6UYDEqQxxf", "c": "c2", "d": "d2", "e": "e2"}
{"a": "2024-05-08", "b": "aVPqpTufJf", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2024-08-02", "b": "z1baLbjnTj", "c": "c3", "d": "d3", "e": "e2"}
{"a": "2023-07-05", "b": "vMLMALySMJ", "c": "c3", "d": "d3", "e": "e1"}
{"a": "2023-08-30", "b": "s7VZKlOG27", "c": "c3", "d": "d3", "e": "e2"}
{"a": "2023-03-06", "b": "gWAuqLvHpJ", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2024-02-24", "b": "2IQL6pazn3", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2024-08-15", "b": "4YbxyhwaRF", "c": "c3", "d": "d2", "e": "e1"}
{"a": "2023-02-28", "b": "wAOOLl8Kqj", "c": "c1", "d": "d2", "e": "e2"}
{"a": "2024-05-31", "b": "FTQKB8rURb", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2023-02-20", "b": "knNNhnMXLN", "c": "c1", "d": "d3", "e": "e2"}
{"a": "2024-07-01", "b": "cxA5xpl6NM", "c": "c2", "d": "d1", "e": "e2"}
{"a": "2023-05-10", "b": "5FnmdQtOA0", "c": "c1", "d": "d2", "e": "e1"}
{"a": "2024-08-21", "b": "ldggIaWfYF", "c": "c3", "d": "d2", "e": "e2"}
{"a": "2024-01-05", "b": "7lwOfCQs5o", "c": "c2", "d": "d3", "e": "e1"}
{"a": "2024-08-21", "b": "2zvnCMAkZG", "c": "c1", "d": "d1", "e": "e2"}
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,6 @@
-- !sql --
0

-- !sql_bad --
0 1

Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ suite("test_count_on_index_httplogs", "p0") {
"""
}

def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
def stream_load_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false,
expected_succ_rows = -1, load_to_single_tablet = 'true' ->

// load the json data
Expand Down Expand Up @@ -137,8 +137,8 @@ suite("test_count_on_index_httplogs", "p0") {
create_httplogs_dup_table.call(testTable_dup)
create_httplogs_unique_table.call(testTable_unique)

load_httplogs_data.call(testTable_dup, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')
load_httplogs_data.call(testTable_unique, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')
stream_load_data.call(testTable_dup, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')
stream_load_data.call(testTable_unique, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')

sql "sync"
sql """set experimental_enable_nereids_planner=true;"""
Expand Down Expand Up @@ -272,6 +272,41 @@ suite("test_count_on_index_httplogs", "p0") {
// case4: test compound query when inverted_index_query disable
qt_sql "SELECT COUNT() from ${testTable_dup} where request = 'images' or (size = 0 and status > 400)"
qt_sql "SELECT /*+SET_VAR(enable_inverted_index_query=false) */ COUNT() from ${testTable_dup} where request = 'images' or (size = 0 and status > 400)"

// case5: test complex count to testify bad case
def tableName5 = 'test_count_on_index_bad_case'
sql "DROP TABLE IF EXISTS ${tableName5}"
sql """
CREATE TABLE `${tableName5}` (
`a` DATE NOT NULL COMMENT '',
`b` VARCHAR(4096) NULL COMMENT '',
`c` VARCHAR(4096) NULL COMMENT '',
`d` VARCHAR(4096) NULL COMMENT '',
`e` VARCHAR(4096) NULL COMMENT '',
INDEX idx_a(`a`) USING INVERTED COMMENT '',
INDEX idx_e(`e`) USING INVERTED COMMENT ''
) ENGINE=OLAP
UNIQUE KEY(`a`, `b`)
COMMENT ''
DISTRIBUTED BY HASH(`a`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""
stream_load_data.call(tableName5, 'test_count_on_index_bad_case', 'true', 'json', 'count-on-index.json')
def bad_sql = """
SELECT
COUNT(CASE WHEN c IN ('c1', 'c2', 'c3') AND d = 'd1' THEN b END) AS num1,
COUNT(CASE WHEN e = 'e1' AND c IN ('c1', 'c2', 'c3') THEN b END) AS num2
FROM ${tableName5}
WHERE a = '2024-07-26'
AND e = 'e1';
"""
explain {
sql("${bad_sql}")
contains "pushAggOp=NONE"
}
qt_sql_bad "${bad_sql}"
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
}
Expand Down

0 comments on commit 4ccb606

Please sign in to comment.