From f3c3d9dcc74a86de88f3a45d38b7c6d77d2c5df8 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Tue, 21 Nov 2023 13:15:37 +0800 Subject: [PATCH] [Fix](statistics)Fix analyze sql including key word bug (#27321) Fix the bug in analyze SQL that includes keywords: column names need to be wrapped in `` (backticks). --- .../apache/doris/statistics/BaseAnalysisTask.java | 14 +++++++------- .../doris/statistics/BaseAnalysisTaskTest.java | 4 ++-- .../doris/statistics/OlapAnalysisTaskTest.java | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index f3fa143b528622..824e3f74abd84a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -57,9 +57,9 @@ public abstract class BaseAnalysisTask { + " NULL AS `part_id`, " + " COUNT(1) AS `row_count`, " + " NDV(`${colName}`) AS `ndv`, " - + " COUNT(1) - COUNT(${colName}) AS `null_count`, " - + " CAST(MIN(${colName}) AS STRING) AS `min`, " - + " CAST(MAX(${colName}) AS STRING) AS `max`, " + + " COUNT(1) - COUNT(`${colName}`) AS `null_count`, " + + " CAST(MIN(`${colName}`) AS STRING) AS `min`, " + + " CAST(MAX(`${colName}`) AS STRING) AS `max`, " + " ${dataSizeFunction} AS `data_size`, " + " NOW() AS `update_time` " + " FROM `${catalogName}`.`${dbName}`.`${tblName}`"; @@ -91,13 +91,13 @@ public abstract class BaseAnalysisTask { + "NULL AS `part_id`, " + "${rowCount} AS `row_count`, " + "${ndvFunction} as `ndv`, " - + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.count, 0)), 0) * ${scaleFactor} as `null_count`, " + + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * ${scaleFactor} as `null_count`, " + "'${min}' AS `min`, " + "'${max}' AS `max`, " + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, " + "NOW() " + 
"FROM ( " - + " SELECT t0.`${colName}` as column_key, COUNT(1) as `count` " + + " SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` " + " FROM " + " (SELECT `${colName}` FROM `${catalogName}`.`${dbName}`.`${tblName}` " + " ${sampleHints} ${limit}) as `t0` " @@ -260,8 +260,8 @@ protected String getMinFunction() { } protected String getNdvFunction(String totalRows) { - String sampleRows = "SUM(t1.count)"; - String onceCount = "SUM(IF(t1.count = 1, 1, 0))"; + String sampleRows = "SUM(`t1`.`count`)"; + String onceCount = "SUM(IF(`t1`.`count` = 1, 1, 0))"; String countDistinct = "COUNT(1)"; // DUJ1 estimator: n*d / (n - f1 + f1*n/N) // f1 is the count of element that appears only once in the sample. diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java index e3d080fea0aea2..fe81c055e0dfbf 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java @@ -55,8 +55,8 @@ public void testGetFunctions() { Assertions.assertEquals("NULL", maxFunction); String ndvFunction = olapAnalysisTask.getNdvFunction(String.valueOf(100)); - Assertions.assertEquals("SUM(t1.count) * COUNT(1) / (SUM(t1.count) - SUM(IF(t1.count = 1, 1, 0)) " - + "+ SUM(IF(t1.count = 1, 1, 0)) * SUM(t1.count) / 100)", ndvFunction); + Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) " + + "+ SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 100)", ndvFunction); System.out.println(ndvFunction); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index 8e30519e8c4fff..c174795b36bf20 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -151,7 +151,7 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { @Mock public void runQuery(String sql, boolean needEncode) { Assertions.assertFalse(needEncode); - Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(t1.count) * COUNT(1) / (SUM(t1.count) - SUM(IF(t1.count = 1, 1, 0)) + SUM(IF(t1.count = 1, 1, 0)) * SUM(t1.count) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.count, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as column_key, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); + Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); return; } }; @@ -292,7 +292,7 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { @Mock public void runQuery(String sql, boolean needEncode) { Assertions.assertFalse(needEncode); - Assertions.assertEquals("SELECT 
CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(t1.count) * COUNT(1) / (SUM(t1.count) - SUM(IF(t1.count = 1, 1, 0)) + SUM(IF(t1.count = 1, 1, 0)) * SUM(t1.count) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.count, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as column_key, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); + Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); return; } };