[Fix](statistics) Fix analyze SQL bug when column names are SQL keywords (apache#27321)
Fix a bug where the generated analyze SQL fails when a column name is a SQL keyword: column names need to be wrapped in backticks (``).
Jibing-Li authored and 胥剑旭 committed Dec 14, 2023
1 parent f08e269 commit f3c3d9d
Showing 3 changed files with 11 additions and 11 deletions.
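
To make the bug concrete (an illustrative sketch, not taken from this commit): when a column name collides with a SQL keyword, the unquoted `${colName}` substitution produces a statement the parser rejects, while the backtick-quoted form is read as a plain identifier. Assuming a hypothetical table `db`.`tbl` with a column literally named `order`:

    -- May fail to parse: order is read as the ORDER keyword, not a column name
    SELECT COUNT(1) - COUNT(order) AS `null_count`, CAST(MIN(order) AS STRING) AS `min` FROM `db`.`tbl`;

    -- Works: backticks turn the keyword into a quoted identifier
    SELECT COUNT(1) - COUNT(`order`) AS `null_count`, CAST(MIN(`order`) AS STRING) AS `min` FROM `db`.`tbl`;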
@@ -57,9 +57,9 @@ public abstract class BaseAnalysisTask {
  + " NULL AS `part_id`, "
  + " COUNT(1) AS `row_count`, "
  + " NDV(`${colName}`) AS `ndv`, "
- + " COUNT(1) - COUNT(${colName}) AS `null_count`, "
- + " CAST(MIN(${colName}) AS STRING) AS `min`, "
- + " CAST(MAX(${colName}) AS STRING) AS `max`, "
+ + " COUNT(1) - COUNT(`${colName}`) AS `null_count`, "
+ + " CAST(MIN(`${colName}`) AS STRING) AS `min`, "
+ + " CAST(MAX(`${colName}`) AS STRING) AS `max`, "
  + " ${dataSizeFunction} AS `data_size`, "
  + " NOW() AS `update_time` "
  + " FROM `${catalogName}`.`${dbName}`.`${tblName}`";
@@ -91,13 +91,13 @@ public abstract class BaseAnalysisTask {
  + "NULL AS `part_id`, "
  + "${rowCount} AS `row_count`, "
  + "${ndvFunction} as `ndv`, "
- + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.count, 0)), 0) * ${scaleFactor} as `null_count`, "
+ + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * ${scaleFactor} as `null_count`, "
  + "'${min}' AS `min`, "
  + "'${max}' AS `max`, "
  + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, "
  + "NOW() "
  + "FROM ( "
- + " SELECT t0.`${colName}` as column_key, COUNT(1) as `count` "
+ + " SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` "
  + " FROM "
  + " (SELECT `${colName}` FROM `${catalogName}`.`${dbName}`.`${tblName}` "
  + " ${sampleHints} ${limit}) as `t0` "
@@ -260,8 +260,8 @@ protected String getMinFunction() {
  }

  protected String getNdvFunction(String totalRows) {
- String sampleRows = "SUM(t1.count)";
- String onceCount = "SUM(IF(t1.count = 1, 1, 0))";
+ String sampleRows = "SUM(`t1`.`count`)";
+ String onceCount = "SUM(IF(`t1`.`count` = 1, 1, 0))";
  String countDistinct = "COUNT(1)";
  // DUJ1 estimator: n*d / (n - f1 + f1*n/N)
  // f1 is the count of element that appears only once in the sample.
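For reference, the DUJ1 strings above map onto the comment's symbols as n = sampleRows, d = countDistinct, f1 = onceCount, and N = totalRows. A worked example with assumed numbers (not from this commit): a 500-row sample of a 10,000-row table containing 50 distinct values, 10 of which appear exactly once, gives

    n*d / (n - f1 + f1*n/N)
    = 500 * 50 / (500 - 10 + 10 * 500 / 10000)
    = 25000 / 490.5
    ≈ 51 estimated distinct values for the full table.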
@@ -55,8 +55,8 @@ public void testGetFunctions() {
  Assertions.assertEquals("NULL", maxFunction);

  String ndvFunction = olapAnalysisTask.getNdvFunction(String.valueOf(100));
- Assertions.assertEquals("SUM(t1.count) * COUNT(1) / (SUM(t1.count) - SUM(IF(t1.count = 1, 1, 0)) "
-         + "+ SUM(IF(t1.count = 1, 1, 0)) * SUM(t1.count) / 100)", ndvFunction);
+ Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) "
+         + "+ SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 100)", ndvFunction);
  System.out.println(ndvFunction);
  }

@@ -151,7 +151,7 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) {
  @Mock
  public void runQuery(String sql, boolean needEncode) {
  Assertions.assertFalse(needEncode);
- Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(t1.count) * COUNT(1) / (SUM(t1.count) - SUM(IF(t1.count = 1, 1, 0)) + SUM(IF(t1.count = 1, 1, 0)) * SUM(t1.count) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.count, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as column_key, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql);
+ Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql);
  return;
  }
 };
@@ -292,7 +292,7 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) {
  @Mock
  public void runQuery(String sql, boolean needEncode) {
  Assertions.assertFalse(needEncode);
- Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(t1.count) * COUNT(1) / (SUM(t1.count) - SUM(IF(t1.count = 1, 1, 0)) + SUM(IF(t1.count = 1, 1, 0)) * SUM(t1.count) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.count, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as column_key, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql);
+ Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql);
  return;
  }
 };
