Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL: Generate relevant error message when grouping functions are not used in GROUP BY #38017

Merged
merged 10 commits into from
Feb 2, 2019
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ Collection<Failure> verify(LogicalPlan plan) {
validateInExpression(p, localFailures);
validateConditional(p, localFailures);

checkHistogramInGrouping(p, localFailures);
checkFilterOnAggs(p, localFailures);
checkFilterOnGrouping(p, localFailures);

Expand Down Expand Up @@ -560,6 +561,37 @@ private static boolean checkGroupMatch(Expression e, Node<?> source, List<Expres
}
return false;
}

private static void checkHistogramInGrouping(LogicalPlan p, Set<Failure> localFailures) {
// check if the query has a grouping function (Histogram) but no GROUP BY
if (p instanceof Project) {
Project proj = (Project) p;

proj.projections().forEach(e -> {
if (Functions.isGrouping(e) == false) {
e.collectFirstChildren(c -> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collectFirstChildren uses a List to add the matches which we don't need in this impl.
Why don't you use forEachDown?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the approach and pushed a new commit.

if (Functions.isGrouping(c)) {
localFailures.add(fail(c, "[%s] needs to be part of the grouping", Expressions.name(c)));
return true;
}
return false;
});
} else {
localFailures.add(fail(e, "[%s] needs to be part of the grouping", Expressions.name(e)));
}
});
} else if (p instanceof Aggregate) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this block is a bit condensed and could use some whitespace and indentation to make it more readable.
For example put localFailures on a new line.

// if it does have a GROUP BY, check if the groupings contain the grouping functions (Histograms)
Aggregate a = (Aggregate) p;

a.aggregates().forEach(as -> {
Expression exp = as instanceof Alias ? ((Alias) as).child() : as;
if (Functions.isGrouping(exp) && false == Expressions.anyMatch(a.groupings(), g -> exp.semanticEquals(g))) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if the histogram is wrapped with a Scalar function? is it caught somewhere else?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was already handled. See the test here.

localFailures.add(fail(exp, "[%s] needs to be part of the grouping", Expressions.name(exp)));
}
});
}
}

private static void checkFilterOnAggs(LogicalPlan p, Set<Failure> localFailures) {
if (p instanceof Filter) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,41 @@ public void testAggsInHistogram() {
assertEquals("1:47: Cannot use an aggregate [MAX] for grouping",
error("SELECT MAX(date) FROM test GROUP BY HISTOGRAM(MAX(int), 1)"));
}

public void testHistogramNotInGrouping() {
assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h FROM test"));
}

public void testHistogramNotInGroupingWithCount() {
assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h, COUNT(*) FROM test"));
}

public void testHistogramNotInGroupingWithMaxFirst() {
assertEquals("1:19: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT MAX(date), HISTOGRAM(date, INTERVAL 1 MONTH) AS h FROM test"));
}

public void testHistogramWithoutAliasNotInGrouping() {
assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) FROM test"));
}

public void testTwoHistogramsNotInGrouping() {
assertEquals("1:48: [HISTOGRAM(date, INTERVAL 1 DAY)] needs to be part of the grouping",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h, HISTOGRAM(date, INTERVAL 1 DAY) FROM test GROUP BY h"));
}

public void testHistogramNotInGrouping_WithGroupByField() {
assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) FROM test GROUP BY date"));
}

public void testScalarOfHistogramNotInGrouping() {
assertEquals("1:14: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT MONTH(HISTOGRAM(date, INTERVAL 1 MONTH)) FROM test"));
}

public void testErrorMessageForPercentileWithSecondArgBasedOnAField() {
assertEquals("1:8: Second argument of PERCENTILE must be a constant, received [ABS(int)]",
Expand Down