Skip to content

Commit

Permalink
sql: implement HAVING clause
Browse files Browse the repository at this point in the history
Closes src-d#56

This PR adds support for the HAVING SQL clause, that allows
filtering rows and has support for aggregation functions.

For this, the following changes have been made:
- New `Having` node, which is essentially a Filter, but it is
  a different node for the purpose of differentiating between the
  two of them during the analysis phase.
- Having is now parsed.
- A new rule for resolving the Having node has been added. Because
  of the way aggregations are executed (only inside a GroupBy node)
  it is not possible to execute them in any other node that's not a
  GroupBy. For this reason, this rule pushes down any aggregation
  on a Having node to its child, which is the GroupBy node. If
  the same aggregation is already done on the GroupBy, nothing will
  be added and the aggregation on the Having node will be replaced
  with a reference to the result of the aggregation in the GroupBy.
  Because pushing new aggregations to the GroupBy changes the
  resulting schema, a Project node is added wrapping the Having
  node projecting only the columns the GroupBy was initially
  projecting.

Signed-off-by: Miguel Molina <miguel@erizocosmi.co>
  • Loading branch information
erizocosmico committed Apr 24, 2019
1 parent c4a4af4 commit c0e7c10
Show file tree
Hide file tree
Showing 9 changed files with 539 additions and 15 deletions.
65 changes: 57 additions & 8 deletions engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -508,13 +508,15 @@ var queries = []struct {
{"mytable", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
{"othertable", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
{"tabletest", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
{"bigtable", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
},
},
{
`SHOW TABLE STATUS LIKE '%table'`,
[]sql.Row{
{"mytable", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
{"othertable", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
{"bigtable", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
},
},
{
Expand Down Expand Up @@ -636,6 +638,7 @@ var queries = []struct {
{"mytable"},
{"othertable"},
{"tabletest"},
{"bigtable"},
},
},
{
Expand All @@ -659,6 +662,8 @@ var queries = []struct {
{"i"},
{"s2"},
{"i2"},
{"t"},
{"n"},
},
},
{
Expand All @@ -672,6 +677,8 @@ var queries = []struct {
{"i"},
{"s2"},
{"i2"},
{"t"},
{"n"},
},
},
{
Expand All @@ -685,6 +692,8 @@ var queries = []struct {
{"i"},
{"s2"},
{"i2"},
{"t"},
{"n"},
},
},
{
Expand Down Expand Up @@ -718,6 +727,7 @@ var queries = []struct {
{"mytable", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
{"othertable", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
{"tabletest", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
{"bigtable", "InnoDB", "10", "Fixed", int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), int64(0), nil, nil, nil, "utf8_bin", nil, nil},
},
},
{
Expand Down Expand Up @@ -828,6 +838,7 @@ var queries = []struct {
{"mytable"},
{"othertable"},
{"tabletest"},
{"bigtable"},
},
},
{
Expand All @@ -836,14 +847,7 @@ var queries = []struct {
{"mytable", "BASE TABLE"},
{"othertable", "BASE TABLE"},
{"tabletest", "BASE TABLE"},
},
},
{
"SHOW FULL TABLES",
[]sql.Row{
{"mytable", "BASE TABLE"},
{"othertable", "BASE TABLE"},
{"tabletest", "BASE TABLE"},
{"bigtable", "BASE TABLE"},
},
},
{
Expand All @@ -857,6 +861,7 @@ var queries = []struct {
[]sql.Row{
{"mytable"},
{"othertable"},
{"bigtable"},
},
},
{
Expand Down Expand Up @@ -949,6 +954,26 @@ var queries = []struct {
{int64(3), int64(1)},
},
},
{
"SELECT n, COUNT(n) FROM bigtable GROUP BY n HAVING COUNT(n) > 2",
[]sql.Row{{int64(1), int64(3)}, {int64(2), int64(3)}},
},
{
"SELECT n, MAX(n) FROM bigtable GROUP BY n HAVING COUNT(n) > 2",
[]sql.Row{{int64(1), int64(1)}, {int64(2), int64(2)}},
},
{
"SELECT substring(mytable.s, 1, 5) as s FROM mytable INNER JOIN othertable ON (substring(mytable.s, 1, 5) = SUBSTRING(othertable.s2, 1, 5)) GROUP BY 1 HAVING s = \"secon\"",
[]sql.Row{{"secon"}},
},
{
`
SELECT COLUMN_NAME as COLUMN_NAME FROM information_schema.COLUMNS
WHERE TABLE_SCHEMA=DATABASE() AND TABLE_NAME LIKE '%table'
GROUP BY 1 HAVING SUBSTRING(COLUMN_NAME, 1, 1) = "s"
`,
[]sql.Row{{"s"}, {"s2"}},
},
}

func TestQueries(t *testing.T) {
Expand Down Expand Up @@ -1625,10 +1650,34 @@ func newEngineWithParallelism(t *testing.T, parallelism int) *sqle.Engine {
sql.NewRow("c", int32(0)),
)

bigtable := mem.NewPartitionedTable("bigtable", sql.Schema{
{Name: "t", Type: sql.Text, Source: "bigtable"},
{Name: "n", Type: sql.Int64, Source: "bigtable"},
}, testNumPartitions)

insertRows(
t, bigtable,
sql.NewRow("a", int64(1)),
sql.NewRow("s", int64(2)),
sql.NewRow("f", int64(3)),
sql.NewRow("g", int64(1)),
sql.NewRow("h", int64(2)),
sql.NewRow("j", int64(3)),
sql.NewRow("k", int64(1)),
sql.NewRow("l", int64(2)),
sql.NewRow("ñ", int64(4)),
sql.NewRow("z", int64(5)),
sql.NewRow("x", int64(6)),
sql.NewRow("c", int64(7)),
sql.NewRow("v", int64(8)),
sql.NewRow("b", int64(9)),
)

db := mem.NewDatabase("mydb")
db.AddTable("mytable", table)
db.AddTable("othertable", table2)
db.AddTable("tabletest", table3)
db.AddTable("bigtable", bigtable)

db2 := mem.NewDatabase("foo")
db2.AddTable("other_table", table4)
Expand Down
16 changes: 13 additions & 3 deletions sql/analyzer/resolve_columns.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,9 +314,19 @@ func isDefinedInChildProject(n sql.Node, col *expression.UnresolvedColumn) bool

var found bool
for _, expr := range x.(sql.Expressioner).Expressions() {
alias, ok := expr.(*expression.Alias)
if ok && strings.ToLower(alias.Name()) == strings.ToLower(col.Name()) {
found = true
switch expr := expr.(type) {
case *expression.Alias:
if strings.ToLower(expr.Name()) == strings.ToLower(col.Name()) {
found = true
}
case column:
if strings.ToLower(expr.Name()) == strings.ToLower(col.Name()) &&
strings.ToLower(expr.Table()) == strings.ToLower(col.Table()) {
found = true
}
}

if found {
break
}
}
Expand Down
151 changes: 151 additions & 0 deletions sql/analyzer/resolve_having.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package analyzer

import (
"reflect"

"gopkg.in/src-d/go-errors.v1"
"gopkg.in/src-d/go-mysql-server.v0/sql"
"gopkg.in/src-d/go-mysql-server.v0/sql/expression"
"gopkg.in/src-d/go-mysql-server.v0/sql/expression/function/aggregation"
"gopkg.in/src-d/go-mysql-server.v0/sql/plan"
)

func resolveHaving(ctx *sql.Context, a *Analyzer, node sql.Node) (sql.Node, error) {
return node.TransformUp(func(node sql.Node) (sql.Node, error) {
having, ok := node.(*plan.Having)
if !ok {
return node, nil
}

if !having.Resolved() {
return node, nil
}

// If there are no aggregations there is no need to check anything else
// and we can just leave the node as it is.
if !hasAggregations(having.Cond) {
return node, nil
}

groupBy, ok := having.Child.(*plan.GroupBy)
if !ok {
return nil, errHavingNeedsGroupBy.New()
}

var aggregate = make([]sql.Expression, len(groupBy.Aggregate))
copy(aggregate, groupBy.Aggregate)

// We need to find all the aggregations in the having that are already present in
// the group by and replace them with a GetField. If the aggregation is not
// present, we need to move it to the GroupBy and reference it with a GetField.
cond, err := having.Cond.TransformUp(func(e sql.Expression) (sql.Expression, error) {
agg, ok := e.(sql.Aggregation)
if !ok {
return e, nil
}

for i, expr := range aggregate {
if aggregationEquals(agg, expr) {
var name string
if n, ok := expr.(sql.Nameable); ok {
name = n.Name()
} else {
name = expr.String()
}

return expression.NewGetField(
i,
expr.Type(),
name,
expr.IsNullable(),
), nil
}
}

aggregate = append(aggregate, agg)
return expression.NewGetField(
len(aggregate)-1,
agg.Type(),
agg.String(),
agg.IsNullable(),
), nil
})
if err != nil {
return nil, err
}

var result sql.Node = plan.NewHaving(
cond,
plan.NewGroupBy(aggregate, groupBy.Grouping, groupBy.Child),
)

// If any aggregation was sent to the GroupBy aggregate, we will need
// to wrap the new Having in a project that will get rid of all those
// extra columns we added.
if len(aggregate) != len(groupBy.Aggregate) {
var projection = make([]sql.Expression, len(groupBy.Aggregate))
for i, e := range groupBy.Aggregate {
var table, name string
if t, ok := e.(sql.Tableable); ok {
table = t.Table()
}

if n, ok := e.(sql.Nameable); ok {
name = n.Name()
} else {
name = e.String()
}

projection[i] = expression.NewGetFieldWithTable(
i,
e.Type(),
table,
name,
e.IsNullable(),
)
}
result = plan.NewProject(projection, result)
}

return result, nil
})
}

func aggregationEquals(a, b sql.Expression) bool {
// First unwrap aliases
if alias, ok := b.(*expression.Alias); ok {
b = alias.Child
} else if alias, ok := a.(*expression.Alias); ok {
a = alias.Child
}

switch a := a.(type) {
case *aggregation.Count:
// it doesn't matter what's inside a Count, the result will be
// the same.
_, ok := b.(*aggregation.Count)
return ok
case *aggregation.Sum,
*aggregation.Avg,
*aggregation.Min,
*aggregation.Max:
return reflect.DeepEqual(a, b)
default:
return false
}
}

var errHavingNeedsGroupBy = errors.NewKind("found HAVING clause with no GROUP BY")

func hasAggregations(expr sql.Expression) bool {
var has bool
expression.Inspect(expr, func(e sql.Expression) bool {
_, ok := e.(sql.Aggregation)
if ok {
has = true
return false
}
return true
})
return has
}
Loading

0 comments on commit c0e7c10

Please sign in to comment.