Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

executor: add memTracker for aggregation #13720

Merged
merged 4 commits
Dec 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 38 additions & 4 deletions executor/aggregate.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"github.com/pingcap/tidb/util/codec"
"github.com/pingcap/tidb/util/logutil"
"github.com/pingcap/tidb/util/mathutil"
"github.com/pingcap/tidb/util/memory"
"github.com/pingcap/tidb/util/set"
"github.com/spaolacci/murmur3"
"go.uber.org/zap"
Expand Down Expand Up @@ -70,7 +71,8 @@ type HashAggPartialWorker struct {
groupKey [][]byte
// chk stores the input data from child,
// and is reused by childExec and partial worker.
chk *chunk.Chunk
chk *chunk.Chunk
memTracker *memory.Tracker
}

// HashAggFinalWorker indicates the final workers of parallel hash agg execution,
Expand Down Expand Up @@ -166,6 +168,8 @@ type HashAggExec struct {
isUnparallelExec bool
prepared bool
executed bool

memTracker *memory.Tracker // track memory usage.
}

// HashAggInput indicates the input of hash agg exec.
Expand Down Expand Up @@ -199,6 +203,7 @@ func (d *HashAggIntermData) getPartialResultBatch(sc *stmtctx.StatementContext,
// Close implements the Executor Close interface.
func (e *HashAggExec) Close() error {
if e.isUnparallelExec {
e.memTracker.Consume(-e.childResult.MemoryUsage())
e.childResult = nil
e.groupSet = nil
e.partialResultMap = nil
Expand All @@ -221,7 +226,8 @@ func (e *HashAggExec) Close() error {
}
}
for _, ch := range e.partialInputChs {
for range ch {
for chk := range ch {
e.memTracker.Consume(-chk.MemoryUsage())
}
}
for range e.finalOutputCh {
Expand Down Expand Up @@ -250,6 +256,9 @@ func (e *HashAggExec) Open(ctx context.Context) error {
}
e.prepared = false

e.memTracker = memory.NewTracker(e.id, -1)
e.memTracker.AttachTo(e.ctx.GetSessionVars().StmtCtx.MemTracker)

if e.isUnparallelExec {
e.initForUnparallelExec()
return nil
Expand All @@ -263,6 +272,7 @@ func (e *HashAggExec) initForUnparallelExec() {
e.partialResultMap = make(aggPartialResultMapper)
e.groupKeyBuffer = make([][]byte, 0, 8)
e.childResult = newFirstChunk(e.children[0])
e.memTracker.Consume(e.childResult.MemoryUsage())
}

func (e *HashAggExec) initForParallelExec(ctx sessionctx.Context) {
Expand Down Expand Up @@ -298,13 +308,17 @@ func (e *HashAggExec) initForParallelExec(ctx sessionctx.Context) {
groupByItems: e.GroupByItems,
chk: newFirstChunk(e.children[0]),
groupKey: make([][]byte, 0, 8),
memTracker: e.memTracker,
}

e.memTracker.Consume(w.chk.MemoryUsage())
e.partialWorkers[i] = w
e.inputCh <- &HashAggInput{

input := &HashAggInput{
chk: newFirstChunk(e.children[0]),
giveBackCh: w.inputCh,
}
e.memTracker.Consume(input.chk.MemoryUsage())
e.inputCh <- input
}

// Init final workers.
Expand Down Expand Up @@ -356,6 +370,7 @@ func (w *HashAggPartialWorker) run(ctx sessionctx.Context, waitGroup *sync.WaitG
if needShuffle {
w.shuffleIntermData(sc, finalConcurrency)
}
w.memTracker.Consume(-w.chk.MemoryUsage())
waitGroup.Done()
}()
for {
Expand Down Expand Up @@ -606,20 +621,28 @@ func (e *HashAggExec) fetchChildData(ctx context.Context) {
}
chk = input.chk
}
mSize := chk.MemoryUsage()
err = Next(ctx, e.children[0], chk)
if err != nil {
e.finalOutputCh <- &AfFinalResult{err: err}
e.memTracker.Consume(-mSize)
return
}
if chk.NumRows() == 0 {
e.memTracker.Consume(-mSize)
return
}
e.memTracker.Consume(chk.MemoryUsage() - mSize)
input.giveBackCh <- chk
}
}

func (e *HashAggExec) waitPartialWorkerAndCloseOutputChs(waitGroup *sync.WaitGroup) {
waitGroup.Wait()
close(e.inputCh)
for input := range e.inputCh {
e.memTracker.Consume(-input.chk.MemoryUsage())
}
for _, ch := range e.partialOutputChs {
close(ch)
}
Expand Down Expand Up @@ -733,7 +756,9 @@ func (e *HashAggExec) unparallelExec(ctx context.Context, chk *chunk.Chunk) erro
// execute fetches Chunks from src and update each aggregate function for each row in Chunk.
func (e *HashAggExec) execute(ctx context.Context) (err error) {
for {
mSize := e.childResult.MemoryUsage()
err := Next(ctx, e.children[0], e.childResult)
e.memTracker.Consume(e.childResult.MemoryUsage() - mSize)
if err != nil {
return err
}
Expand Down Expand Up @@ -800,6 +825,8 @@ type StreamAggExec struct {
partialResults []aggfuncs.PartialResult
groupRows []chunk.Row
childResult *chunk.Chunk

memTracker *memory.Tracker // track memory usage.
}

// Open implements the Executor Open interface.
Expand All @@ -818,11 +845,16 @@ func (e *StreamAggExec) Open(ctx context.Context) error {
e.partialResults = append(e.partialResults, aggFunc.AllocPartialResult())
}

// bytesLimit <= 0 means no limit, for now we just track the memory footprint
e.memTracker = memory.NewTracker(e.id, -1)
e.memTracker.AttachTo(e.ctx.GetSessionVars().StmtCtx.MemTracker)
e.memTracker.Consume(e.childResult.MemoryUsage())
return nil
}

// Close implements the Executor Close interface.
func (e *StreamAggExec) Close() error {
e.memTracker.Consume(-e.childResult.MemoryUsage())
e.childResult = nil
e.groupChecker.reset()
return e.baseExecutor.Close()
Expand Down Expand Up @@ -910,7 +942,9 @@ func (e *StreamAggExec) consumeCurGroupRowsAndFetchChild(ctx context.Context, ch
return err
}

mSize := e.childResult.MemoryUsage()
err = Next(ctx, e.children[0], e.childResult)
e.memTracker.Consume(e.childResult.MemoryUsage() - mSize)
if err != nil {
return err
}
Expand Down
7 changes: 5 additions & 2 deletions executor/explain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func (s *testSuite1) TestExplainAnalyzeMemory(c *C) {

func (s *testSuite1) checkMemoryInfo(c *C, tk *testkit.TestKit, sql string) {
memCol := 5
ops := []string{"Join", "Reader", "Top", "Sort", "LookUp", "Projection", "Selection"}
ops := []string{"Join", "Reader", "Top", "Sort", "LookUp", "Projection", "Selection", "Agg"}
rows := tk.MustQuery(sql).Rows()
for _, row := range rows {
strs := make([]string, len(row))
Expand Down Expand Up @@ -165,7 +165,10 @@ func (s *testSuite1) TestMemoryUsageAfterClose(c *C) {
}
SQLs := []string{"select v+abs(k) from t",
"select v from t where abs(v) > 0",
"select v from t order by v"}
"select v from t order by v",
"select count(v) from t", // StreamAgg
"select count(v) from t group by v", // HashAgg
}
for _, sql := range SQLs {
tk.MustQuery(sql)
c.Assert(tk.Se.GetSessionVars().StmtCtx.MemTracker.BytesConsumed(), Equals, int64(0))
Expand Down