Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

util/goroutine_pool: add a goroutine pool package utilities #3752

Merged
merged 10 commits into from
Sep 12, 2017
138 changes: 138 additions & 0 deletions util/goroutine_pool/gp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package gp

import (
"sync"
"sync/atomic"
"time"
)

// Pool is a goroutine pool. Idle goroutines are kept in a singly linked list
// that starts at a sentinel head node.
type Pool struct {
	// Embedded lock; get and put hold it while they modify the list and count.
	sync.Mutex

	// head is a sentinel node: the first pooled goroutine is head.next.
	head goroutine
	// tail is the last node of the list; New points it at head for an empty pool.
	tail *goroutine
	// count is incremented by put and decremented by get.
	count int

	// idleTimeout is the duration of the timer each pooled goroutine waits on
	// before it tries to exit.
	idleTimeout time.Duration
}

// goroutine describes one pooled background goroutine, together with the
// channel bound to it for handing over work.
type goroutine struct {
	// status holds one of the status* values.
	status int32
	// ch carries the function the background goroutine should run next.
	ch chan func()
	// pool is the Pool that allocated this goroutine.
	pool *Pool
	// next links to the following node in the pool's list.
	next *goroutine
}

// Life-cycle states stored in goroutine.status.
const (
	// statusIdle (0): not currently claimed by a caller of Go.
	statusIdle int32 = iota
	// statusInUse (1): claimed by a caller of Go.
	statusInUse
	// statusDying (2): intermediate state used to avoid a race: Idle => Dying => Dead.
	statusDying
	// statusDead (3): marked by Go after it finds the goroutine in statusDying.
	statusDead
)

// New creates an empty *Pool whose goroutines use idleTimeout for their idle timer.
func New(idleTimeout time.Duration) *Pool {
	p := new(Pool)
	p.idleTimeout = idleTimeout
	// An empty list is represented by tail pointing at the sentinel head.
	p.tail = &p.head
	return p
}

// Go works like go func(), but goroutines are pooled for reusing.
// This strategy can avoid runtime.morestack, because pooled goroutine is already enlarged.
func (pool *Pool) Go(f func()) {
var g *goroutine
for {
g = pool.get()
if atomic.CompareAndSwapInt32(&g.status, statusIdle, statusInUse) {
break
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think just calling
g.ch <- f and returning here is easier to read.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's easier to read indeed, but there is a potential race condition.

}
// Status already changed from statusIdle => statusDying, delete this goroutine.
if atomic.LoadInt32(&g.status) == statusDying {
g.status = statusDead
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems nobody cares about this value any more.
It can be removed.

}
}

g.ch <- f
// When the goroutine finishes f(), it is put back into the pool automatically,
// so there is no need to call pool.put() here.
}

func (pool *Pool) get() *goroutine {
pool.Lock()
head := &pool.head
if head.next == nil {
pool.Unlock()
return pool.alloc()
}

ret := head.next
head.next = ret.next
if ret == pool.tail {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pool.count--
if pool.count == 0 {
  pool.tail = head
}

is easier to read.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't intend to maintain count; it was added later just for testing purposes.

pool.tail = head
}
pool.count--
pool.Unlock()
ret.next = nil
return ret
}

func (pool *Pool) put(p *goroutine) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is called by the goroutine in the background, so I think making this a goroutine method is more reasonable.

p.next = nil
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it always nil, as ensured by get?

pool.Lock()
pool.tail.next = p
pool.tail = p
pool.count++
p.status = statusIdle
pool.Unlock()
}

func (pool *Pool) alloc() *goroutine {
g := &goroutine{
ch: make(chan func()),
pool: pool,
}
go func(g *goroutine) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think defining a method for goroutine is more clear.

timer := time.NewTimer(pool.idleTimeout)
for {
select {
case <-timer.C:
// Check to avoid a corner case where the goroutine is taken out of the pool
// and receives this signal at the same time.
succ := atomic.CompareAndSwapInt32(&g.status, statusIdle, statusDying)
if succ {
return
}
case work := <-g.ch:
work()
// Put g back to the pool.
// This is the normal usage for a resource pool:
//
// obj := pool.get()
// use(obj)
// pool.put(obj)
//
// But when a goroutine is used as a resource, we can't pool.put() immediately,
// because the resource (goroutine) may still be in use.
// So putting the resource back is done here, when the goroutine finishes its work.
pool.put(g)
}
timer.Reset(pool.idleTimeout)
}
}(g)
return g
}
132 changes: 132 additions & 0 deletions util/goroutine_pool/gp_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright 2017 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package gp

import (
"sync"
"testing"
"time"
)

// TestBasicAPI makes two Go calls on a fresh pool: the first on an empty pool,
// the second after the first function has signalled completion.
func TestBasicAPI(t *testing.T) {
	pool := New(time.Second)

	var finished sync.WaitGroup
	finished.Add(1)

	// The pool is empty, so this call goes through alloc().
	pool.Go(func() {
		finished.Done()
	})

	// Wait for the function to run; the worker then calls put().
	finished.Wait()

	// A second call, intended to exercise get().
	pool.Go(func() {})
}

// TestGC checks that goroutines whose idle timer has expired are dropped from
// the pool: after all workers have timed out, one more Go call should leave
// exactly one goroutine counted in the pool.
func TestGC(t *testing.T) {
	gp := New(200 * time.Millisecond)
	var wg sync.WaitGroup
	wg.Add(100)
	for i := 0; i < 100; i++ {
		idx := i
		gp.Go(func() {
			time.Sleep(time.Duration(idx+1) * time.Millisecond)
			wg.Done()
		})
	}
	wg.Wait()
	// Sleep longer than the idle timeout so every pooled goroutine times out.
	time.Sleep(300 * time.Millisecond)
	gp.Go(func() {}) // To trigger count change.

	// The worker puts itself back into the pool asynchronously, after the
	// submitted function returns. Reading count immediately could still see 0,
	// so poll under the lock until it reaches 1 or the deadline passes.
	deadline := time.Now().Add(time.Second)
	var count int
	for {
		gp.Lock()
		count = gp.count
		gp.Unlock()
		if count == 1 || time.Now().After(deadline) {
			break
		}
		time.Sleep(time.Millisecond)
	}
	if count != 1 {
		t.Error("all goroutines should be recycled", count)
	}
}

func TestRace(t *testing.T) {
gp := New(200 * time.Millisecond)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the timeout too long to test race?

var wg sync.WaitGroup
begin := make(chan struct{})
wg.Add(500)
for i := 0; i < 50; i++ {
go func() {
<-begin
for i := 0; i < 10; i++ {
gp.Go(func() {
wg.Done()
})
time.Sleep(5 * time.Millisecond)
}
}()
}
close(begin)
wg.Wait()
}

// BenchmarkGoPool measures Pool.Go with a no-op function. Before the timer is
// reset, b.N/2 untimed calls are made on the same pool.
func BenchmarkGoPool(b *testing.B) {
	pool := New(20 * time.Second)

	// Untimed warm-up calls.
	warmup := b.N / 2
	for n := 0; n < warmup; n++ {
		pool.Go(func() {})
	}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		pool.Go(dummy)
	}
}

// BenchmarkGo is the baseline for BenchmarkGoPool: it runs the same no-op
// function with a plain go statement.
func BenchmarkGo(b *testing.B) {
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		go dummy()
	}
}

// dummy does nothing; the benchmarks use it as the function to run.
func dummy() {}

// BenchmarkMorestackPool runs morestack through the pool, waiting for each
// call to finish before submitting the next one.
func BenchmarkMorestackPool(b *testing.B) {
	pool := New(5 * time.Second)
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		var done sync.WaitGroup
		done.Add(1)
		pool.Go(func() {
			morestack(false)
			done.Done()
		})
		// Serialize iterations: wait for this call before starting the next.
		done.Wait()
	}
}

// BenchmarkMoreStack is the baseline for BenchmarkMorestackPool: each
// iteration runs morestack in a fresh goroutine and waits for it to finish.
func BenchmarkMoreStack(b *testing.B) {
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		var done sync.WaitGroup
		done.Add(1)
		go func() {
			morestack(false)
			done.Done()
		}()
		// Serialize iterations: wait for this goroutine before starting the next.
		done.Wait()
	}
}

// morestack declares an 8 KiB local array. When f is true it also fills the
// array with 'a'; when f is false nothing is written.
func morestack(f bool) {
	var stack [8 * 1024]byte
	if !f {
		return
	}
	for i := range stack {
		stack[i] = 'a'
	}
}