Skip to content

Commit

Permalink
sync: add ShardedValue
Browse files Browse the repository at this point in the history
Implementation golang#18802 (comment)

This CL is for a better understanding of the API based on the check-out/check-in model.

Change-Id: I7fdef164291cbb064f593faabee53e5221d008da
  • Loading branch information
qiulaidongfeng committed Aug 29, 2024
1 parent 9e8ea56 commit 41d4bac
Show file tree
Hide file tree
Showing 5 changed files with 253 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/runtime/proc.go
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,7 @@ const (
stwForTestReadMemStatsSlow // "ReadMemStatsSlow (test)"
stwForTestPageCachePagesLeaked // "PageCachePagesLeaked (test)"
stwForTestResetDebugLog // "ResetDebugLog (test)"
stwShardRead
)

func (r stwReason) String() string {
Expand Down Expand Up @@ -1402,6 +1403,7 @@ var stwReasonStrings = [...]string{
stwForTestReadMemStatsSlow: "ReadMemStatsSlow (test)",
stwForTestPageCachePagesLeaked: "PageCachePagesLeaked (test)",
stwForTestResetDebugLog: "ResetDebugLog (test)",
stwShardRead: "ShardRead",
}

// worldStop provides context from the stop-the-world required by the
Expand Down
5 changes: 5 additions & 0 deletions src/runtime/runtime2.go
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,11 @@ type p struct {
// gcStopTime is the nanotime timestamp that this P last entered _Pgcstop.
gcStopTime int64

shardp []struct {
shard unsafe.Pointer
pool uintptr
}

// Padding is no longer needed. False sharing is now not a worry because p is large enough
// that its size class is an integer multiple of the cache line size (for any of our architectures).
}
Expand Down
80 changes: 80 additions & 0 deletions src/runtime/shard.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package runtime

import (
"internal/runtime/atomic"
"unsafe"
)

type ShardedValue struct {
// NewShard is a function that produces new shards of type T.
NewShard func()

id uintptr
}

func findShrad(p *p, s uintptr) (shard unsafe.Pointer, index int) {
for i := range p.shardp {
if p.shardp[i].pool == s {
return p.shardp[i].shard, i
}
}
return nil, -1
}

var genShardId uintptr

func updateShard(v unsafe.Pointer, f func(unsafe.Pointer) unsafe.Pointer) {
s := (*ShardedValue)(v)
id := atomic.Loaduintptr(&s.id)
if id == 0 {
atomic.Casuintptr(&s.id, 0, atomic.Xadduintptr(&genShardId, 1))
id = atomic.Loaduintptr(&s.id)
}
p := getg().m.p.ptr()
shard, index := findShrad(p, id)
if index == -1 {
p.shardp = append(p.shardp, struct {
shard unsafe.Pointer
pool uintptr
}{
pool: id,
shard: f(nil),
})
return
}
p.shardp[index].shard = f(shard)
KeepAlive(s)
}

func valueShard(v unsafe.Pointer, yield func(unsafe.Pointer), Getret func() unsafe.Pointer) {
s := (*ShardedValue)(v)
stw := stopTheWorld(stwShardRead)
once := false
for i := range allp {
v, index := findShrad(allp[i], s.id)
if v != nil {
yield(v)
}
if index != -1 {
if once {
allp[i].shardp[index].shard = nil
} else {
allp[i].shardp[index].shard = Getret()
once = true
}
}
}
startTheWorld(stw)
}

func drainShard(v unsafe.Pointer, yield func(unsafe.Pointer)) {
s := (*ShardedValue)(v)
stw := stopTheWorld(stwShardRead)
for i := range allp {
v, _ := findShrad(allp[i], s.id)
if v != nil {
yield(v)
}
}
startTheWorld(stw)
}
66 changes: 66 additions & 0 deletions src/sync/old.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
goos: windows
goarch: amd64
pkg: sync
cpu: AMD Ryzen 7 7840HS w/ Radeon 780M Graphics
BenchmarkPool-16 657581748 0.9539 ns/op 0 B/op 0 allocs/op
BenchmarkPool-16 658348549 0.9283 ns/op 0 B/op 0 allocs/op
BenchmarkPool-16 651776504 0.9274 ns/op 0 B/op 0 allocs/op
BenchmarkPool-16 646705388 0.9399 ns/op 0 B/op 0 allocs/op
BenchmarkPool-16 641301361 0.9349 ns/op 0 B/op 0 allocs/op
BenchmarkPool-16 643769142 0.9415 ns/op 0 B/op 0 allocs/op
BenchmarkPool-16 630462696 0.9611 ns/op 0 B/op 0 allocs/op
BenchmarkPool-16 656426908 0.9616 ns/op 0 B/op 0 allocs/op
BenchmarkPool-16 643703709 0.9519 ns/op 0 B/op 0 allocs/op
BenchmarkPool-16 647926084 0.9586 ns/op 0 B/op 0 allocs/op
BenchmarkPoolOverflow-16 3214956 187.3 ns/op 0 B/op 0 allocs/op
BenchmarkPoolOverflow-16 3213816 188.3 ns/op 0 B/op 0 allocs/op
BenchmarkPoolOverflow-16 3184186 186.8 ns/op 0 B/op 0 allocs/op
BenchmarkPoolOverflow-16 3141736 187.4 ns/op 0 B/op 0 allocs/op
BenchmarkPoolOverflow-16 3175390 189.0 ns/op 0 B/op 0 allocs/op
BenchmarkPoolOverflow-16 3190791 188.8 ns/op 0 B/op 0 allocs/op
BenchmarkPoolOverflow-16 3186766 188.6 ns/op 0 B/op 0 allocs/op
BenchmarkPoolOverflow-16 3127542 187.6 ns/op 0 B/op 0 allocs/op
BenchmarkPoolOverflow-16 3185238 189.6 ns/op 0 B/op 0 allocs/op
BenchmarkPoolOverflow-16 3169407 190.3 ns/op 0 B/op 0 allocs/op
BenchmarkPoolStarvation-16 244855 2575 ns/op 0 B/op 0 allocs/op
BenchmarkPoolStarvation-16 234656 2557 ns/op 0 B/op 0 allocs/op
BenchmarkPoolStarvation-16 235246 2572 ns/op 0 B/op 0 allocs/op
BenchmarkPoolStarvation-16 235178 2548 ns/op 0 B/op 0 allocs/op
BenchmarkPoolStarvation-16 232990 2565 ns/op 0 B/op 0 allocs/op
BenchmarkPoolStarvation-16 232219 2571 ns/op 0 B/op 0 allocs/op
BenchmarkPoolStarvation-16 244801 2574 ns/op 0 B/op 0 allocs/op
BenchmarkPoolStarvation-16 234160 2592 ns/op 0 B/op 0 allocs/op
BenchmarkPoolStarvation-16 235189 2596 ns/op 0 B/op 0 allocs/op
BenchmarkPoolStarvation-16 229666 2576 ns/op 0 B/op 0 allocs/op
BenchmarkPoolSTW-16 288 5316 ns/op 0 p50-ns/STW 0 p95-ns/STW 2104516 B/op 30 allocs/op
BenchmarkPoolSTW-16 280 12541 ns/op 0 p50-ns/STW 0 p95-ns/STW 2104517 B/op 30 allocs/op
BenchmarkPoolSTW-16 290 15642 ns/op 0 p50-ns/STW 0 p95-ns/STW 2104516 B/op 30 allocs/op
BenchmarkPoolSTW-16 280 7331 ns/op 0 p50-ns/STW 0 p95-ns/STW 2104517 B/op 30 allocs/op
BenchmarkPoolSTW-16 276 9080 ns/op 0 p50-ns/STW 0 p95-ns/STW 2104518 B/op 30 allocs/op
BenchmarkPoolSTW-16 277 10875 ns/op 0 p50-ns/STW 0 p95-ns/STW 2104517 B/op 30 allocs/op
BenchmarkPoolSTW-16 273 9200 ns/op 0 p50-ns/STW 0 p95-ns/STW 2104518 B/op 30 allocs/op
BenchmarkPoolSTW-16 277 3614 ns/op 0 p50-ns/STW 0 p95-ns/STW 2104517 B/op 30 allocs/op
BenchmarkPoolSTW-16 278 1816 ns/op 0 p50-ns/STW 0 p95-ns/STW 2104517 B/op 30 allocs/op
BenchmarkPoolSTW-16 283 12384 ns/op 0 p50-ns/STW 0 p95-ns/STW 2104517 B/op 30 allocs/op
BenchmarkPoolExpensiveNew-16 1 1563342600 ns/op 1.000 GCs/op 100.0 New/op 11671568 B/op 153 allocs/op
BenchmarkPoolExpensiveNew-16 1 1548258100 ns/op 1.000 GCs/op 100.0 New/op 11671472 B/op 152 allocs/op
BenchmarkPoolExpensiveNew-16 1 1555788000 ns/op 1.000 GCs/op 100.0 New/op 11671472 B/op 152 allocs/op
BenchmarkPoolExpensiveNew-16 1 1556702800 ns/op 1.000 GCs/op 100.0 New/op 11671472 B/op 152 allocs/op
BenchmarkPoolExpensiveNew-16 1 1558681400 ns/op 1.000 GCs/op 100.0 New/op 11671472 B/op 152 allocs/op
BenchmarkPoolExpensiveNew-16 1 1559715900 ns/op 1.000 GCs/op 100.0 New/op 11671472 B/op 152 allocs/op
BenchmarkPoolExpensiveNew-16 1 1538853100 ns/op 1.000 GCs/op 100.0 New/op 11671472 B/op 152 allocs/op
BenchmarkPoolExpensiveNew-16 1 1550263800 ns/op 1.000 GCs/op 100.0 New/op 11671472 B/op 152 allocs/op
BenchmarkPoolExpensiveNew-16 1 1560400600 ns/op 1.000 GCs/op 100.0 New/op 11671568 B/op 153 allocs/op
BenchmarkPoolExpensiveNew-16 1 1555661700 ns/op 1.000 GCs/op 100.0 New/op 11671472 B/op 152 allocs/op
BenchmarkPoolpinSlow-16 1192489 475.6 ns/op 2383 B/op 2 allocs/op
BenchmarkPoolpinSlow-16 1205654 492.3 ns/op 2383 B/op 2 allocs/op
BenchmarkPoolpinSlow-16 1204794 496.4 ns/op 2382 B/op 2 allocs/op
BenchmarkPoolpinSlow-16 1210573 477.7 ns/op 2383 B/op 2 allocs/op
BenchmarkPoolpinSlow-16 1201594 507.1 ns/op 2382 B/op 2 allocs/op
BenchmarkPoolpinSlow-16 1256626 514.0 ns/op 2382 B/op 2 allocs/op
BenchmarkPoolpinSlow-16 1202012 508.4 ns/op 2382 B/op 2 allocs/op
BenchmarkPoolpinSlow-16 1262577 487.0 ns/op 2382 B/op 2 allocs/op
BenchmarkPoolpinSlow-16 1263466 490.1 ns/op 2383 B/op 2 allocs/op
BenchmarkPoolpinSlow-16 1249252 512.0 ns/op 2382 B/op 2 allocs/op
PASS
ok sync 74.172s
100 changes: 100 additions & 0 deletions src/sync/shard_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package sync_test

import (
. "sync"
"sync/atomic"
"testing"
)

func TestShardCounter(t *testing.T) {
var s Counter
s.sp.NewShard = func() counterInt {
return counterInt(0)
}
s.Add(1)
if v := s.Value(); v != 1 {
t.Fatalf("got %d , want %d", v, 1)
}
var wg WaitGroup
for range 100 {
wg.Add(1)
go func() {
defer wg.Done()
s.Add(1)
}()
}
wg.Wait()
if v := s.Value(); v != 101 {
t.Fatalf("got %d , want %d", v, 101)
}
}

type Counter struct {
sp ShardedValue[counterInt]
}

func (c *Counter) Add(value int) {
c.sp.Update(func(v counterInt) counterInt {
return counterInt(int(v) + value)
})
}

func (c *Counter) Value() int {
return int(c.sp.Value())
}

type counterInt int

func (a counterInt) Merge(b counterInt) counterInt {
return a + b
}

func TestShardDrain(t *testing.T) {
var s Counter
s.sp.NewShard = func() counterInt {
return counterInt(0)
}
s.Add(1)
if v := s.sp.Drain(); v != 1 {
t.Fatalf("got %d , want %d", v, 1)
}
var wg WaitGroup
for range 100 {
wg.Add(1)
go func() {
defer wg.Done()
s.Add(1)
}()
}
wg.Wait()
if v := s.sp.Drain(); v != 101 {
t.Fatalf("got %d , want %d", v, 101)
}
}

func BenchmarkCounter(b *testing.B) {
b.Run("atomic/int64", func(b *testing.B) {
i := int64(0)
b.RunParallel(func(p *testing.PB) {
for p.Next() {
for range 100000 {
atomic.AddInt64(&i, 1)
}

}
})
})
b.Run("sync/int64", func(b *testing.B) {
b.RunParallel(func(p *testing.PB) {
c := Counter{}
c.sp.NewShard = func() counterInt {
return counterInt(0)
}
for p.Next() {
for range 100000 {
c.Add(1)
}
}
})
})
}

0 comments on commit 41d4bac

Please sign in to comment.