internal/sync: move sync.Mutex implementation into new package
This CL refactors sync.Mutex such that its implementation lives in the
new internal/sync package. The purpose of this change is to eventually
reverse the dependency edge between internal/concurrent and sync, such
that sync can depend on internal/concurrent (or really, its contents,
which will likely end up in internal/sync).

The only change made to the sync.Mutex code is the frame skip count for
mutex profiling, so that the internal/sync frames are omitted in the
profile.

Change-Id: Ib3603d30e8e71508c4ea883a584ae2e51ce40c3f
Reviewed-on: https://go-review.googlesource.com/c/go/+/594056
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
mknyszek authored and gopherbot committed Nov 18, 2024
1 parent b5906ac commit 6c66005
Showing 11 changed files with 348 additions and 236 deletions.
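This listing renders seven of the eleven changed files; the sync package's own side of the refactor is among those not shown. Per the commit message, sync.Mutex keeps its API while the implementation moves here. A minimal sketch of the public wrapper, assuming straight delegation (the import alias and field name are assumptions, not taken from this CL):

// Sketch only: what the sync package side of this CL plausibly looks like,
// assuming sync.Mutex delegates to the implementation in internal/sync.
package sync

import isync "internal/sync"

// Mutex is the exported type; the behavior lives in internal/sync.
type Mutex struct {
	mu isync.Mutex
}

func (m *Mutex) Lock()         { m.mu.Lock() }
func (m *Mutex) TryLock() bool { return m.mu.TryLock() }
func (m *Mutex) Unlock()       { m.mu.Unlock() }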
2 changes: 1 addition & 1 deletion src/cmd/internal/objabi/pkgspecial.go
@@ -80,7 +80,7 @@ var extraNoInstrumentPkgs = []string{
"-internal/bytealg",
}

-var noRaceFuncPkgs = []string{"sync", "sync/atomic", "internal/runtime/atomic"}
+var noRaceFuncPkgs = []string{"sync", "sync/atomic", "internal/sync", "internal/runtime/atomic"}

var allowAsmABIPkgs = []string{
"runtime",
1 change: 1 addition & 0 deletions src/go/build/deps_test.go
@@ -97,6 +97,7 @@ var depsRules = `
	< runtime
	< sync/atomic
	< internal/weak
+	< internal/sync
	< sync
	< internal/bisect
	< internal/godebug
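In the depsRules language, `a < b` means b may import a; each `<` adds a layer. Reading the amended chain above, internal/sync sits below sync, so internal/sync may use sync/atomic and the runtime, and sync may now import internal/sync — exactly the dependency direction the commit message is working toward. An illustrative rule in the same syntax (placeholder names, not from this CL):

	# b may import a; c may import both.
	a < b < c;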
234 changes: 234 additions & 0 deletions src/internal/sync/mutex.go
@@ -0,0 +1,234 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package sync provides basic synchronization primitives such as mutual
// exclusion locks to internal packages (including ones that depend on sync).
//
// Tests are defined in package [sync].
package sync

import (
	"internal/race"
	"sync/atomic"
	"unsafe"
)

// A Mutex is a mutual exclusion lock.
//
// See package [sync.Mutex] documentation.
type Mutex struct {
	state int32
	sema  uint32
}

const (
	mutexLocked = 1 << iota // mutex is locked
	mutexWoken
	mutexStarving
	mutexWaiterShift = iota

	// Mutex fairness.
	//
	// Mutex can be in 2 modes of operations: normal and starvation.
	// In normal mode waiters are queued in FIFO order, but a woken up waiter
	// does not own the mutex and competes with new arriving goroutines over
	// the ownership. New arriving goroutines have an advantage -- they are
	// already running on CPU and there can be lots of them, so a woken up
	// waiter has good chances of losing. In such case it is queued at front
	// of the wait queue. If a waiter fails to acquire the mutex for more than 1ms,
	// it switches mutex to the starvation mode.
	//
	// In starvation mode ownership of the mutex is directly handed off from
	// the unlocking goroutine to the waiter at the front of the queue.
	// New arriving goroutines don't try to acquire the mutex even if it appears
	// to be unlocked, and don't try to spin. Instead they queue themselves at
	// the tail of the wait queue.
	//
	// If a waiter receives ownership of the mutex and sees that either
	// (1) it is the last waiter in the queue, or (2) it waited for less than 1 ms,
	// it switches mutex back to normal operation mode.
	//
	// Normal mode has considerably better performance as a goroutine can acquire
	// a mutex several times in a row even if there are blocked waiters.
	// Starvation mode is important to prevent pathological cases of tail latency.
	starvationThresholdNs = 1e6
)

// Lock locks m.
//
// See package [sync.Mutex] documentation.
func (m *Mutex) Lock() {
	// Fast path: grab unlocked mutex.
	if atomic.CompareAndSwapInt32(&m.state, 0, mutexLocked) {
		if race.Enabled {
			race.Acquire(unsafe.Pointer(m))
		}
		return
	}
	// Slow path (outlined so that the fast path can be inlined)
	m.lockSlow()
}

// TryLock tries to lock m and reports whether it succeeded.
//
// See package [sync.Mutex] documentation.
func (m *Mutex) TryLock() bool {
	old := m.state
	if old&(mutexLocked|mutexStarving) != 0 {
		return false
	}

	// There may be a goroutine waiting for the mutex, but we are
	// running now and can try to grab the mutex before that
	// goroutine wakes up.
	if !atomic.CompareAndSwapInt32(&m.state, old, old|mutexLocked) {
		return false
	}

	if race.Enabled {
		race.Acquire(unsafe.Pointer(m))
	}
	return true
}

func (m *Mutex) lockSlow() {
	var waitStartTime int64
	starving := false
	awoke := false
	iter := 0
	old := m.state
	for {
		// Don't spin in starvation mode, ownership is handed off to waiters
		// so we won't be able to acquire the mutex anyway.
		if old&(mutexLocked|mutexStarving) == mutexLocked && runtime_canSpin(iter) {
			// Active spinning makes sense.
			// Try to set mutexWoken flag to inform Unlock
			// to not wake other blocked goroutines.
			if !awoke && old&mutexWoken == 0 && old>>mutexWaiterShift != 0 &&
				atomic.CompareAndSwapInt32(&m.state, old, old|mutexWoken) {
				awoke = true
			}
			runtime_doSpin()
			iter++
			old = m.state
			continue
		}
		new := old
		// Don't try to acquire starving mutex, new arriving goroutines must queue.
		if old&mutexStarving == 0 {
			new |= mutexLocked
		}
		if old&(mutexLocked|mutexStarving) != 0 {
			new += 1 << mutexWaiterShift
		}
		// The current goroutine switches mutex to starvation mode.
		// But if the mutex is currently unlocked, don't do the switch.
		// Unlock expects that starving mutex has waiters, which will not
		// be true in this case.
		if starving && old&mutexLocked != 0 {
			new |= mutexStarving
		}
		if awoke {
			// The goroutine has been woken from sleep,
			// so we need to reset the flag in either case.
			if new&mutexWoken == 0 {
				throw("sync: inconsistent mutex state")
			}
			new &^= mutexWoken
		}
		if atomic.CompareAndSwapInt32(&m.state, old, new) {
			if old&(mutexLocked|mutexStarving) == 0 {
				break // locked the mutex with CAS
			}
			// If we were already waiting before, queue at the front of the queue.
			queueLifo := waitStartTime != 0
			if waitStartTime == 0 {
				waitStartTime = runtime_nanotime()
			}
			runtime_SemacquireMutex(&m.sema, queueLifo, 2)
			starving = starving || runtime_nanotime()-waitStartTime > starvationThresholdNs
			old = m.state
			if old&mutexStarving != 0 {
				// If this goroutine was woken and mutex is in starvation mode,
				// ownership was handed off to us but mutex is in somewhat
				// inconsistent state: mutexLocked is not set and we are still
				// accounted as waiter. Fix that.
				if old&(mutexLocked|mutexWoken) != 0 || old>>mutexWaiterShift == 0 {
					throw("sync: inconsistent mutex state")
				}
				delta := int32(mutexLocked - 1<<mutexWaiterShift)
				if !starving || old>>mutexWaiterShift == 1 {
					// Exit starvation mode.
					// Critical to do it here and consider wait time.
					// Starvation mode is so inefficient, that two goroutines
					// can go lock-step infinitely once they switch mutex
					// to starvation mode.
					delta -= mutexStarving
				}
				atomic.AddInt32(&m.state, delta)
				break
			}
			awoke = true
			iter = 0
		} else {
			old = m.state
		}
	}

	if race.Enabled {
		race.Acquire(unsafe.Pointer(m))
	}
}

// Unlock unlocks m.
//
// See package [sync.Mutex] documentation.
func (m *Mutex) Unlock() {
	if race.Enabled {
		_ = m.state
		race.Release(unsafe.Pointer(m))
	}

	// Fast path: drop lock bit.
	new := atomic.AddInt32(&m.state, -mutexLocked)
	if new != 0 {
		// Outlined slow path to allow inlining the fast path.
		// To hide unlockSlow during tracing we skip one extra frame when tracing GoUnblock.
		m.unlockSlow(new)
	}
}

func (m *Mutex) unlockSlow(new int32) {
	if (new+mutexLocked)&mutexLocked == 0 {
		fatal("sync: unlock of unlocked mutex")
	}
	if new&mutexStarving == 0 {
		old := new
		for {
			// If there are no waiters or a goroutine has already
			// been woken or grabbed the lock, no need to wake anyone.
			// In starvation mode ownership is directly handed off from unlocking
			// goroutine to the next waiter. We are not part of this chain,
			// since we did not observe mutexStarving when we unlocked the mutex above.
			// So get off the way.
			if old>>mutexWaiterShift == 0 || old&(mutexLocked|mutexWoken|mutexStarving) != 0 {
				return
			}
			// Grab the right to wake someone.
			new = (old - 1<<mutexWaiterShift) | mutexWoken
			if atomic.CompareAndSwapInt32(&m.state, old, new) {
				runtime_Semrelease(&m.sema, false, 2)
				return
			}
			old = m.state
		}
	} else {
		// Starving mode: handoff mutex ownership to the next waiter, and yield
		// our time slice so that the next waiter can start to run immediately.
		// Note: mutexLocked is not set, the waiter will set it after wakeup.
		// But mutex is still considered locked if mutexStarving is set,
		// so new coming goroutines won't acquire it.
		runtime_Semrelease(&m.sema, true, 2)
	}
}
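All of the Mutex logic above hinges on the bit layout defined in the const block: three low flag bits plus a waiter count packed into the remaining bits of the single state word. A standalone sketch that decodes a state value the same way the code above does (decodeState is a hypothetical helper for illustration, not part of this CL):

// Illustrative only: decode a Mutex state word into its components,
// mirroring the const block in internal/sync/mutex.go above.
package main

import "fmt"

const (
	mutexLocked      = 1 << 0 // same values as 1 << iota above
	mutexWoken       = 1 << 1
	mutexStarving    = 1 << 2
	mutexWaiterShift = 3 // waiter count occupies the remaining high bits
)

// decodeState is a hypothetical helper that splits a state word.
func decodeState(state int32) (locked, woken, starving bool, waiters int32) {
	return state&mutexLocked != 0,
		state&mutexWoken != 0,
		state&mutexStarving != 0,
		state >> mutexWaiterShift
}

func main() {
	// A locked mutex in starvation mode with two blocked waiters.
	state := int32(mutexLocked | mutexStarving | 2<<mutexWaiterShift)
	fmt.Println(decodeState(state)) // true false true 2
}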
52 changes: 52 additions & 0 deletions src/internal/sync/runtime.go
@@ -0,0 +1,52 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package sync

import _ "unsafe"

// defined in package runtime

// SemacquireMutex is like Semacquire, but for profiling contended
// Mutexes and RWMutexes.
// If lifo is true, queue waiter at the head of wait queue.
// skipframes is the number of frames to omit during tracing, counting from
// runtime_SemacquireMutex's caller.
// The different forms of this function just tell the runtime how to present
// the reason for waiting in a backtrace, and is used to compute some metrics.
// Otherwise they're functionally identical.
//
//go:linkname runtime_SemacquireMutex
func runtime_SemacquireMutex(s *uint32, lifo bool, skipframes int)

// Semrelease atomically increments *s and notifies a waiting goroutine
// if one is blocked in Semacquire.
// It is intended as a simple wakeup primitive for use by the synchronization
// library and should not be used directly.
// If handoff is true, pass count directly to the first waiter.
// skipframes is the number of frames to omit during tracing, counting from
// runtime_Semrelease's caller.
//
//go:linkname runtime_Semrelease
func runtime_Semrelease(s *uint32, handoff bool, skipframes int)

// Active spinning runtime support.
// runtime_canSpin reports whether spinning makes sense at the moment.
//
//go:linkname runtime_canSpin
func runtime_canSpin(i int) bool

// runtime_doSpin does active spinning.
//
//go:linkname runtime_doSpin
func runtime_doSpin()

//go:linkname runtime_nanotime
func runtime_nanotime() int64

//go:linkname throw
func throw(string)

//go:linkname fatal
func fatal(string)
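Every declaration in this file is body-less: the //go:linkname directives splice in implementations at link time. The single-argument form above marks the local symbol so the runtime can target it; the runtime side then pushes an implementation onto that symbol, as the panic.go hunk below does for throw and fatal (the provider side for the semaphore functions lives in runtime files not shown in this listing). Schematically, a sketch of the two halves of the handshake:

// Consumer side (internal/sync): declare without a body.
//
//	//go:linkname throw
//	func throw(string)
//
// Provider side (runtime): push an implementation under the consumer's name.
//
//	//go:linkname internal_sync_throw internal/sync.throw
//	func internal_sync_throw(s string) { throw(s) }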
2 changes: 1 addition & 1 deletion src/runtime/mprof.go
@@ -49,7 +49,7 @@ const (
	// desired maximum number of frames after expansion.
	// This should be at least as large as the largest skip value
	// used for profiling; otherwise stacks may be truncated inconsistently
-	maxSkip = 5
+	maxSkip = 6

	// maxProfStackDepth is the highest valid value for debug.profstackdepth.
	// It's used for the bucket.stk func.
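The bump from 5 to 6 tracks the new call chain: a contended Lock now passes through one extra internal/sync frame before reaching the runtime, so the largest skip value the profiler must absorb grows by one. Schematically, under this CL (the public sync frame is an assumption, since that file is not shown in this listing):

	sync.(*Mutex).Lock                    // public wrapper (assumed)
	internal/sync.(*Mutex).Lock           // fast path
	internal/sync.(*Mutex).lockSlow
	internal/sync.runtime_SemacquireMutex // called with skipframes = 2 above
	runtime semaphore internals           // record the contention event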
10 changes: 10 additions & 0 deletions src/runtime/panic.go
@@ -1048,6 +1048,16 @@ func maps_fatal(s string) {
	fatal(s)
}

+//go:linkname internal_sync_throw internal/sync.throw
+func internal_sync_throw(s string) {
+	throw(s)
+}
+
+//go:linkname internal_sync_fatal internal/sync.fatal
+func internal_sync_fatal(s string) {
+	fatal(s)
+}

// throw triggers a fatal error that dumps a stack trace and exits.
//
// throw should be used for runtime-internal fatal errors where Go itself,
40 changes: 27 additions & 13 deletions src/runtime/proc.go
@@ -7154,19 +7154,9 @@ func sync_atomic_runtime_procUnpin() {

// Active spinning for sync.Mutex.
//
-// sync_runtime_canSpin should be an internal detail,
-// but widely used packages access it using linkname.
-// Notable members of the hall of shame include:
-//   - github.com/livekit/protocol
-//   - github.com/sagernet/gvisor
-//   - gvisor.dev/gvisor
-//
-// Do not remove or change the type signature.
-// See go.dev/issue/67401.
-//
-//go:linkname sync_runtime_canSpin sync.runtime_canSpin
+//go:linkname internal_sync_runtime_canSpin internal/sync.runtime_canSpin
//go:nosplit
-func sync_runtime_canSpin(i int) bool {
+func internal_sync_runtime_canSpin(i int) bool {
	// sync.Mutex is cooperative, so we are conservative with spinning.
	// Spin only few times and only if running on a multicore machine and
	// GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
@@ -7181,6 +7171,30 @@ func sync_runtime_canSpin(i int) bool {
	return true
}

+//go:linkname internal_sync_runtime_doSpin internal/sync.runtime_doSpin
+//go:nosplit
+func internal_sync_runtime_doSpin() {
+	procyield(active_spin_cnt)
+}
+
+// Active spinning for sync.Mutex.
+//
+// sync_runtime_canSpin should be an internal detail,
+// but widely used packages access it using linkname.
+// Notable members of the hall of shame include:
+//   - github.com/livekit/protocol
+//   - github.com/sagernet/gvisor
+//   - gvisor.dev/gvisor
+//
+// Do not remove or change the type signature.
+// See go.dev/issue/67401.
+//
+//go:linkname sync_runtime_canSpin sync.runtime_canSpin
+//go:nosplit
+func sync_runtime_canSpin(i int) bool {
+	return internal_sync_runtime_canSpin(i)
+}

// sync_runtime_doSpin should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
@@ -7194,7 +7208,7 @@ func sync_runtime_canSpin(i int) bool {
//go:linkname sync_runtime_doSpin sync.runtime_doSpin
//go:nosplit
func sync_runtime_doSpin() {
-	procyield(active_spin_cnt)
+	internal_sync_runtime_doSpin()
}

var stealOrder randomOrder
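The old sync.runtime_canSpin and sync.runtime_doSpin entry points survive as one-line wrappers because, as the hall-of-shame comments note, external packages reach them by linkname, so their names and signatures must not change. A hedged sketch of what such an external consumer looks like (illustrative only; this is the pattern go.dev/issue/67401 aims to contain, and packages doing it typically also carry an empty .s file so the compiler accepts the body-less declaration):

import _ "unsafe" // for go:linkname

// Pull sync's unexported spin hint into this package. Hypothetical
// third-party code; do not do this in new code.
//
//go:linkname canSpin sync.runtime_canSpin
func canSpin(i int) bool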
