Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[metrics] add lru cache age metric #536

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 69 additions & 13 deletions cache/disk/disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"strconv"
"strings"
"sync"
"time"

"github.com/buchgr/bazel-remote/cache"
"github.com/buchgr/bazel-remote/cache/disk/casblob"
Expand All @@ -31,6 +32,8 @@ import (
pb "github.com/buchgr/bazel-remote/genproto/build/bazel/remote/execution/v2"
"google.golang.org/protobuf/proto"

"github.com/prometheus/client_golang/prometheus"

"golang.org/x/sync/semaphore"
)

Expand Down Expand Up @@ -83,6 +86,8 @@ type diskCache struct {

mu sync.Mutex
lru SizedLRU

gaugeCacheAge prometheus.Gauge
}

type nameAndInfo struct {
Expand Down Expand Up @@ -135,6 +140,11 @@ func New(dir string, maxSizeBytes int64, opts ...Option) (Cache, error) {
// I suppose it's better to slow down processing than to crash
// when hitting the 10k limit or to run out of disk space.
fileRemovalSem: semaphore.NewWeighted(5000),

gaugeCacheAge: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "bazel_remote_disk_cache_longest_item_idle_time_seconds",
Help: "The idle time (now - atime) of the last item in the LRU cache, updated once per minute. Depending on filesystem mount options (e.g. relatime), the resolution may be measured in 'days' and not accurate to the second. If using noatime this will be 0.",
}),
}

cc := CacheConfig{diskCache: &c}
Expand All @@ -143,19 +153,7 @@ func New(dir string, maxSizeBytes int64, opts ...Option) (Cache, error) {
// This function is only called while the lock is held
// by the current goroutine.
onEvict := func(key Key, value lruItem) {
ks := key.(string)
hash := ks[len(ks)-sha256.Size*2:]
var kind cache.EntryKind = cache.AC
if strings.HasPrefix(ks, "cas") {
kind = cache.CAS
} else if strings.HasPrefix(ks, "ac") {
kind = cache.AC
} else if strings.HasPrefix(ks, "raw") {
kind = cache.RAW
}

f := filepath.Join(dir, c.FileLocation(kind, value.legacy, hash, value.size, value.random))

f := c.getElementPath(key, value)
// Run in a goroutine so we can release the lock sooner.
go c.removeFile(f)
}
Expand Down Expand Up @@ -211,6 +209,64 @@ func New(dir string, maxSizeBytes int64, opts ...Option) (Cache, error) {
// Non-test users must call this to expose metrics.
func (c *diskCache) RegisterMetrics() {
	c.lru.RegisterMetrics()

	// MustRegister panics if the gauge cannot be registered with the
	// default prometheus registry.
	prometheus.MustRegister(c.gaugeCacheAge)

	// Update the cache age metric on a static interval, from a background
	// goroutine started here.
	// Note: this could be modeled as a GaugeFunc that updates as needed,
	// but since the updater func must lock the cache mu, it was deemed
	// necessary to have greater control of when to get the cache age.
	go c.pollCacheAge()
}

// pollCacheAge refreshes the cache age metric once immediately and then
// again each time the 60-second ticker fires. It loops forever and never
// returns.
func (c *diskCache) pollCacheAge() {
	tick := time.NewTicker(60 * time.Second)
	for {
		c.updateCacheAgeMetric()
		// Block until the next tick before refreshing again.
		<-tick.C
	}
}

// updateCacheAgeMetric sets gaugeCacheAge to the idle time, in seconds, of
// the item at the tail of the LRU: the current time minus the timestamp
// that atime.Stat reports for the item's file.
//
// When the LRU has no tail item the gauge is set to 0. When the file cannot
// be stat'ed the failure is logged and the gauge keeps its previous value.
func (c *diskCache) updateCacheAgeMetric() {
	// Hold the cache lock only long enough to read the tail entry and
	// resolve its path. The stat below is filesystem I/O; performing it
	// under c.mu would block every other goroutine waiting on the lock
	// for the duration of the syscall.
	c.mu.Lock()
	key, value := c.lru.getTailItem()
	var f string
	if key != nil {
		f = c.getElementPath(key, value)
	}
	c.mu.Unlock()

	if key == nil {
		c.gaugeCacheAge.Set(0)
		return
	}

	// The entry may be evicted and its file removed between releasing the
	// lock and this stat. That shows up as a stat error, and the update is
	// skipped just like any other stat failure.
	ts, err := atime.Stat(f)
	if err != nil {
		log.Printf("ERROR: failed to determine time of least recently used cache item: %v, unable to stat %s", err, f)
		return
	}

	c.gaugeCacheAge.Set(time.Since(ts).Seconds())
}

// getElementPath returns the on-disk path for the LRU entry identified by
// key and value.
//
// key must hold a string whose final sha256.Size*2 characters are the hex
// hash; the entry kind is chosen from the key's prefix ("cas" or "raw"),
// with every other prefix, including "ac", treated as an AC entry. The
// function panics if key is not a string or is shorter than the hash.
func (c *diskCache) getElementPath(key Key, value lruItem) string {
	name := key.(string)
	digest := name[len(name)-sha256.Size*2:]

	var kind cache.EntryKind
	switch {
	case strings.HasPrefix(name, "cas"):
		kind = cache.CAS
	case strings.HasPrefix(name, "raw"):
		kind = cache.RAW
	default:
		kind = cache.AC
	}

	rel := c.FileLocation(kind, value.legacy, digest, value.size, value.random)
	return filepath.Join(c.dir, rel)
}

func (c *diskCache) removeFile(f string) {
Expand Down
10 changes: 10 additions & 0 deletions cache/disk/lru.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,13 @@ func (c *SizedLRU) removeElement(e *list.Element) {
// roundUp4k rounds n up to a multiple of BlockSize by adding BlockSize-1
// and then clearing the low bits. The bit trick only yields a true multiple
// when BlockSize is a power of two.
func roundUp4k(n int64) int64 {
	mask := int64(BlockSize) - 1
	return (n + mask) &^ mask
}

// getTailItem returns the key and value stored in the element at the back
// of the LRU list. When the list is empty it returns a nil key and a zero
// lruItem.
func (c *SizedLRU) getTailItem() (Key, lruItem) {
	back := c.ll.Back()
	if back == nil {
		return nil, lruItem{}
	}

	ent := back.Value.(*entry)
	return ent.key, ent.value
}