Skip to content
This repository has been archived by the owner on Aug 23, 2023. It is now read-only.

simplify TimeLimiter, fixing its tests #1088

Merged
merged 5 commits into from
Oct 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions idx/memory/memory.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package memory

import (
"context"
"flag"
"fmt"
"regexp"
Expand Down Expand Up @@ -70,6 +69,9 @@ func ConfigProcess() {
if err != nil {
log.Fatal(4, "could not parse max-prune-lock-time %q: %s", maxPruneLockTimeStr, err)
}
if maxPruneLockTime > time.Second {
log.Fatal(4, "invalid max-prune-lock-time of %s. Must be <= 1 second", maxPruneLockTimeStr)
}
}

type Tree struct {
Expand Down Expand Up @@ -1316,12 +1318,9 @@ DEFS:
}
m.RUnlock()

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

// create a new timeLimiter that allows us to limit the amount of time we spend
// holding a lock to maxPruneLockTime (default 100ms) every second.
tl := NewTimeLimiter(ctx, time.Second, maxPruneLockTime)
tl := NewTimeLimiter(time.Second, maxPruneLockTime, time.Now())

for org, ids := range toPruneTagged {
if len(ids) == 0 {
Expand Down
142 changes: 70 additions & 72 deletions idx/memory/time_limit.go
Original file line number Diff line number Diff line change
@@ -1,94 +1,92 @@
package memory

import (
"context"
"sync"
"time"
)
import "time"

// TimeLimiter limits the rate of a set of operations.
// It does this by slowing down further operations as soon
// TimeLimiter limits the rate of a set of serial operations.
// It does this by tracking how much time has been spent (updated via Add()),
// and comparing this to the window size and the limit, slowing down further operations as soon
// as one Add() is called informing it the per-window allowed budget has been exceeded.
// Limitations:
// * concurrently running operations can all exceed the budget,
// so it works best for serial operations.
// * for serial operations, the last operation is allowed to exceed the budget
// * when an operation takes very long (e.g. 10 seconds, with a 100ms limit per second), it
// is counted as exceeding the 100ms budget, but no other provisions are being made.
// * the last operation is allowed to exceed the budget (but the next call will be delayed to compensate)
// * concurrency is not supported
//
// Thus, TimeLimiter is designed for, and works best with, serially running operations,
// each of which takes a fraction of the limit.
// For correctness, you should always follow up an Add() with a Wait()
type TimeLimiter struct {
sync.Mutex
ctx context.Context
since time.Time
next time.Time
timeSpent time.Duration
window time.Duration
limit time.Duration
addCh chan time.Duration
queryCh chan chan struct{}
factor float64
}

// NewTimeLimiter creates a new TimeLimiter. A background goroutine will run until the
// provided context is done. When the amount of time spent on task (the time is determined
// by calls to "Add()") every "window" duration is more then "limit", then calls to
Wait() will block until the start of the next window period.
func NewTimeLimiter(ctx context.Context, window, limit time.Duration) *TimeLimiter {
l := &TimeLimiter{
ctx: ctx,
window: window,
limit: limit,
addCh: make(chan time.Duration),
queryCh: make(chan chan struct{}),
}
go l.run()
return l
}

func (l *TimeLimiter) run() {
ticker := time.NewTicker(l.window)
done := l.ctx.Done()
var blockedQueries []chan struct{}
for {
select {
case <-done:
//context done. shutting down
for _, ch := range blockedQueries {
close(ch)
}
return
case <-ticker.C:
l.timeSpent = 0
for _, ch := range blockedQueries {
close(ch)
}
blockedQueries = blockedQueries[:0]
case d := <-l.addCh:
l.timeSpent += d
case respCh := <-l.queryCh:
if l.timeSpent < l.limit {
close(respCh)
} else {
// rate limit exceeded. On the next tick respCh will be closed
// notifying the caller that they can continue.
blockedQueries = append(blockedQueries, respCh)
}
}
// NewTimeLimiter returns a TimeLimiter whose first window starts at the given time.
// The limit must be <= the window.
func NewTimeLimiter(window, limit time.Duration, now time.Time) *TimeLimiter {
	return &TimeLimiter{
		since:  now,
		next:   now.Add(window),
		window: window,
		limit:  limit,
		// factor scales "time spent" into the wall-clock time that must
		// elapse for that spending to honor the limit
		factor: float64(window) / float64(limit),
	}
}

// Add increments the "time spent" counter by "d"
func (l *TimeLimiter) Add(d time.Duration) {
l.addCh <- d
l.add(time.Now(), d)
}

// Wait returns when we are not rate limited, which may be
// anywhere between immediately or after the window.
// add records d worth of work that finished at time now.
// If now falls past the current window, a fresh window is started, anchored
// at when this piece of work began (now - d), and the counter restarts at d.
func (l *TimeLimiter) add(now time.Time, d time.Duration) {
	if !now.After(l.next) {
		// still inside the current window: just accumulate
		l.timeSpent += d
		return
	}
	// past the window: open a new one containing the work just done
	l.since = now.Add(-d)
	l.next = l.since.Add(l.window)
	l.timeSpent = d
}

// Wait returns when we are not rate limited
// * if we passed the window, we reset everything (this is only safe for callers
// that behave correctly, i.e. that wait the instructed time after each add)
// * if limit is not reached, no sleep is needed
// * if limit has been exceeded, sleep until next period + extra multiple to compensate
// this is perhaps best explained with an example:
// if window is 1s and limit 100ms, but we spent 250ms, then we spent effectively 2.5 seconds worth of work.
// let's say we are 800ms into the 1s window, that means we should sleep 2500-800 = 1.7s
// in order to maximize work while honoring the imposed limit.
// * if limit has been met exactly, sleep until next period (this is a special case of the above)
func (l *TimeLimiter) Wait() {
respCh := make(chan struct{})
l.queryCh <- respCh
time.Sleep(l.wait(time.Now()))
}

// if we have not exceeded our locking quota then respCh will be
// immediately closed. Otherwise it won't be closed until the next tick (duration of "l.window")
// and we will block until then.
<-respCh
// wait computes how long a caller should sleep at time now to honor the
// limit. See Wait() for the full contract.
func (l *TimeLimiter) wait(now time.Time) time.Duration {
	// Outside the window entirely — either we sailed past it, or the clock
	// jumped backwards (in which case resetting is the best we can do).
	// Start a fresh window and let the caller proceed immediately.
	if now.Before(l.since) || now.After(l.next) {
		l.since = now
		l.next = now.Add(l.window)
		l.timeSpent = 0
		return 0
	}

	// within budget: no throttling needed
	if l.timeSpent < l.limit {
		return 0
	}

	// Invariants here: since <= now <= next, and timeSpent >= limit.
	// Having spent timeSpent of work is only permissible once
	// timeSpent * factor of wall-clock time has elapsed since the window began.
	required := time.Duration(float64(l.timeSpent) * l.factor)
	elapsed := now.Sub(l.since)

	// defensive: if enough wall-clock time already passed, no sleep is owed
	if elapsed >= required {
		return 0
	}
	return required - elapsed
}
78 changes: 26 additions & 52 deletions idx/memory/time_limit_test.go
Original file line number Diff line number Diff line change
@@ -1,71 +1,45 @@
package memory

import (
"context"
"testing"
"time"
)

func shouldTakeAbout(t *testing.T, fn func(), expDur time.Duration, sloppynessFactor int, info string) {
// on Dieter's laptop:
// takes about <=15 micros for add/wait sequences
// takes about 150micros for a add + blocking wait
// on circleCI, takes 75micros for add/wait sequence
slop := time.Duration(sloppynessFactor) * time.Microsecond
pre := time.Now()
fn()
dur := time.Since(pre)
if dur > expDur+slop || dur < expDur-slop {
t.Fatalf("scenario %s. was supposed to take %s, but took %s", info, expDur, dur)
var now = time.Unix(10, 0)

// shouldTake simulates performing workDone worth of work against tl — advancing
// the package-level fake clock by the same amount — and asserts that the limiter
// then instructs a wait of exactly expDur. It finally "sleeps" by moving the
// fake clock forward by that wait, behaving like a correct caller.
func shouldTake(t *testing.T, tl *TimeLimiter, workDone, expDur time.Duration, info string) {
	// doing the work consumes wall-clock time too: move the fake clock along
	now = now.Add(workDone)
	tl.add(now, workDone)

	instructed := tl.wait(now)
	if instructed != expDur {
		t.Fatalf("scenario %s. expected wait %s, got wait %s", info, expDur, instructed)
	}

	// pretend we slept for the instructed duration
	now = now.Add(instructed)
}

func TestTimeLimiter(t *testing.T) {
window := time.Second
limit := 100 * time.Millisecond

ctx, cancel := context.WithCancel(context.Background())
tl := NewTimeLimiter(ctx, window, limit)
tl := NewTimeLimiter(window, limit, now)

// TEST 1 : Start first window by doing work and seeing when it starts blocking
shouldTakeAbout(t, tl.Wait, 0, 100, "window 1: work done: 0 - wait should be 0")

tl.Add(5 * time.Millisecond)
shouldTakeAbout(t, tl.Wait, 0, 100, "window 1: work done: 5ms - wait should be 0")

tl.Add(10 * time.Millisecond)
shouldTakeAbout(t, tl.Wait, 0, 100, "window 1: work done: 15ms - wait should be 0")

tl.Add(80 * time.Millisecond)
shouldTakeAbout(t, tl.Wait, 0, 100, "window 1: work done: 95ms - wait should be 0")

tl.Add(4 * time.Millisecond)
shouldTakeAbout(t, tl.Wait, 0, 100, "window 1: work done: 99ms - wait should be 0")

tl.Add(3 * time.Millisecond)

shouldTakeAbout(t, tl.Wait, time.Second, 500, "window 1: work done: 102ms - almost no time has passed, so wait should be full window")
shouldTake(t, tl, 0, 0, "window 1: work done: 0")
shouldTake(t, tl, 5*time.Millisecond, 0, "window 1: work done: 5ms")
shouldTake(t, tl, 10*time.Millisecond, 0, "window 1: work done: 15ms")
shouldTake(t, tl, 80*time.Millisecond, 0, "window 1: work done: 95ms")
shouldTake(t, tl, 4*time.Millisecond, 0, "window 1: work done: 99ms")
// we should spend at least 1020ms to do 102ms of work
shouldTake(t, tl, 3*time.Millisecond, (1020-102)*time.Millisecond, "window 1: work done: 102ms")

// TEST 2 : Now that we waited until a full window, should be able to up to limit work again
tl.Add(50 * time.Millisecond)
shouldTakeAbout(t, tl.Wait, 0, 100, "window 2: work done: 50ms - wait should be 0")

tl.Add(40 * time.Millisecond)
shouldTakeAbout(t, tl.Wait, 0, 100, "window 2: work done: 90ms - wait should be 0")

tl.Add(40 * time.Millisecond)
shouldTakeAbout(t, tl.Wait, time.Second, 500, "window 2: work done: 130ms - wait should be 1s")

// TEST 3 : Now that we waited until a full window, should be able to up to limit work again
// but this time we cancel, so we don't have to wait as long
tl.Add(50 * time.Millisecond)
shouldTakeAbout(t, tl.Wait, 0, 100, "window 3: work done: 50ms - wait should be 0")

tl.Add(40 * time.Millisecond)
shouldTakeAbout(t, tl.Wait, 0, 100, "window 3: work done: 90ms - wait should be 0")

tl.Add(40 * time.Millisecond)

time.AfterFunc(500*time.Millisecond, cancel)
shouldTakeAbout(t, tl.Wait, 500*time.Millisecond, 500, "window 3: work done: 130ms, canceling after 500ms - wait should be 500ms")
shouldTake(t, tl, 50*time.Millisecond, 0, "window 2: work done: 50ms")
shouldTake(t, tl, 40*time.Millisecond, 0, "window 2: work done: 90ms")
// we should spend at least 1300ms to do 130ms of work
shouldTake(t, tl, 40*time.Millisecond, (1300-130)*time.Millisecond, "window 2: work done: 130ms")
}