Skip to content

Commit

Permalink
Streamline dynamic interval calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
Daisuke Maki committed Sep 29, 2024
1 parent e767743 commit 8f3d81a
Show file tree
Hide file tree
Showing 9 changed files with 442 additions and 252 deletions.
61 changes: 61 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,67 @@
can be fetched via HTTP, but keep the cached content up-to-date based on periodic
refreshing.

# Client

A `httprc.Client` object is comprised of 3 parts: the user-facing controller API,
the main controller loop, and a set of workers that perform the actual fetching.

The user-facing controller API is the object returned when you call `(httprc.Client).Start`.

```go
ctrl, _ := client.Start(ctx)
```

# Controller API

The controller API gives you access to the controller backend that runs asynchronously.
All methods take a `context.Context` object because they potentially block. You should
be careful to use `context.WithTimeout` to properly set a timeout if you cannot tolerate
a blocking operation.

# Main Controller Loop

The main controller loop runs asynchronously to the controller API. It is single-threaded,
and it has two responsibilities.

The first is to receive commands from the controller API,
and appropriately modify the state of the goroutine, i.e. modify the list of resources
it is watching, performing forced refreshes, etc.

The other is to periodically wake up and go through the list of resources and re-fetch
the ones that are past their TTL (in reality, each resource carries a "next-check" time, not
a TTL). The main controller loop itself does nothing more: it just kicks off these checks periodically.

The interval between checks is changed dynamically based on either metadata carried
with the HTTP responses, such as `Cache-Control` and `Expires` headers, or a constant
interval set by the user for a given resource. Between these values, the main controller loop
will pick the shortest interval (but no less than 1 second) and check whether resources
need updating based on that value.

For example, if resource A has an expiry of 10 minutes and resource B has an expiry of 5
minutes, the main controller loop will attempt to wake up roughly every 5 minutes to check
on the resources.

When the controller loop detects that a resource needs to be checked for freshness,
it will send the resource to the worker pool to be synced.

# Interval calculation

After the resource is synced, the next fetch is scheduled. The interval to the next
fetch is calculated either by using constant intervals, or by heuristics using values
from the `http.Response` object.

If the constant interval is specified, no extra calculation is performed. If you specify
a constant interval of 15 minutes, the resource will be checked every 15 minutes. This is
predictable and reliable, but not necessarily efficient.

If you do not specify a constant interval, the HTTP response is analyzed for
values in `Cache-Control` and `Expires` headers. These values will be compared against
maximum and minimum interval values, which default to 30 days and 15 minutes, respectively.
If the values obtained from the headers fall within that range, the value from the header is
used. If the value is larger than the maximum, the maximum is used. If the value is lower
than the minimum, the minimum is used.

# SYNOPSIS

<!-- INCLUDE(client_example_test.go) -->
Expand Down
188 changes: 188 additions & 0 deletions backend.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
package httprc

import (
"context"
"fmt"
"sync"
"time"
)

// adjustInterval handles a request to re-evaluate the tick interval against
// the next-check time of the resource carried in req.
//
// The time remaining until the resource's next check is rounded with
// roundupToSeconds. When the current tick interval is already shorter than
// that value nothing changes; otherwise the ticker is reset so the controller
// wakes up in time for this resource.
func (c *controller) adjustInterval(ctx context.Context, req adjustIntervalRequest) {
	// Read the clock once so that the traced value and the value we act on agree.
	untilNext := time.Until(req.resource.Next())
	c.traceSink.Put(ctx, fmt.Sprintf("httprc controller: got adjust request (time until next check: %s)", untilNext))

	interval := roundupToSeconds(untilNext)
	// (*time.Ticker).Reset panics on a non-positive duration, which is what we
	// would pass if the next-check time has already elapsed. Clamp to one
	// second, the smallest regular interval the controller is meant to use.
	if interval <= 0 {
		interval = time.Second
	}

	if c.tickInterval < interval {
		c.traceSink.Put(ctx, fmt.Sprintf("httprc controller: no adjusting required (time to next check %s > current tick interval %s)", interval, c.tickInterval))
		return
	}

	c.traceSink.Put(ctx, fmt.Sprintf("httprc controller: adjusting tick interval to %s", interval))
	c.SetTickInterval(interval)
}

// addResource registers the resource carried in req with the controller.
//
// A resource whose URL is already registered is rejected with
// errResourceAlreadyExists. Otherwise the resource is stored, any unset
// (zero) maximum/minimum interval is filled in from the controller defaults,
// the reply channel is closed to signal success, and the tick interval is
// shrunk to a nanosecond so the periodic check fires right away.
func (c *controller) addResource(ctx context.Context, req addRequest) {
	res := req.resource
	url := res.URL()

	if _, exists := c.items[url]; exists {
		// Already exists
		sendReply(ctx, req.reply, struct{}{}, errResourceAlreadyExists)
		return
	}
	c.items[url] = res

	if res.MaxInterval() == 0 {
		res.SetMaxInterval(c.defaultMaxInterval)
	}
	if res.MinInterval() == 0 {
		c.traceSink.Put(ctx, fmt.Sprintf("httprc controller: set minimum interval to %s", c.defaultMinInterval))
		res.SetMinInterval(c.defaultMinInterval)
	}

	// Closing without sending a value signals success to the caller.
	close(req.reply)

	c.traceSink.Put(ctx, fmt.Sprintf("httprc controller: added resource %q", url))
	c.SetTickInterval(time.Nanosecond)
}

// rmResource removes the resource identified by req.u from the controller.
//
// If no such resource is registered, errResourceNotFound is sent on the reply
// channel. Otherwise the resource is deleted, the reply channel is closed to
// signal success, and the tick interval is recomputed as the smallest minimum
// interval among the remaining resources (capped at one day).
func (c *controller) rmResource(ctx context.Context, req rmRequest) {
	u := req.u
	if _, ok := c.items[u]; !ok {
		sendReply(ctx, req.reply, struct{}{}, errResourceNotFound)
		return
	}

	delete(c.items, u)

	minInterval := oneDay
	for _, item := range c.items {
		// Skip non-positive values: (*time.Ticker).Reset panics on them.
		if d := item.MinInterval(); d > 0 && d < minInterval {
			minInterval = d
		}
	}

	close(req.reply)

	// Go through SetTickInterval rather than resetting the ticker directly so
	// that c.tickInterval stays in sync with the ticker; adjustInterval relies
	// on c.tickInterval to decide whether the ticker needs shortening.
	c.SetTickInterval(minInterval)
}

// refreshResource handles a forced refresh of the resource identified by
// req.u.
//
// An unknown URL is answered with errResourceNotFound. For a known resource,
// its next-check time is moved back to the Unix epoch and the resource is
// handed to the synchronous worker channel together with the caller's reply
// channel.
func (c *controller) refreshResource(ctx context.Context, req refreshRequest) {
	res, found := c.items[req.u]
	if !found {
		sendReply(ctx, req.reply, struct{}{}, errResourceNotFound)
		return
	}

	res.SetNext(time.Unix(0, 0))

	syncReq := synchronousRequest{
		resource: res,
		reply:    req.reply,
	}
	sendWorkerSynchronous(ctx, c.syncoutgoing, syncReq)
}

// lookupResource replies with the resource registered under req.u, or with
// errResourceNotFound (and a nil payload) when there is no such resource.
func (c *controller) lookupResource(ctx context.Context, req lookupRequest) {
	if res, found := c.items[req.u]; found {
		sendReply(ctx, req.reply, res, nil)
		return
	}
	sendReply(ctx, req.reply, nil, errResourceNotFound)
}

// handleRequest dispatches a request received by the controller loop to the
// handler matching its concrete type. Requests of an unrecognized type are
// not processed; they are only reported to the trace sink.
func (c *controller) handleRequest(ctx context.Context, req any) {
	switch v := req.(type) {
	case addRequest:
		c.addResource(ctx, v)
	case rmRequest:
		c.rmResource(ctx, v)
	case lookupRequest:
		c.lookupResource(ctx, v)
	case refreshRequest:
		c.refreshResource(ctx, v)
	case adjustIntervalRequest:
		c.adjustInterval(ctx, v)
	default:
		c.traceSink.Put(ctx, fmt.Sprintf("httprc controller: unknown request type %T", v))
	}
}

// sendWorker marks r as busy and then hands it to ch, giving up on the send
// if ctx is done first. The busy flag is set before the send is attempted
// and is not cleared here.
func sendWorker(ctx context.Context, ch chan Resource, r Resource) {
	r.SetBusy(true)

	select {
	case ch <- r:
	case <-ctx.Done():
	}
}

// sendWorkerSynchronous marks the resource inside r as busy and then hands
// the whole request to ch, giving up on the send if ctx is done first. The
// busy flag is set before the send is attempted and is not cleared here.
func sendWorkerSynchronous(ctx context.Context, ch chan synchronousRequest, r synchronousRequest) {
	r.resource.SetBusy(true)

	select {
	case ch <- r:
	case <-ctx.Done():
	}
}

// sendReply delivers a single response built from v and err on ch, unless ctx
// is done before the response can be sent. In either case ch is closed before
// the function returns, so each reply channel can be used only once.
func sendReply[T any](ctx context.Context, ch chan backendResponse[T], v T, err error) {
	defer close(ch)

	resp := backendResponse[T]{payload: v, err: err}
	select {
	case ch <- resp:
	case <-ctx.Done():
	}
}

// loop is the main controller loop. It runs until ctx is done and calls
// wg.Done on exit.
//
// Each iteration does one of two things:
//   - handles a request received on c.incoming, or
//   - on a tick of c.check, sends every resource that is not busy and whose
//     next-check time is not after the tick time to the worker channel, then
//     reschedules the ticker based on the earliest next-check time seen.
func (c *controller) loop(ctx context.Context, wg *sync.WaitGroup) {
	c.traceSink.Put(ctx, "httprc controller: starting main controller loop")
	defer c.traceSink.Put(ctx, "httprc controller: stopping main controller loop")
	defer wg.Done()
	for {
		select {
		case req := <-c.incoming:
			c.handleRequest(ctx, req)
		case t := <-c.check.C:
			var minNext time.Time
			var dispatched int
			for _, item := range c.items {
				// Read the next-check time exactly once per item. The value can
				// change underneath us while a resource is being processed
				// elsewhere, so a second read may not match the one used for
				// the dispatch decision below.
				next := item.Next()
				if minNext.IsZero() || next.Before(minNext) {
					minNext = next
				}

				if item.IsBusy() || next.After(t) {
					continue
				}

				dispatched++
				sendWorker(ctx, c.outgoing, item)
			}

			c.traceSink.Put(ctx, fmt.Sprintf("httprc controller: dispatched %d resources", dispatched))

			// Next check is always at the earliest next check + 1 second.
			// The extra second makes sure that we are _past_ the actual next check time
			// so we can send the resource to the worker pool
			if interval := time.Until(minNext); interval > 0 {
				c.SetTickInterval(roundupToSeconds(interval) + time.Second)
				c.traceSink.Put(ctx, fmt.Sprintf("httprc controller: resetting check intervanl to %s", c.tickInterval))
			} else {
				// If we got here, the earliest next-check time is not in the
				// future: either we have no resources, or the earliest resource
				// is already due (e.g. it is busy or was just dispatched).
				// In this state, it's possible that the interval is less than 1 second,
				// because we previously set it to a small value for an immediate refresh.
				// In this case, we want to reset it to a sane value.
				//
				// NOTE(review): this uses the package-level defaultMinInterval, not
				// the per-controller c.defaultMinInterval — confirm that is intended.
				if c.tickInterval < time.Second {
					c.SetTickInterval(defaultMinInterval)
					c.traceSink.Put(ctx, fmt.Sprintf("httprc controller: resetting check intervanl to %s after forced refresh", c.tickInterval))
				}
			}

			c.traceSink.Put(ctx, fmt.Sprintf("httprc controller: next check in %s", c.tickInterval))
		case <-ctx.Done():
			return
		}
	}
}

// SetTickInterval records d as the controller's current tick interval and
// resets the check ticker to fire every d.
//
// d must be greater than zero: the ticker's Reset panics otherwise.
// No locking is performed here (see the TODO below).
func (c *controller) SetTickInterval(d time.Duration) {
// TODO synchronize
c.tickInterval = d
c.check.Reset(d)
}
33 changes: 26 additions & 7 deletions client.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,26 @@ import (

// Client is the main entry point for the httprc package.
type Client struct {
mu sync.Mutex
httpcl HTTPClient
numWorkers int
running bool
errSink ErrorSink
traceSink TraceSink
wl Whitelist
mu sync.Mutex
httpcl HTTPClient
numWorkers int
running bool
errSink ErrorSink
traceSink TraceSink
wl Whitelist
defaultMaxInterval time.Duration
defaultMinInterval time.Duration
}

// DefaultWorkers is the number of workers NewClient starts out with before
// any options are applied.
const DefaultWorkers = 5

// DefaultMaxInterval is the default maximum interval between fetches (30 days).
const DefaultMaxInterval = 24 * time.Hour * 30

// DefaultMinInterval is the default minimum interval between fetches (15 minutes).
const DefaultMinInterval = 15 * time.Minute

// oneDay is used internally as the initial upper bound when the controller
// recomputes its check interval after a resource is removed.
const oneDay = 24 * time.Hour

// NewClient creates a new `httprc.Client` object.
Expand All @@ -38,6 +48,9 @@ func NewClient(options ...NewClientOption) *Client {
var wl Whitelist = InsecureWhitelist{}
var httpcl HTTPClient = http.DefaultClient

defaultMinInterval := DefaultMinInterval
defaultMaxInterval := DefaultMaxInterval

numWorkers := DefaultWorkers
//nolint:forcetypeassert
for _, option := range options {
Expand All @@ -64,6 +77,9 @@ func NewClient(options ...NewClientOption) *Client {
errSink: errSink,
traceSink: traceSink,
wl: wl,

defaultMinInterval: defaultMinInterval,
defaultMaxInterval: defaultMaxInterval,
}
}

Expand Down Expand Up @@ -144,6 +160,9 @@ func (c *Client) Start(octx context.Context) (Controller, error) {
check: time.NewTicker(tickInterval),
shutdown: make(chan struct{}),
wl: c.wl,

defaultMinInterval: c.defaultMinInterval,
defaultMaxInterval: c.defaultMaxInterval,
}
wg.Add(1)
go ctrl.loop(ctx, &wg)
Expand Down
Loading

0 comments on commit 8f3d81a

Please sign in to comment.