Skip to content

Commit

Permalink
Support scheduled actions and cancellation (#419)
Browse files Browse the repository at this point in the history
* Support scheduled actions and cancellation

Support scheduled actions by adding a new queue that actions will be
added to/removed from before they are sent to the dispatcher. The queue
is a priority queue (ordered by start_time). fleet_gateway is
responsible for syncing the queue to storage. Cancellation of an action
will be handled by a new action dispatcher that will remove actions from
the queue (if any) and update the targetID action status.

TODO
- cancel handler
- action expiration
- fleet_gateway tests

* Add queue tests in fleet_gateway_tests, fix check and linting issues

* Force start_time/expiration to be UTC

* Remove logic todos, fix logging statement

* Apply suggestions from code review

Co-authored-by: Anderson Queiroz <me@andersonq.me>

Co-authored-by: Anderson Queiroz <me@andersonq.me>
  • Loading branch information
michel-laterman and AndersonQ authored May 24, 2022
1 parent 95da34b commit ffe77e8
Show file tree
Hide file tree
Showing 17 changed files with 1,644 additions and 116 deletions.
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ linters:
- noctx # noctx finds sending http request without context.Context
- unconvert # Remove unnecessary type conversions
- wastedassign # wastedassign finds wasted assignment statements.
- godox # tool for detection of FIXME, TODO and other comment keywords
# - godox # tool for detection of FIXME, TODO and other comment keywords

# all available settings of specific linters
linters-settings:
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,4 @@
- Increase the download artifact timeout to 10mins and add log download statistics. {pull}308[308]
- Save the agent configuration and the state encrypted on the disk. {issue}535[535] {pull}398[398]
- Bump node.js version for heartbeat/synthetics to 16.15.0
- Support scheduled actions and cancellation of pending actions. {issue}393[393] {pull}419[419]
32 changes: 32 additions & 0 deletions NOTICE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14073,6 +14073,38 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
Dependency : github.com/stretchr/objx
Version: v0.2.0
Licence type (autodetected): MIT
--------------------------------------------------------------------------------

Contents of probable licence file $GOMODCACHE/github.com/stretchr/objx@v0.2.0/LICENSE:

The MIT License

Copyright (c) 2014 Stretchr, Inc.
Copyright (c) 2017-2018 objx contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


--------------------------------------------------------------------------------
Dependency : github.com/tklauser/go-sysconf
Version: v0.3.9
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ require (
github.com/sergi/go-diff v1.1.0 // indirect
github.com/sirupsen/logrus v1.8.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.2.0 // indirect
github.com/tklauser/go-sysconf v0.3.9 // indirect
github.com/tklauser/numcpus v0.3.0 // indirect
github.com/yusufpapurcu/wmi v1.2.2 // indirect
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,7 @@ github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag
github.com/stretchr/objx v0.0.0-20180129172003-8a3f7159479f/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0 h1:Hbg2NidpLE8veEBkEZTL3CvlkUIVzuU9jDplZO54c48=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/testify v0.0.0-20180303142811-b89eecf5ca5d/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
Expand Down
91 changes: 87 additions & 4 deletions internal/pkg/agent/application/gateway/fleet/fleet_gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package fleet

import (
"context"
stderr "errors"
"fmt"
"sync"
"time"
Expand Down Expand Up @@ -62,6 +63,14 @@ type stateStore interface {
AckToken() string
SetAckToken(ackToken string)
Save() error
SetQueue([]fleetapi.Action)
Actions() []fleetapi.Action
}

// actionQueue is the queue of scheduled actions used by the fleet gateway.
// The gateway adds actions that carry a start time to it, drains it on every
// checkin tick, and hands its contents to the state store for persistence.
type actionQueue interface {
// Add inserts an action together with an int64 priority; the gateway passes
// the action's start time as Unix seconds.
Add(fleetapi.Action, int64)
// DequeueActions returns the actions taken off the queue for dispatching.
// NOTE(review): presumably only actions whose start time has passed are
// returned - confirm against the queue implementation.
DequeueActions() []fleetapi.Action
// Cancel is not called from the gateway code shown here.
// NOTE(review): presumably takes an action ID and returns the number of
// entries removed - confirm against the queue implementation.
Cancel(string) int
// Actions returns the actions held by the queue; the gateway passes the
// result to stateStore.SetQueue before saving.
Actions() []fleetapi.Action
}

Expand All @@ -82,6 +91,7 @@ type fleetGateway struct {
statusController status.Controller
statusReporter status.Reporter
stateStore stateStore
queue actionQueue
}

// New creates a new fleet gateway
Expand All @@ -95,6 +105,7 @@ func New(
acker store.FleetAcker,
statusController status.Controller,
stateStore stateStore,
queue actionQueue,
) (gateway.FleetGateway, error) {

scheduler := scheduler.NewPeriodicJitter(defaultGatewaySettings.Duration, defaultGatewaySettings.Jitter)
Expand All @@ -110,6 +121,7 @@ func New(
acker,
statusController,
stateStore,
queue,
)
}

Expand All @@ -125,6 +137,7 @@ func newFleetGatewayWithScheduler(
acker store.FleetAcker,
statusController status.Controller,
stateStore stateStore,
queue actionQueue,
) (gateway.FleetGateway, error) {

// Backoff implementation doesn't support the use of a context [cancellation]
Expand All @@ -151,13 +164,14 @@ func newFleetGatewayWithScheduler(
statusReporter: statusController.RegisterComponent("gateway"),
statusController: statusController,
stateStore: stateStore,
queue: queue,
}, nil
}

func (f *fleetGateway) worker() {
for {
select {
case <-f.scheduler.WaitTick():
case ts := <-f.scheduler.WaitTick():
f.log.Debug("FleetGateway calling Checkin API")

// Execute the checkin call and for any errors returned by the fleet-server API
Expand All @@ -168,12 +182,27 @@ func (f *fleetGateway) worker() {
continue
}

actions := make([]fleetapi.Action, len(resp.Actions))
for idx, a := range resp.Actions {
actions[idx] = a
actions := f.queueScheduledActions(resp.Actions)
actions, err = f.dispatchCancelActions(actions)
if err != nil {
f.log.Error(err.Error())
}

queued, expired := f.gatherQueuedActions(ts.UTC())
f.log.Debugf("Gathered %d actions from queue, %d actions expired", len(queued), len(expired))
f.log.Debugf("Expired actions: %v", expired)

actions = append(actions, queued...)

var errMsg string
// Persist state
f.stateStore.SetQueue(f.queue.Actions())
if err := f.stateStore.Save(); err != nil {
errMsg = fmt.Sprintf("failed to persist action_queue, error: %s", err)
f.log.Error(errMsg)
f.statusReporter.Update(state.Failed, errMsg, nil)
}

if err := f.dispatcher.Dispatch(context.Background(), f.acker, actions...); err != nil {
errMsg = fmt.Sprintf("failed to dispatch actions, error: %s", err)
f.log.Error(errMsg)
Expand All @@ -194,6 +223,60 @@ func (f *fleetGateway) worker() {
}
}

// queueScheduledActions moves every action that reports a valid start time onto the queue,
// keyed by that start time in Unix seconds, and returns the actions that were not queued.
// The start time is purposefully not compared against the current time here, so that cancel
// actions can still be applied to anything that has been queued.
func (f *fleetGateway) queueScheduledActions(input fleetapi.Actions) []fleetapi.Action {
	unscheduled := make([]fleetapi.Action, 0, len(input))
	for _, act := range input {
		startTime, err := act.StartTime()
		switch {
		case err == nil:
			// A usable start time means the action is scheduled: queue it instead of returning it.
			f.log.Debugf("Adding action id: %s to queue.", act.ID())
			f.queue.Add(act, startTime.Unix())
		case stderr.Is(err, fleetapi.ErrNoStartTime):
			// No start time at all is the normal case for an immediate action.
			unscheduled = append(unscheduled, act)
		default:
			// Any other error is reported, but the action is still returned to the caller.
			f.log.Warnf("Issue gathering start time from action id %s: %v", act.ID(), err)
			unscheduled = append(unscheduled, act)
		}
	}
	return unscheduled
}

// dispatchCancelActions separates the cancel actions from the actions list, dispatches them,
// and returns the rest of the list.
// Cancel actions are dispatched separately as they may remove items from the queue.
//
// Both the dispatched cancel actions and the returned actions keep the relative order they had
// in the input, and the input slice is not modified. The non-cancel actions are returned even
// when dispatching the cancel actions fails, so the caller can still process them.
func (f *fleetGateway) dispatchCancelActions(actions []fleetapi.Action) ([]fleetapi.Action, error) {
	// Partition in a single forward pass; this preserves ordering and avoids
	// repeatedly shifting the slice while deleting elements from it.
	var cancelActions []fleetapi.Action
	remaining := make([]fleetapi.Action, 0, len(actions))
	for _, action := range actions {
		if action.Type() == fleetapi.ActionTypeCancel {
			cancelActions = append(cancelActions, action)
			continue
		}
		remaining = append(remaining, action)
	}

	// Nothing to cancel: skip the dispatcher entirely.
	if len(cancelActions) == 0 {
		return remaining, nil
	}

	if err := f.dispatcher.Dispatch(context.Background(), f.acker, cancelActions...); err != nil {
		return remaining, fmt.Errorf("failed to dispatch cancel actions: %w", err)
	}
	return remaining, nil
}

// gatherQueuedActions dequeues actions from the action queue and splits them into the ones
// that should still run (queued) and the ones whose expiration is before ts (expired).
//
// A failure to read an action's expiration is logged instead of being silently discarded.
// The action is then classified using whatever time Expiration returned alongside the error.
// NOTE(review): if Expiration returns the zero time on error, such actions end up in expired -
// confirm that this is the intended handling for actions without an expiration.
func (f *fleetGateway) gatherQueuedActions(ts time.Time) (queued, expired []fleetapi.Action) {
	for _, action := range f.queue.DequeueActions() {
		exp, err := action.Expiration()
		if err != nil {
			f.log.Warnf("Issue gathering expiration from action id %s: %v", action.ID(), err)
		}
		if ts.After(exp) {
			expired = append(expired, action)
			continue
		}
		queued = append(queued, action)
	}
	return queued, expired
}

func (f *fleetGateway) doExecute() (*fleetapi.CheckinResponse, error) {
f.backoff.Reset()

Expand Down
Loading

0 comments on commit ffe77e8

Please sign in to comment.