From 253ed78cc62d8ec8e73260748e66875589c1d27e Mon Sep 17 00:00:00 2001 From: James Shubin Date: Wed, 13 Feb 2019 15:40:08 -0500 Subject: [PATCH] engine: Rewrite the core algorithm The engine core had some unfortunate bugs that were the result of some early design errors when I wasn't as familiar with channels. I've finally rewritten most of the bad parts, and I think it's much more logical and stable now. This also simplifies the resource API, since more of the work is done completely in the engine, and hidden from view. Lastly, this adds a few new metaparameters and associated code. There are still some open problems left to solve, but hopefully this brings us one step closer. --- docs/resource-guide.md | 65 +--- engine/error.go | 7 +- engine/event/event.go | 83 ----- engine/graph/actions.go | 321 ++++++---------- engine/graph/engine.go | 148 +++++--- engine/graph/graph_test.go | 37 ++ engine/graph/state.go | 352 ++++++------------ engine/graph/vardir.go | 51 +++ engine/metaparams.go | 43 ++- engine/resources.go | 20 +- engine/resources/augeas.go | 19 +- engine/resources/aws_ec2.go | 42 +-- engine/resources/cron.go | 22 +- engine/resources/docker_container.go | 21 +- engine/resources/exec.go | 19 +- engine/resources/exec_test.go | 3 - engine/resources/file.go | 19 +- engine/resources/group.go | 19 +- engine/resources/hostname.go | 21 +- engine/resources/kv.go | 20 +- engine/resources/mount.go | 20 +- engine/resources/msg.go | 28 +- engine/resources/net.go | 20 +- engine/resources/noop.go | 30 +- engine/resources/nspawn.go | 19 +- engine/resources/password.go | 19 +- engine/resources/pkg.go | 21 +- engine/resources/print.go | 28 +- engine/resources/resources_test.go | 18 +- engine/resources/svc.go | 29 +- engine/resources/test.go | 30 +- engine/resources/timer.go | 18 +- engine/resources/user.go | 19 +- engine/resources/virt.go | 25 +- engine/reverse.go | 65 ++++ engine/traits/reverse.go | 48 +++ lang/interpret_test.go | 20 +- .../TestAstFunc1/metaparams0.graph | 26 +- .../TestAstFunc1/metaparams0/main.mcl | 3 + lang/structs.go | 101 ++++- lib/main.go | 6 +- pgraph/graphsync.go | 44 ++- 42 files changed, 890 insertions(+), 1079 deletions(-) delete mode 100644 engine/event/event.go create mode 100644 engine/graph/graph_test.go create mode 100644 engine/graph/vardir.go create mode 100644 engine/reverse.go create mode 100644 engine/traits/reverse.go diff --git a/docs/resource-guide.md b/docs/resource-guide.md index 841ebd0e2..bf27be6ec 100644 --- a/docs/resource-guide.md +++ b/docs/resource-guide.md @@ -307,21 +307,18 @@ running. The lifetime of most resources `Watch` method should be spent in an infinite loop that is bounded by a `select` call. The `select` call is the point where our method hands back control to the engine (and the kernel) so that we can -sleep until something of interest wakes us up. In this loop we must process -events from the engine via the `<-obj.init.Events` channel, and receive events -for our resource itself! +sleep until something of interest wakes us up. In this loop we must wait until +we get a shutdown event from the engine via the `<-obj.init.Done` channel, which +closes when we'd like to shut everything down. At this point you should cleanup, +and let `Watch` close. #### Events -If we receive an internal event from the `<-obj.init.Events` channel, we should -read it with the `obj.init.Read` helper function. This function tells us if we -should shutdown our resource. It also handles pause functionality which blocks -our resource temporarily in this method. If this channel shuts down, then we -should treat that as an exit signal. - -When we want to send an event, we use the `Event` helper function. It is also -important to mark the resource state as `dirty` if we believe it might have -changed. We do this by calling the `obj.init.Dirty` function. +If the `<-obj.init.Done` channel closes, we should shutdown our resource. When +When we want to send an event, we use the `Event` helper function. This +automatically marks the resource state as `dirty`. If you're unsure, it's not +harmful to send the event. This will ultimately cause `CheckApply` to run. This +method can block if the resource is being paused. #### Startup @@ -330,8 +327,7 @@ to generate one event to notify the `mgmt` engine that we're now listening successfully, so that it can run an initial `CheckApply` to ensure we're safely tracking a healthy state and that we didn't miss anything when `Watch` was down or from before `mgmt` was running. You must do this by calling the -`obj.init.Running` method. If it returns an error, you must exit and return that -error. +`obj.init.Running` method. #### Converged @@ -358,41 +354,29 @@ func (obj *FooRes) Watch() error { defer obj.whatever.CloseFoo() // shutdown our Foo // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { select { - case event, ok := <-obj.init.Events: - if !ok { - // shutdown engine - // (it is okay if some `defer` code runs first) - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } - // the actual events! case event := <-obj.foo.Events: if is_an_event { send = true - obj.init.Dirty() // dirty } // event errors case err := <-obj.foo.Errors: return err // will cause a retry or permanent failure + + case <-obj.init.Done: // signal for shutdown request + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() } } } @@ -567,23 +551,10 @@ ready to detect changes. Event sends an event notifying the engine of a possible state change. It is only called from within `Watch`. -### Events +### Done -Events is a channel that we must watch for messages from the engine. When it -closes, this is a signal to shutdown. It is -only called from within `Watch`. - -### Read - -Read processes messages that come in from the `Events` channel. It is a helper -method that knows how to handle the pause mechanism correctly. It is -only called from within `Watch`. - -### Dirty - -Dirty marks the resource state as dirty. This signals to the engine that -CheckApply will have some work to do in order to converge it. It is -only called from within `Watch`. +Done is a channel that closes when the engine wants us to shutdown. It is only +called from within `Watch`. ### Refresh diff --git a/engine/error.go b/engine/error.go index e1339ddea..74b3fb303 100644 --- a/engine/error.go +++ b/engine/error.go @@ -24,9 +24,6 @@ type Error string func (e Error) Error() string { return string(e) } const ( - // ErrWatchExit represents an exit from the Watch loop via chan closure. - ErrWatchExit = Error("watch exit") - - // ErrSignalExit represents an exit from the Watch loop via exit signal. - ErrSignalExit = Error("signal exit") + // ErrClosed means we couldn't complete a task because we had closed. + ErrClosed = Error("closed") ) diff --git a/engine/event/event.go b/engine/event/event.go deleted file mode 100644 index 54ec5ff63..000000000 --- a/engine/event/event.go +++ /dev/null @@ -1,83 +0,0 @@ -// Mgmt -// Copyright (C) 2013-2018+ James Shubin and the project contributors -// Written by James Shubin and the project contributors -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -// Package event provides some primitives that are used for message passing. -package event - -//go:generate stringer -type=Kind -output=kind_stringer.go - -// Kind represents the type of event being passed. -type Kind int - -// The different event kinds are used in different contexts. -const ( - KindNil Kind = iota - KindStart - KindPause - KindPoke - KindExit -) - -// Pre-built messages so they can be used directly without having to use NewMsg. -// These are useful when we don't want a response via ACK(). -var ( - Start = &Msg{Kind: KindStart} - Pause = &Msg{Kind: KindPause} // probably unused b/c we want a resp - Poke = &Msg{Kind: KindPoke} - Exit = &Msg{Kind: KindExit} -) - -// Msg is an event primitive that represents a kind of event, and optionally a -// request for an ACK. -type Msg struct { - Kind Kind - - resp chan struct{} -} - -// NewMsg builds a new message struct. It will want an ACK. If you don't want an -// ACK then use the pre-built messages in the package variable globals. -func NewMsg(kind Kind) *Msg { - return &Msg{ - Kind: kind, - resp: make(chan struct{}), - } -} - -// CanACK determines if an ACK is possible for this message. It does not say -// whether one has already been sent or not. -func (obj *Msg) CanACK() bool { - return obj.resp != nil -} - -// ACK acknowledges the event. It must not be called more than once for the same -// event. It unblocks the past and future calls of Wait for this event. -func (obj *Msg) ACK() { - close(obj.resp) -} - -// Wait on ACK for this event. It doesn't matter if this runs before or after -// the ACK. It will unblock either way. -// TODO: consider adding a context if it's ever useful. -func (obj *Msg) Wait() error { - select { - //case <-ctx.Done(): - // return ctx.Err() - case <-obj.resp: - return nil - } -} diff --git a/engine/graph/actions.go b/engine/graph/actions.go index 253c02d69..31d73c6b3 100644 --- a/engine/graph/actions.go +++ b/engine/graph/actions.go @@ -24,12 +24,10 @@ import ( "time" "github.com/purpleidea/mgmt/engine" - "github.com/purpleidea/mgmt/engine/event" "github.com/purpleidea/mgmt/pgraph" - //multierr "github.com/hashicorp/go-multierror" + multierr "github.com/hashicorp/go-multierror" errwrap "github.com/pkg/errors" - "golang.org/x/time/rate" ) // OKTimestamp returns true if this vertex can run right now. @@ -67,26 +65,24 @@ func (obj *Engine) Process(vertex pgraph.Vertex) error { return fmt.Errorf("vertex is not a Res") } - // Engine Guarantee: Do not allow CheckApply to run while we are paused. - // This makes the resource able to know that synchronous channel sending - // to the main loop select in Watch from within CheckApply, will succeed - // without blocking because the resource went into a paused state. If we - // are using the Poll metaparam, then Watch will (of course) not be run. - // FIXME: should this lock be here, or wrapped right around CheckApply ? - obj.state[vertex].eventsLock.Lock() // this lock is taken within Event() - defer obj.state[vertex].eventsLock.Unlock() - // backpoke! (can be async) if vs := obj.BadTimestamps(vertex); len(vs) > 0 { // back poke in parallel (sync b/c of waitgroup) + wg := &sync.WaitGroup{} for _, v := range obj.graph.IncomingGraphVertices(vertex) { if !pgraph.VertexContains(v, vs) { // only poke what's needed continue } - go obj.state[v].Poke() // async + // doesn't really need to be in parallel, but we can... + wg.Add(1) + go func(vv pgraph.Vertex) { + defer wg.Done() + obj.state[vv].Poke() // async + }(v) } + wg.Wait() return nil // can't continue until timestamp is in sequence } @@ -244,14 +240,17 @@ func (obj *Engine) Process(vertex pgraph.Vertex) error { // Worker is the common run frontend of the vertex. It handles all of the retry // and retry delay common code, and ultimately returns the final status of this -// vertex execution. +// vertex execution. This function cannot be "re-run" for the same vertex. The +// retry mechanism stuff happens inside of this. To actually "re-run" you need +// to remove the vertex and build a new one. The engine guarantees that we do +// not allow CheckApply to run while we are paused. That is enforced here. func (obj *Engine) Worker(vertex pgraph.Vertex) error { res, isRes := vertex.(engine.Res) if !isRes { return fmt.Errorf("vertex is not a resource") } - defer close(obj.state[vertex].stopped) // done signal + //defer close(obj.state[vertex].stopped) // done signal obj.state[vertex].cuid = obj.Converger.Register() obj.state[vertex].tuid = obj.Converger.Register() @@ -265,214 +264,140 @@ func (obj *Engine) Worker(vertex pgraph.Vertex) error { obj.state[vertex].wg.Add(1) go func() { defer obj.state[vertex].wg.Done() - defer close(obj.state[vertex].outputChan) // we close this on behalf of res - - var err error - var retry = res.MetaParams().Retry // lookup the retry value - var delay uint64 - for { // retry loop - // a retry-delay was requested, wait, but don't block events! - if delay > 0 { - errDelayExpired := engine.Error("delay exit") - err = func() error { // slim watch main loop - timer := time.NewTimer(time.Duration(delay) * time.Millisecond) - defer obj.state[vertex].init.Logf("the Watch delay expired!") - defer timer.Stop() // it's nice to cleanup - for { - select { - case <-timer.C: // the wait is over - return errDelayExpired // special - - case event, ok := <-obj.state[vertex].init.Events: - if !ok { - return nil - } - if err := obj.state[vertex].init.Read(event); err != nil { - return err - } - } - } - }() - if err == errDelayExpired { - delay = 0 // reset - continue - } - } else if interval := res.MetaParams().Poll; interval > 0 { // poll instead of watching :( - obj.state[vertex].cuid.StartTimer() - err = obj.state[vertex].poll(interval) - obj.state[vertex].cuid.StopTimer() // clean up nicely - } else { - obj.state[vertex].cuid.StartTimer() - obj.Logf("Watch(%s)", vertex) - err = res.Watch() // run the watch normally - obj.Logf("Watch(%s): Exited(%+v)", vertex, err) - obj.state[vertex].cuid.StopTimer() // clean up nicely - } - if err == nil || err == engine.ErrWatchExit || err == engine.ErrSignalExit { - return // exited cleanly, we're done + defer close(obj.state[vertex].eventsChan) // we close this on behalf of res + + // This is a close reverse-multiplexer. If any of the channels + // close, then it will cause the doneChan to close. That way, + // multiple different folks can send a close signal, without + // every worrying about duplicate channel close panics. + obj.state[vertex].wg.Add(1) + go func() { + defer obj.state[vertex].wg.Done() + + // reverse-multiplexer: any close, causes *the* close! + select { + case <-obj.state[vertex].processDone: + case <-obj.state[vertex].watchDone: + case <-obj.state[vertex].removeDone: + case <-obj.state[vertex].eventsDone: } - // we've got an error... - delay = res.MetaParams().Delay - if retry < 0 { // infinite retries - obj.state[vertex].reset() - continue - } - if retry > 0 { // don't decrement past 0 - retry-- - obj.state[vertex].init.Logf("retrying Watch after %.4f seconds (%d left)", float64(delay)/1000, retry) - obj.state[vertex].reset() - continue - } - //if retry == 0 { // optional - // err = errwrap.Wrapf(err, "permanent watch error") - //} - break // break out of this and send the error + // the main "done" signal gets activated here! + close(obj.state[vertex].doneChan) + }() + + obj.Logf("Watch(%s)", vertex) + err := res.Watch() // run the watch normally + obj.Logf("Watch(%s): Exited(%+v)", vertex, err) + if err == nil { // || err == engine.ErrClosed + return // exited cleanly, we're done } + // this section sends an error... // If the CheckApply loop exits and THEN the Watch fails with an // error, then we'd be stuck here if exit signal didn't unblock! select { - case obj.state[vertex].outputChan <- errwrap.Wrapf(err, "watch failed"): + case obj.state[vertex].eventsChan <- errwrap.Wrapf(err, "watch failed"): // send - case <-obj.state[vertex].exit.Signal(): - // pass } }() - // bonus safety check - if res.MetaParams().Burst == 0 && !(res.MetaParams().Limit == rate.Inf) { // blocked - return fmt.Errorf("permanently limited (rate != Inf, burst = 0)") - } - var limiter = rate.NewLimiter(res.MetaParams().Limit, res.MetaParams().Burst) - // It is important that we shutdown the Watch loop if this exits. - // Example, if Process errors permanently, we should ask Watch to exit. - defer obj.state[vertex].Event(event.Exit) // signal an exit - for { + // If this exits cleanly, we must unblock the reverse-multiplexer. + // I think this additional close is unnecessary, but it's not harmful. + defer close(obj.state[vertex].eventsDone) // causes doneChan to close + var reterr error + var failed bool // has Process permanently failed? +Loop: + for { // process loop select { - case err, ok := <-obj.state[vertex].outputChan: // read from watch channel + case err, ok := <-obj.state[vertex].eventsChan: // read from watch channel if !ok { - return nil + return reterr // we only return when chan closes } + // If the Watch method exits with an error, then this + // channel will get that error propagated to it, which + // we then save so we can return it to the caller of us. if err != nil { - return err // permanent failure + failed = true + close(obj.state[vertex].watchDone) // causes doneChan to close + reterr = multierr.Append(reterr, err) // permanent failure + continue + } + if obj.Debug { + obj.Logf("event received") } - // safe to go run the process... - case <-obj.state[vertex].exit.Signal(): // TODO: is this needed? - return nil + case _, ok := <-obj.state[vertex].pokeChan: // read from buffered poke channel + if !ok { // we never close it + panic("unexpected close of poke channel") + } + if obj.Debug { + obj.Logf("poke received") + } + } + if failed { // don't Process anymore if we've already failed... + continue Loop } - now := time.Now() - r := limiter.ReserveN(now, 1) // one event - // r.OK() seems to always be true here! - d := r.DelayFrom(now) - if d > 0 { // delay - obj.state[vertex].init.Logf("limited (rate: %v/sec, burst: %d, next: %v)", res.MetaParams().Limit, res.MetaParams().Burst, d) - var count int - timer := time.NewTimer(time.Duration(d) * time.Millisecond) - LimitWait: - for { - select { - case <-timer.C: // the wait is over - break LimitWait - - // consume other events while we're waiting... - case e, ok := <-obj.state[vertex].outputChan: // read from watch channel - if !ok { - // FIXME: is this logic correct? - if count == 0 { - return nil - } - // loop, because we have - // the previous event to - // run process on first! - continue - } - if e != nil { - return e // permanent failure - } - count++ // count the events... - limiter.ReserveN(time.Now(), 1) // one event - } + // drop redundant pokes + for len(obj.state[vertex].pokeChan) > 0 { + select { + case <-obj.state[vertex].pokeChan: + default: + // race, someone else read one! } - timer.Stop() // it's nice to cleanup - obj.state[vertex].init.Logf("rate limiting expired!") } - var err error - var retry = res.MetaParams().Retry // lookup the retry value - var delay uint64 - Loop: - for { // retry loop - if delay > 0 { - var count int - timer := time.NewTimer(time.Duration(delay) * time.Millisecond) - RetryWait: - for { - select { - case <-timer.C: // the wait is over - break RetryWait - - // consume other events while we're waiting... - case e, ok := <-obj.state[vertex].outputChan: // read from watch channel - if !ok { - // FIXME: is this logic correct? - if count == 0 { - // last process error - return err - } - // loop, because we have - // the previous event to - // run process on first! - continue - } - if e != nil { - return e // permanent failure - } - count++ // count the events... - limiter.ReserveN(time.Now(), 1) // one event - } - } - timer.Stop() // it's nice to cleanup - delay = 0 // reset - obj.state[vertex].init.Logf("the CheckApply delay expired!") + // pause if one was requested... + select { + case <-obj.state[vertex].pauseSignal: // channel closes + // NOTE: If we allowed a doneChan below to let us out + // of the resumeSignal wait, then we could loop around + // and run this again, causing a panic. Instead of this + // being made safe with a sync.Once, we instead run a + // Resume() call inside of the vertexRemoveFn function, + // which should unblock it when we're going to need to. + obj.state[vertex].pausedAck.Ack() // send ack + // we are paused now, and waiting for resume or exit... + select { + case <-obj.state[vertex].resumeSignal: // channel closes + // resumed! + // pass through to allow a Process to try to run + // TODO: consider adding this fast pause here... + //if obj.fastPause { + // obj.Logf("fast pausing on resume") + // continue + //} } + default: + // no pause requested, keep going... + } - if obj.Debug { - obj.Logf("Process(%s)", vertex) - } - err = obj.Process(vertex) - if obj.Debug { - obj.Logf("Process(%s): Return(%+v)", vertex, err) - } - if err == nil { - break Loop - } - // we've got an error... - delay = res.MetaParams().Delay + var err error + if obj.Debug { + obj.Logf("Process(%s)", vertex) + } + err = obj.Process(vertex) + if obj.Debug { + obj.Logf("Process(%s): Return(%+v)", vertex, err) + } - if retry < 0 { // infinite retries - continue - } - if retry > 0 { // don't decrement past 0 - retry-- - obj.state[vertex].init.Logf("retrying CheckApply after %.4f seconds (%d left)", float64(delay)/1000, retry) - continue - } - //if retry == 0 { // optional - // err = errwrap.Wrapf(err, "permanent process error") - //} - - // If this exits, defer calls: obj.Event(event.Exit), - // which will cause the Watch loop to shutdown. Also, - // if the Watch loop shuts down, that will cause this - // Process loop to shut down. Also the graph sync can - // run an: obj.Event(event.Exit) which causes this to - // shutdown as well. Lastly, it is possible that more - // that one of these scenarios happens simultaneously. - return err + // It is important that we shutdown the Watch loop if this dies. + // If Process fails permanently, we ask it to exit right here... + if err != nil { + failed = true + close(obj.state[vertex].processDone) // causes doneChan to close + reterr = multierr.Append(reterr, err) // permanent failure + continue } - } + + // When this Process loop exits, it's because something has + // caused Watch() to shutdown (even if it's our permanent + // failure from Process), which caused this channel to close. + // On or more exit signals are possible, and more than one can + // happen simultaneously. + + } // process loop + //return nil // unreachable } diff --git a/engine/graph/engine.go b/engine/graph/engine.go index f47289b18..a3495b6b8 100644 --- a/engine/graph/engine.go +++ b/engine/graph/engine.go @@ -25,7 +25,6 @@ import ( "github.com/purpleidea/mgmt/converger" "github.com/purpleidea/mgmt/engine" - "github.com/purpleidea/mgmt/engine/event" "github.com/purpleidea/mgmt/pgraph" "github.com/purpleidea/mgmt/util/semaphore" @@ -50,13 +49,14 @@ type Engine struct { graph *pgraph.Graph nextGraph *pgraph.Graph state map[pgraph.Vertex]*State - waits map[pgraph.Vertex]*sync.WaitGroup + waits map[pgraph.Vertex]*sync.WaitGroup // wg for the Worker func slock *sync.Mutex // semaphore lock semas map[string]*semaphore.Semaphore - wg *sync.WaitGroup + wg *sync.WaitGroup // wg for the whole engine (only used for close) + paused bool // are we paused? fastPause bool } @@ -84,6 +84,8 @@ func (obj *Engine) Init() error { obj.wg = &sync.WaitGroup{} + obj.paused = true // start off true, so we can Resume after first Commit + return nil } @@ -137,6 +139,7 @@ func (obj *Engine) Apply(fn func(*pgraph.Graph) error) error { func (obj *Engine) Commit() error { // TODO: Does this hurt performance or graph changes ? + start := []func() error{} // functions to run after graphsync to start... vertexAddFn := func(vertex pgraph.Vertex) error { // some of these validation steps happen before this Commit step // in Validate() to avoid erroring here. These are redundant. @@ -192,12 +195,36 @@ func (obj *Engine) Commit() error { if err := obj.state[vertex].Init(); err != nil { return errwrap.Wrapf(err, "the Res did not Init") } + + fn := func() error { + // start the Worker + obj.wg.Add(1) + obj.waits[vertex].Add(1) + go func(v pgraph.Vertex) { + defer obj.wg.Done() + defer obj.waits[v].Done() + + obj.Logf("Worker(%s)", v) + // contains the Watch and CheckApply loops + err := obj.Worker(v) + obj.Logf("Worker(%s): Exited(%+v)", v, err) + obj.state[v].workerErr = err // store the error + // If the Rewatch metaparam is true, then this will get + // restarted if we do a graph cmp swap. This is why the + // graph cmp function runs the removes before the adds. + // XXX: This should feed into an $error var in the lang. + }(vertex) + return nil + } + start = append(start, fn) // do this at the end, if it's needed return nil } + free := []func() error{} // functions to run after graphsync to reset... vertexRemoveFn := func(vertex pgraph.Vertex) error { // wait for exit before starting new graph! - obj.state[vertex].Event(event.Exit) // signal an exit + close(obj.state[vertex].removeDone) // causes doneChan to close + obj.state[vertex].Resume() // unblock from resume obj.waits[vertex].Wait() // sync // close the state and resource @@ -216,15 +243,58 @@ func (obj *Engine) Commit() error { return nil } + // add the Worker swap (reload) on error decision into this vertexCmpFn + vertexCmpFn := func(v1, v2 pgraph.Vertex) (bool, error) { + r1, ok1 := v1.(engine.Res) + r2, ok2 := v2.(engine.Res) + if !ok1 || !ok2 { // should not happen, previously validated + return false, fmt.Errorf("not a Res") + } + m1 := r1.MetaParams() + m2 := r2.MetaParams() + swap1, swap2 := true, true // assume default of true + if m1 != nil { + swap1 = m1.Rewatch + } + if m2 != nil { + swap2 = m2.Rewatch + } + + s1, ok1 := obj.state[v1] + s2, ok2 := obj.state[v2] + x1, x2 := false, false + if ok1 { + x1 = s1.workerErr != nil && swap1 + } + if ok2 { + x2 = s2.workerErr != nil && swap2 + } + + if x1 || x2 { + // We swap, even if they're the same, so that we reload! + // This causes an add and remove of the "same" vertex... + return false, nil + } + + return engine.VertexCmpFn(v1, v2) // do the normal cmp otherwise + } + // If GraphSync succeeds, it updates the receiver graph accordingly... // Running the shutdown in vertexRemoveFn does not need to happen in a // topologically sorted order because it already paused in that order. obj.Logf("graph sync...") - if err := obj.graph.GraphSync(obj.nextGraph, engine.VertexCmpFn, vertexAddFn, vertexRemoveFn, engine.EdgeCmpFn); err != nil { + if err := obj.graph.GraphSync(obj.nextGraph, vertexCmpFn, vertexAddFn, vertexRemoveFn, engine.EdgeCmpFn); err != nil { return errwrap.Wrapf(err, "error running graph sync") } - // we run these afterwards, so that the state structs (that might get - // referenced) aren't destroyed while someone might poke or use one. + // We run these afterwards, so that we don't unnecessarily start anyone + // if GraphSync failed in some way. Otherwise we'd have to do clean up! + for _, fn := range start { + if err := fn(); err != nil { + return errwrap.Wrapf(err, "error running start fn") + } + } + // We run these afterwards, so that the state structs (that might get + // referenced) are not destroyed while someone might poke or use one. for _, fn := range free { if err := fn(); err != nil { return errwrap.Wrapf(err, "error running free fn") @@ -248,50 +318,28 @@ func (obj *Engine) Commit() error { return nil } -// Start runs the currently active graph. It also un-pauses the graph if it was -// paused. -func (obj *Engine) Start() error { +// Resume runs the currently active graph. It also un-pauses the graph if it was +// paused. Very little that is interesting should happen here. It all happens in +// the Commit method. After Commit, new things are already started, but we still +// need to Resume any pre-existing resources. +func (obj *Engine) Resume() error { + if !obj.paused { + return fmt.Errorf("already resumed") + } + topoSort, err := obj.graph.TopologicalSort() if err != nil { return err } - indegree := obj.graph.InDegree() // compute all of the indegree's + //indegree := obj.graph.InDegree() // compute all of the indegree's reversed := pgraph.Reverse(topoSort) for _, vertex := range reversed { - state := obj.state[vertex] - state.starter = (indegree[vertex] == 0) - var unpause = true // assume true - - if !state.working { // if not running... - state.working = true - unpause = false // doesn't need unpausing if starting - obj.wg.Add(1) - obj.waits[vertex].Add(1) - go func(v pgraph.Vertex) { - defer obj.wg.Done() - defer obj.waits[vertex].Done() - defer func() { - obj.state[v].working = false - }() - - obj.Logf("Worker(%s)", v) - // contains the Watch and CheckApply loops - err := obj.Worker(v) - obj.Logf("Worker(%s): Exited(%+v)", v, err) - }(vertex) - } - - select { - case <-state.started: - case <-state.stopped: // we failed on Watch start - } - - if unpause { // unpause (if needed) - obj.state[vertex].Event(event.Start) - } + //obj.state[vertex].starter = (indegree[vertex] == 0) + obj.state[vertex].Resume() // doesn't error } // we wait for everyone to start before exiting! + obj.paused = false return nil } @@ -302,22 +350,32 @@ func (obj *Engine) Start() error { // This is because once you've started a fast pause, some dependencies might // have been skipped when fast pausing, and future resources might have missed a // poke. In general this is only called when you're trying to hurry up the exit. +// XXX: Not implemented func (obj *Engine) SetFastPause() { obj.fastPause = true } -// Pause the active, running graph. At the moment this cannot error. -func (obj *Engine) Pause(fastPause bool) { +// Pause the active, running graph. +func (obj *Engine) Pause(fastPause bool) error { + if obj.paused { + return fmt.Errorf("already paused") + } + obj.fastPause = fastPause topoSort, _ := obj.graph.TopologicalSort() for _, vertex := range topoSort { // squeeze out the events... // The Event is sent to an unbuffered channel, so this event is // synchronous, and as a result it blocks until it is received. - obj.state[vertex].Event(event.Pause) + if err := obj.state[vertex].Pause(); err != nil && err != engine.ErrClosed { + return err + } } + obj.paused = true + // we are now completely paused... obj.fastPause = false // reset + return nil } // Close triggers a shutdown. Engine must be already paused before this is run. diff --git a/engine/graph/graph_test.go b/engine/graph/graph_test.go new file mode 100644 index 000000000..bb4a074e2 --- /dev/null +++ b/engine/graph/graph_test.go @@ -0,0 +1,37 @@ +// Mgmt +// Copyright (C) 2013-2018+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +// +build !root + +package graph + +import ( + "fmt" + "testing" + + multierr "github.com/hashicorp/go-multierror" +) + +func TestMultiErr(t *testing.T) { + var err error + e := fmt.Errorf("some error") + err = multierr.Append(err, e) // build an error from a nil base + // ensure that this lib allows us to append to a nil + if err == nil { + t.Errorf("missing error") + } +} diff --git a/engine/graph/state.go b/engine/graph/state.go index 6f714591f..4c98e0ba7 100644 --- a/engine/graph/state.go +++ b/engine/graph/state.go @@ -19,14 +19,11 @@ package graph import ( "fmt" - "os" - "path" "sync" "time" "github.com/purpleidea/mgmt/converger" "github.com/purpleidea/mgmt/engine" - "github.com/purpleidea/mgmt/engine/event" "github.com/purpleidea/mgmt/pgraph" "github.com/purpleidea/mgmt/util" @@ -61,29 +58,49 @@ type State struct { timestamp int64 // last updated timestamp isStateOK bool // is state OK or do we need to run CheckApply ? - - // events is a channel of incoming events which is read by the Watch - // loop for that resource. It receives events like pause, start, and - // poke. The channel shuts down to signal for Watch to exit. - eventsChan chan *event.Msg // incoming to resource - eventsLock *sync.Mutex // lock around sending and closing of events channel - eventsDone bool // is channel closed? - - // outputChan is the channel that the engine listens on for events from + workerErr error // did the Worker error? + + // doneChan closes when Watch should shut down. When any of the + // following channels close, it causes this to close. + doneChan chan struct{} + + // processDone is closed when the Process/CheckApply function fails + // permanently, and wants to cause Watch to exit. + processDone chan struct{} + // watchDone is closed when the Watch function fails permanently, and we + // close this to signal we should definitely exit. (Often redundant.) + watchDone chan struct{} + // removeDone is closed when the vertexRemoveFn method asks for an exit. + // This happens when we're switching graphs. The switch to an "empty" is + // the equivalent of asking for a final shutdown. + removeDone chan struct{} + // eventsDone is closed when we shutdown the Process loop because we + // closed without error. In theory this shouldn't happen, but it could + // if Watch returns without error for some reason. + eventsDone chan struct{} + + // eventsChan is the channel that the engine listens on for events from // the Watch loop for that resource. The event is nil normally, except // when events are sent on this channel from the engine. This only // happens as a signaling mechanism when Watch has shutdown and we want // to notify the Process loop which reads from this. - outputChan chan error // outgoing from resource + eventsChan chan error // outgoing from resource - wg *sync.WaitGroup - exit *util.EasyExit + // pokeChan is a separate channel that the Process loop listens on to + // know when we might need to run Process. It never closes, and is safe + // to send on since it is buffered. + pokeChan chan struct{} // outgoing from resource - started chan struct{} // closes when it's started - stopped chan struct{} // closes when it's stopped + // paused represents if this particular res is paused or not. + paused bool + // pauseSignal closes to request a pause of this resource. + pauseSignal chan struct{} + // resumeSignal closes to request a resume of this resource. + resumeSignal chan struct{} + // pausedAck is used to send an ack message saying that we've paused. + pausedAck *util.EasyAck - starter bool // do we have an indegree of 0 ? - working bool // is the Main() loop running ? + wg *sync.WaitGroup // used for all vertex specific processes cuid *converger.UID // primary converger tuid *converger.UID // secondary converger @@ -93,17 +110,6 @@ type State struct { // Init initializes structures like channels. func (obj *State) Init() error { - obj.eventsChan = make(chan *event.Msg) - obj.eventsLock = &sync.Mutex{} - - obj.outputChan = make(chan error) - - obj.wg = &sync.WaitGroup{} - obj.exit = util.NewEasyExit() - - obj.started = make(chan struct{}) - obj.stopped = make(chan struct{}) - res, isRes := obj.Vertex.(engine.Res) if !isRes { return fmt.Errorf("vertex is not a Res") @@ -121,6 +127,24 @@ func (obj *State) Init() error { return fmt.Errorf("the Logf function is missing") } + obj.doneChan = make(chan struct{}) + + obj.processDone = make(chan struct{}) + obj.watchDone = make(chan struct{}) + obj.removeDone = make(chan struct{}) + obj.eventsDone = make(chan struct{}) + + obj.eventsChan = make(chan error) + + obj.pokeChan = make(chan struct{}, 1) // must be buffered + + //obj.paused = false // starts off as started + obj.pauseSignal = make(chan struct{}) + //obj.resumeSignal = make(chan struct{}) // happens on pause + //obj.pausedAck = util.NewEasyAck() // happens on pause + + obj.wg = &sync.WaitGroup{} + //obj.cuid = obj.Converger.Register() // gets registered in Worker() //obj.tuid = obj.Converger.Register() // gets registered in Worker() @@ -129,24 +153,9 @@ func (obj *State) Init() error { Hostname: obj.Hostname, // Watch: - Running: func() error { - obj.tuid.StopTimer() - close(obj.started) // this is reset in the reset func - obj.isStateOK = false // assume we're initially dirty - // optimization: skip the initial send if not a starter - // because we'll get poked from a starter soon anyways! - if !obj.starter { - return nil - } - return obj.event() - }, - Event: obj.event, - Events: obj.eventsChan, - Read: obj.read, - Dirty: func() { // TODO: should we rename this SetDirty? - obj.tuid.StopTimer() - obj.isStateOK = false - }, + Running: obj.event, + Event: obj.event, + Done: obj.doneChan, // CheckApply: Refresh: func() bool { @@ -231,187 +240,91 @@ func (obj *State) Close() error { return err } -// reset is run to reset the state so that Watch can run a second time. Thus is -// needed for the Watch retry in particular. -func (obj *State) reset() { - obj.started = make(chan struct{}) - obj.stopped = make(chan struct{}) -} - -// Poke sends a nil message on the outputChan. This channel is used by the -// resource to signal a possible change. This will cause the Process loop to -// run if it can. +// Poke sends a notification on the poke channel. This channel is used to notify +// the Worker to run the Process/CheckApply when it can. This is used when there +// is a need to schedule or reschedule some work which got postponed or dropped. +// This doesn't contain any internal synchronization primitives or wait groups, +// callers are expected to make sure that they don't leave any of these running +// by the time the Worker() shuts down. func (obj *State) Poke() { - // add a wait group on the vertex we're poking! - obj.wg.Add(1) - defer obj.wg.Done() - - // now that we've added to the wait group, obj.outputChan won't close... - // so see if there's an exit signal before we release the wait group! - // XXX: i don't think this is necessarily happening, but maybe it is? - // XXX: re-write some of the engine to ensure that: "the sender closes"! - select { - case <-obj.exit.Signal(): - return // skip sending the poke b/c we're closing - default: - } + // redundant + //if len(obj.pokeChan) > 0 { + // return + //} select { - case obj.outputChan <- nil: - - case <-obj.exit.Signal(): + case obj.pokeChan <- struct{}{}: + default: // if chan is now full because more than one poke happened... } } -// Event sends a Pause or Start event to the resource. It can also be used to -// send Poke events, but it's much more efficient to send them directly instead -// of passing them through the resource. -func (obj *State) Event(msg *event.Msg) { - // TODO: should these happen after the lock? - obj.wg.Add(1) - defer obj.wg.Done() - - obj.eventsLock.Lock() - defer obj.eventsLock.Unlock() - - if obj.eventsDone { // closing, skip events... - return +// Pause pauses this resource. It should not be called on any already paused +// resource. It will block until the resource pauses with an acknowledgment, or +// until an exit for that resource is seen. If the latter happens it will error. +// It is NOT thread-safe with the Resume() method so only call either one at a +// time. +func (obj *State) Pause() error { + if obj.paused { + return fmt.Errorf("already paused") } - if msg.Kind == event.KindExit { // set this so future events don't deadlock - obj.Logf("exit event...") - obj.eventsDone = true - close(obj.eventsChan) // causes resource Watch loop to close - obj.exit.Done(nil) // trigger exit signal to unblock some cases - return - } + obj.pausedAck = util.NewEasyAck() + obj.resumeSignal = make(chan struct{}) // build the resume signal + close(obj.pauseSignal) + obj.Poke() // unblock and notice the pause if necessary + // wait for ack (or exit signal) select { - case obj.eventsChan <- msg: - - case <-obj.exit.Signal(): - } -} - -// read is a helper function used inside the main select statement of resources. -// If it returns an error, then this is a signal for the resource to exit. -func (obj *State) read(msg *event.Msg) error { - switch msg.Kind { - case event.KindPoke: - return obj.event() // a poke needs to cause an event... - case event.KindStart: - return fmt.Errorf("unexpected start") - case event.KindPause: - // pass - case event.KindExit: - return engine.ErrSignalExit - - default: - return fmt.Errorf("unhandled event: %+v", msg.Kind) + case <-obj.pausedAck.Wait(): // we got it! + // we're paused + case <-obj.doneChan: + return engine.ErrClosed } + obj.paused = true - // we're paused now - select { - case msg, ok := <-obj.eventsChan: - if !ok { - return engine.ErrWatchExit - } - switch msg.Kind { - case event.KindPoke: - return fmt.Errorf("unexpected poke") - case event.KindPause: - return fmt.Errorf("unexpected pause") - case event.KindStart: - // resumed - return nil - case event.KindExit: - return engine.ErrSignalExit + return nil +} - default: - return fmt.Errorf("unhandled event: %+v", msg.Kind) - } +// Resume unpauses this resource. It can be safely called on a brand-new +// resource that has just started running without incident. It is NOT +// thread-safe with the Pause() method, so only call either one at a time. +func (obj *State) Resume() { + // TODO: do we need a mutex around Resume? + if !obj.paused { // no need to unpause brand-new resources + return } -} -// event is a helper function to send an event from the resource Watch loop. It -// can be used for the initial `running` event, or any regular event. If it -// returns an error, then the Watch loop must return this error and shutdown. -func (obj *State) event() error { - // loop until we sent on obj.outputChan or exit with error - for { - select { - // send "activity" event - case obj.outputChan <- nil: - return nil // sent event! + obj.pauseSignal = make(chan struct{}) // rebuild for next pause + close(obj.resumeSignal) + //obj.Poke() // not needed, we're already waiting for resume - // make sure to keep handling incoming - case msg, ok := <-obj.eventsChan: - if !ok { - return engine.ErrWatchExit - } - switch msg.Kind { - case event.KindPoke: - // we're trying to send an event, so swallow the - // poke: it's what we wanted to have happen here - continue - case event.KindStart: - return fmt.Errorf("unexpected start") - case event.KindPause: - // pass - case event.KindExit: - return engine.ErrSignalExit - - default: - return fmt.Errorf("unhandled event: %+v", msg.Kind) - } - } + obj.paused = false - // we're paused now - select { - case msg, ok := <-obj.eventsChan: - if !ok { - return engine.ErrWatchExit - } - switch msg.Kind { - case event.KindPoke: - return fmt.Errorf("unexpected poke") - case event.KindPause: - return fmt.Errorf("unexpected pause") - case event.KindStart: - // resumed - case event.KindExit: - return engine.ErrSignalExit - - default: - return fmt.Errorf("unhandled event: %+v", msg.Kind) - } - } - } + // no need to wait for it to resume + //return // implied } -// varDir returns the path to a working directory for the resource. It will try -// and create the directory first, and return an error if this failed. The dir -// should be cleaned up by the resource on Close if it wishes to discard the -// contents. If it does not, then a future resource with the same kind and name -// may see those contents in that directory. The resource should clean up the -// contents before use if it is important that nothing exist. It is always -// possible that contents could remain after an abrupt crash, so do not store -// overly sensitive data unless you're aware of the risks. -func (obj *State) varDir(extra string) (string, error) { - // Using extra adds additional dirs onto our namespace. An empty extra - // adds no additional directories. - if obj.Prefix == "" { // safety - return "", fmt.Errorf("the VarDir prefix is empty") - } +// event is a helper function to send an event to the CheckApply process loop. +// It can be used for the initial `running` event, or any regular event. You +// should instead use Poke() to "schedule" a new Process/CheckApply loop when +// one might be needed. This method will block until we're unpaused and ready to +// receive on the events channel. +func (obj *State) event() { + obj.setDirty() // assume we're initially dirty - // an empty string at the end has no effect - p := fmt.Sprintf("%s/", path.Join(obj.Prefix, extra)) - if err := os.MkdirAll(p, 0770); err != nil { - return "", errwrap.Wrapf(err, "can't create prefix in: %s", p) + select { + case obj.eventsChan <- nil: + // send! } - // returns with a trailing slash as per the mgmt file res convention - return p, nil + //return // implied +} + +// setDirty marks the resource state as dirty. This signals to the engine that +// CheckApply will have some work to do in order to converge it. +func (obj *State) setDirty() { + obj.tuid.StopTimer() + obj.isStateOK = false } // poll is a replacement for Watch when the Poll metaparameter is used. @@ -420,34 +333,17 @@ func (obj *State) poll(interval uint32) error { ticker := time.NewTicker(time.Duration(interval) * time.Second) defer ticker.Stop() - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running - var send = false // send event? for { select { case <-ticker.C: // received the timer event obj.init.Logf("polling...") - send = true - obj.init.Dirty() // dirty - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // signal for shutdown request + return nil } - // do all our event sending all together to avoid duplicate msgs - if send { - send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } - } + obj.init.Event() // notify engine of an event (this can block) } } diff --git a/engine/graph/vardir.go b/engine/graph/vardir.go new file mode 100644 index 000000000..9fcd8f215 --- /dev/null +++ b/engine/graph/vardir.go @@ -0,0 +1,51 @@ +// Mgmt +// Copyright (C) 2013-2018+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package graph + +import ( + "fmt" + "os" + "path" + + errwrap "github.com/pkg/errors" +) + +// varDir returns the path to a working directory for the resource. It will try +// and create the directory first, and return an error if this failed. The dir +// should be cleaned up by the resource on Close if it wishes to discard the +// contents. If it does not, then a future resource with the same kind and name +// may see those contents in that directory. The resource should clean up the +// contents before use if it is important that nothing exist. It is always +// possible that contents could remain after an abrupt crash, so do not store +// overly sensitive data unless you're aware of the risks. +func (obj *State) varDir(extra string) (string, error) { + // Using extra adds additional dirs onto our namespace. An empty extra + // adds no additional directories. + if obj.Prefix == "" { // safety + return "", fmt.Errorf("the VarDir prefix is empty") + } + + // an empty string at the end has no effect + p := fmt.Sprintf("%s/", path.Join(obj.Prefix, extra)) + if err := os.MkdirAll(p, 0770); err != nil { + return "", errwrap.Wrapf(err, "can't create prefix in: %s", p) + } + + // returns with a trailing slash as per the mgmt file res convention + return p, nil +} diff --git a/engine/metaparams.go b/engine/metaparams.go index 23ad2d0df..773cd4bc0 100644 --- a/engine/metaparams.go +++ b/engine/metaparams.go @@ -37,6 +37,8 @@ var DefaultMetaParams = &MetaParams{ Limit: rate.Inf, // defaults to no limit Burst: 0, // no burst needed on an infinite rate //Sema: []string{}, + Rewatch: true, + Realize: false, // true would be more awesome, but unexpected for users } // MetaRes is the interface a resource must implement to support meta params. @@ -81,6 +83,24 @@ type MetaParams struct { // has a count equal to 1, is different from a sema named `foo:1` which // also has a count equal to 1, but is a different semaphore. Sema []string `yaml:"sema"` + + // Rewatch specifies whether we re-run the Watch worker during a swap + // if it has errored. When doing a GraphCmp to swap the graphs, if this + // is true, and this particular worker has errored, then we'll remove it + // and add it back as a new vertex, thus causing it to run again. This + // is different from the Retry metaparam which applies during the normal + // execution. It is only when this is exhausted that we're in permanent + // worker failure, and only then can we rely on this metaparam. + Rewatch bool `yaml:"rewatch"` + + // Realize ensures that the resource is guaranteed to converge at least + // once before a potential graph swap removes or changes it. This + // guarantee is useful for fast changing graphs, to ensure that the + // brief creation of a resource is seen. This guarantee does not prevent + // against the engine quitting normally, and it can't guarantee it if + // the resource is blocked because of a failed pre-requisite resource. + // XXX: Not implemented! + Realize bool `yaml:"realize"` } // Cmp compares two AutoGroupMeta structs and determines if they're equivalent. @@ -118,6 +138,13 @@ func (obj *MetaParams) Cmp(meta *MetaParams) error { return errwrap.Wrapf(err, "values for Sema are different") } + if obj.Rewatch != meta.Rewatch { + return fmt.Errorf("values for Rewatch are different") + } + if obj.Realize != meta.Realize { + return fmt.Errorf("values for Realize are different") + } + return nil } @@ -147,13 +174,15 @@ func (obj *MetaParams) Copy() *MetaParams { copy(sema, obj.Sema) } return &MetaParams{ - Noop: obj.Noop, - Retry: obj.Retry, - Delay: obj.Delay, - Poll: obj.Poll, - Limit: obj.Limit, // FIXME: can we copy this type like this? test me! - Burst: obj.Burst, - Sema: sema, + Noop: obj.Noop, + Retry: obj.Retry, + Delay: obj.Delay, + Poll: obj.Poll, + Limit: obj.Limit, // FIXME: can we copy this type like this? test me! + Burst: obj.Burst, + Sema: sema, + Rewatch: obj.Rewatch, + Realize: obj.Realize, } } diff --git a/engine/resources.go b/engine/resources.go index fec342edc..abb76b051 100644 --- a/engine/resources.go +++ b/engine/resources.go @@ -21,8 +21,6 @@ import ( "encoding/gob" "fmt" - "github.com/purpleidea/mgmt/engine/event" - errwrap "github.com/pkg/errors" "gopkg.in/yaml.v2" ) @@ -93,22 +91,14 @@ type Init struct { // Called from within Watch: // Running must be called after your watches are all started and ready. - Running func() error + Running func() // Event sends an event notifying the engine of a possible state change. - Event func() error - - // Events returns a channel that we must watch for messages from the - // engine. When it closes, this is a signal to shutdown. - Events chan *event.Msg - - // Read processes messages that come in from the Events channel. It is a - // helper method that knows how to handle the pause mechanism correctly. - Read func(*event.Msg) error + Event func() - // Dirty marks the resource state as dirty. This signals to the engine - // that CheckApply will have some work to do in order to converge it. - Dirty func() + // Done returns a channel that will close to signal to us that it's time + // for us to shutdown. + Done chan struct{} // Called from within CheckApply: diff --git a/engine/resources/augeas.go b/engine/resources/augeas.go index 7e67d1be3..bb4e3aed0 100644 --- a/engine/resources/augeas.go +++ b/engine/resources/augeas.go @@ -135,10 +135,7 @@ func (obj *AugeasRes) Watch() error { } defer obj.recWatcher.Close() - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -158,23 +155,15 @@ func (obj *AugeasRes) Watch() error { obj.init.Logf("Event(%s): %v", event.Body.Name, event.Body.Op) } send = true - obj.init.Dirty() // dirty - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/aws_ec2.go b/engine/resources/aws_ec2.go index a7a30e187..7dbe831e8 100644 --- a/engine/resources/aws_ec2.go +++ b/engine/resources/aws_ec2.go @@ -423,9 +423,7 @@ func (obj *AwsEc2Res) longpollWatch() error { // We tell the engine that we're running right away. This is not correct, // but the api doesn't have a way to signal when the waiters are ready. - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running // cancellable context used for exiting cleanly ctx, cancel := context.WithCancel(context.TODO()) @@ -488,14 +486,6 @@ func (obj *AwsEc2Res) longpollWatch() error { // process events from the goroutine for { select { - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } - case msg, ok := <-obj.awsChan: if !ok { return nil @@ -509,15 +499,16 @@ func (obj *AwsEc2Res) longpollWatch() error { continue default: obj.init.Logf("State: %v", msg.state) - obj.init.Dirty() // dirty send = true } + + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } + if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } @@ -587,14 +578,6 @@ func (obj *AwsEc2Res) snsWatch() error { // process events for { select { - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } - case msg, ok := <-obj.awsChan: if !ok { return nil @@ -607,20 +590,19 @@ func (obj *AwsEc2Res) snsWatch() error { // is confirmed, we are ready to receive events, so we // can notify the engine that we're running. if msg.event == awsEc2EventWatchReady { - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running continue } obj.init.Logf("State: %v", msg.event) - obj.init.Dirty() // dirty send = true + + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } + if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/cron.go b/engine/resources/cron.go index 568d06034..195c38853 100644 --- a/engine/resources/cron.go +++ b/engine/resources/cron.go @@ -271,10 +271,7 @@ func (obj *CronRes) Watch() error { } defer obj.recWatcher.Close() - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -285,7 +282,7 @@ func (obj *CronRes) Watch() error { obj.init.Logf("%+v", event) } send = true - obj.init.Dirty() // dirty + case event, ok := <-obj.recWatcher.Events(): // process unit file recwatch events if !ok { // channel shutdown @@ -298,21 +295,14 @@ func (obj *CronRes) Watch() error { obj.init.Logf("Event(%s): %v", event.Body.Name, event.Body.Op) } send = true - obj.init.Dirty() // dirty - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/docker_container.go b/engine/resources/docker_container.go index 399e3513b..ed9acb213 100644 --- a/engine/resources/docker_container.go +++ b/engine/resources/docker_container.go @@ -168,10 +168,7 @@ func (obj *DockerContainerRes) Watch() error { eventChan, errChan := obj.client.Events(ctx, types.EventsOptions{}) - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -184,27 +181,21 @@ func (obj *DockerContainerRes) Watch() error { obj.init.Logf("%+v", event) } send = true - obj.init.Dirty() // dirty + case err, ok := <-errChan: if !ok { return nil } return err - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/exec.go b/engine/resources/exec.go index b70e59579..4c697a546 100644 --- a/engine/resources/exec.go +++ b/engine/resources/exec.go @@ -157,10 +157,7 @@ func (obj *ExecRes) Watch() error { ioChan = obj.bufioChanScanner(scanner) } - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -180,24 +177,16 @@ func (obj *ExecRes) Watch() error { obj.init.Logf("watch output: %s", data.text) if data.text != "" { send = true - obj.init.Dirty() // dirty } - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/exec_test.go b/engine/resources/exec_test.go index 7adb0054d..d63e33f46 100644 --- a/engine/resources/exec_test.go +++ b/engine/resources/exec_test.go @@ -31,9 +31,6 @@ func fakeInit(t *testing.T) *engine.Init { t.Logf("test: "+format, v...) } return &engine.Init{ - Running: func() error { - return nil - }, Debug: debug, Logf: logf, } diff --git a/engine/resources/file.go b/engine/resources/file.go index bc31075d3..97782f9e2 100644 --- a/engine/resources/file.go +++ b/engine/resources/file.go @@ -194,10 +194,7 @@ func (obj *FileRes) Watch() error { } defer obj.recWatcher.Close() - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -217,23 +214,15 @@ func (obj *FileRes) Watch() error { obj.init.Logf("Event(%s): %v", event.Body.Name, event.Body.Op) } send = true - obj.init.Dirty() // dirty - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/group.go b/engine/resources/group.go index a5efaee04..38cc11cd2 100644 --- a/engine/resources/group.go +++ b/engine/resources/group.go @@ -85,10 +85,7 @@ func (obj *GroupRes) Watch() error { } defer obj.recWatcher.Close() - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -108,23 +105,15 @@ func (obj *GroupRes) Watch() error { obj.init.Logf("Event(%s): %v", event.Body.Name, event.Body.Op) } send = true - obj.init.Dirty() // dirty - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/hostname.go b/engine/resources/hostname.go index 15e8c9025..f26a191d5 100644 --- a/engine/resources/hostname.go +++ b/engine/resources/hostname.go @@ -127,33 +127,22 @@ func (obj *HostnameRes) Watch() error { signals := make(chan *dbus.Signal, 10) // closed by dbus package bus.Signal(signals) - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { select { case <-signals: send = true - obj.init.Dirty() // dirty - - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/kv.go b/engine/resources/kv.go index 0a6a063b4..65a0ecec8 100644 --- a/engine/resources/kv.go +++ b/engine/resources/kv.go @@ -102,11 +102,7 @@ func (obj *KVRes) Close() error { // Watch is the primary listener for this resource and it outputs events. func (obj *KVRes) Watch() error { - - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running ch := obj.init.World.StrMapWatch(obj.Key) // get possible events! @@ -125,23 +121,15 @@ func (obj *KVRes) Watch() error { obj.init.Logf("Event!") } send = true - obj.init.Dirty() // dirty - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/mount.go b/engine/resources/mount.go index f5cfcc298..4fe20de3c 100644 --- a/engine/resources/mount.go +++ b/engine/resources/mount.go @@ -224,10 +224,7 @@ func (obj *MountRes) Watch() error { // close the recwatcher when we're done defer recWatcher.Close() - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // bubble up a NACK... - } + obj.init.Running() // when started, notify engine that we're running var send bool var done bool @@ -248,7 +245,6 @@ func (obj *MountRes) Watch() error { obj.init.Logf("event(%s): %v", event.Body.Name, event.Body.Op) } - obj.init.Dirty() send = true case event, ok := <-ch: @@ -263,24 +259,16 @@ func (obj *MountRes) Watch() error { obj.init.Logf("event: %+v", event) } - obj.init.Dirty() send = true - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/msg.go b/engine/resources/msg.go index 1588bf37f..6b82d5415 100644 --- a/engine/resources/msg.go +++ b/engine/resources/msg.go @@ -94,30 +94,20 @@ func (obj *MsgRes) Close() error { // Watch is the primary listener for this resource and it outputs events. func (obj *MsgRes) Watch() error { - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running - var send = false // send event? + //var send = false // send event? for { select { - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs - if send { - send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } - } + //if send { + // send = false + // obj.init.Event() // notify engine of an event (this can block) + //} } } @@ -137,7 +127,7 @@ func (obj *MsgRes) isAllStateOK() bool { func (obj *MsgRes) updateStateOK() { // XXX: this resource doesn't entirely make sense to me at the moment. if !obj.isAllStateOK() { - obj.init.Dirty() + //obj.init.Dirty() // XXX: removed with API cleanup } } diff --git a/engine/resources/net.go b/engine/resources/net.go index 20279d4d6..1ab58f226 100644 --- a/engine/resources/net.go +++ b/engine/resources/net.go @@ -247,10 +247,7 @@ func (obj *NetRes) Watch() error { } }() - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? var done bool @@ -272,7 +269,6 @@ func (obj *NetRes) Watch() error { } send = true - obj.init.Dirty() // dirty case event, ok := <-recWatcher.Events(): if !ok { @@ -290,23 +286,15 @@ func (obj *NetRes) Watch() error { } send = true - obj.init.Dirty() // dirty - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/noop.go b/engine/resources/noop.go index ddfadf5aa..990d0df1d 100644 --- a/engine/resources/noop.go +++ b/engine/resources/noop.go @@ -63,31 +63,15 @@ func (obj *NoopRes) Close() error { // Watch is the primary listener for this resource and it outputs events. func (obj *NoopRes) Watch() error { - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running - var send = false // send event? - for { - select { - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } - } - - // do all our event sending all together to avoid duplicate msgs - if send { - send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } - } + select { + case <-obj.init.Done: // closed by the engine to signal shutdown } + + //obj.init.Event() // notify engine of an event (this can block) + + return nil } // CheckApply method for Noop resource. Does nothing, returns happy! diff --git a/engine/resources/nspawn.go b/engine/resources/nspawn.go index cbd473ad0..31b037081 100644 --- a/engine/resources/nspawn.go +++ b/engine/resources/nspawn.go @@ -167,10 +167,7 @@ func (obj *NspawnRes) Watch() error { bus.Signal(busChan) defer bus.RemoveSignal(busChan) // not needed here, but nice for symmetry - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -187,24 +184,16 @@ func (obj *NspawnRes) Watch() error { return fmt.Errorf("unknown event: %s", event.Name) } send = true - obj.init.Dirty() // dirty } - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/password.go b/engine/resources/password.go index 2676a5835..ef28b3bc8 100644 --- a/engine/resources/password.go +++ b/engine/resources/password.go @@ -182,10 +182,7 @@ func (obj *PasswordRes) Watch() error { } defer obj.recWatcher.Close() - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -199,23 +196,15 @@ func (obj *PasswordRes) Watch() error { return errwrap.Wrapf(err, "unknown %s watcher error", obj) } send = true - obj.init.Dirty() // dirty - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/pkg.go b/engine/resources/pkg.go index 33a87268c..c1c84d8a4 100644 --- a/engine/resources/pkg.go +++ b/engine/resources/pkg.go @@ -67,7 +67,7 @@ type PkgRes struct { // Default returns some sensible defaults for this resource. func (obj *PkgRes) Default() engine.Res { return &PkgRes{ - State: PkgStateInstalled, // i think this is preferable to "newest" + State: PkgStateInstalled, // this *is* preferable to "newest" } } @@ -121,10 +121,7 @@ func (obj *PkgRes) Watch() error { return errwrap.Wrapf(err, "error adding signal match") } - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -146,23 +143,15 @@ func (obj *PkgRes) Watch() error { } send = true - obj.init.Dirty() // dirty - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/print.go b/engine/resources/print.go index dac863f03..55fbcafe0 100644 --- a/engine/resources/print.go +++ b/engine/resources/print.go @@ -66,31 +66,15 @@ func (obj *PrintRes) Close() error { // Watch is the primary listener for this resource and it outputs events. func (obj *PrintRes) Watch() error { - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested + obj.init.Running() // when started, notify engine that we're running + + select { + case <-obj.init.Done: // closed by the engine to signal shutdown } - var send = false // send event? - for { - select { - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } - } + //obj.init.Event() // notify engine of an event (this can block) - // do all our event sending all together to avoid duplicate msgs - if send { - send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } - } - } + return nil } // CheckApply method for Print resource. Does nothing, returns happy! diff --git a/engine/resources/resources_test.go b/engine/resources/resources_test.go index 66f9d7064..8c594db7c 100644 --- a/engine/resources/resources_test.go +++ b/engine/resources/resources_test.go @@ -28,7 +28,6 @@ import ( "time" "github.com/purpleidea/mgmt/engine" - "github.com/purpleidea/mgmt/engine/event" "github.com/purpleidea/mgmt/util" ) @@ -220,36 +219,33 @@ func TestResources1(t *testing.T) { readyChan := make(chan struct{}) eventChan := make(chan struct{}) - eventsChan := make(chan *event.Msg) + doneChan := make(chan struct{}) debug := testing.Verbose() // set via the -test.v flag to `go test` logf := func(format string, v ...interface{}) { t.Logf(fmt.Sprintf("test #%d: Res: ", index)+format, v...) } init := &engine.Init{ - Running: func() error { + Running: func() { close(readyChan) select { // this always sends one! case eventChan <- struct{}{}: } - return nil }, // Watch runs this to send a changed event. - Event: func() error { + Event: func() { select { case eventChan <- struct{}{}: } - return nil }, // Watch listens on this for close/pause events. - Events: eventsChan, - Debug: debug, - Logf: logf, + Done: doneChan, + Debug: debug, + Logf: logf, // unused - Dirty: func() {}, Recv: func() map[string]*engine.Send { return map[string]*engine.Send{} }, @@ -341,7 +337,7 @@ func TestResources1(t *testing.T) { } } t.Logf("test #%d: shutting down Watch", index) - close(eventsChan) // send Watch shutdown command + close(doneChan) // send Watch shutdown command }() Loop: for { diff --git a/engine/resources/svc.go b/engine/resources/svc.go index b0e2107c4..77eedc79a 100644 --- a/engine/resources/svc.go +++ b/engine/resources/svc.go @@ -120,10 +120,7 @@ func (obj *SvcRes) Watch() error { bus.Signal(buschan) defer bus.RemoveSignal(buschan) // not needed here, but nice for symmetry - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var svc = fmt.Sprintf("%s.service", obj.Name()) // systemd name var send = false // send event? @@ -161,7 +158,6 @@ func (obj *SvcRes) Watch() error { if previous != invalid { // if invalid changed, send signal send = true - obj.init.Dirty() // dirty } if invalid { @@ -176,13 +172,8 @@ func (obj *SvcRes) Watch() error { // loop so that we can see the changed invalid signal obj.init.Logf("daemon reload") - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } } else { if !activeSet { @@ -220,26 +211,18 @@ func (obj *SvcRes) Watch() error { obj.init.Logf("stopped") } send = true - obj.init.Dirty() // dirty case err := <-subErrors: return errwrap.Wrapf(err, "unknown %s error", obj) - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } } if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/test.go b/engine/resources/test.go index daece3b29..7594d0893 100644 --- a/engine/resources/test.go +++ b/engine/resources/test.go @@ -125,31 +125,15 @@ func (obj *TestRes) Close() error { // Watch is the primary listener for this resource and it outputs events. func (obj *TestRes) Watch() error { - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } - - var send = false // send event? - for { - select { - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } - } + obj.init.Running() // when started, notify engine that we're running - // do all our event sending all together to avoid duplicate msgs - if send { - send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } - } + select { + case <-obj.init.Done: // closed by the engine to signal shutdown } + + //obj.init.Event() // notify engine of an event (this can block) + + return nil } // CheckApply method for Test resource. Does nothing, returns happy! diff --git a/engine/resources/timer.go b/engine/resources/timer.go index d5308c522..58738a6e2 100644 --- a/engine/resources/timer.go +++ b/engine/resources/timer.go @@ -75,10 +75,7 @@ func (obj *TimerRes) Watch() error { obj.ticker = obj.newTicker() defer obj.ticker.Stop() - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -87,20 +84,13 @@ func (obj *TimerRes) Watch() error { send = true obj.init.Logf("received tick") - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/user.go b/engine/resources/user.go index b13a537ab..1508bf531 100644 --- a/engine/resources/user.go +++ b/engine/resources/user.go @@ -119,10 +119,7 @@ func (obj *UserRes) Watch() error { } defer obj.recWatcher.Close() - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -142,23 +139,15 @@ func (obj *UserRes) Watch() error { obj.init.Logf("Event(%s): %v", event.Body.Name, event.Body.Op) } send = true - obj.init.Dirty() // dirty - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/resources/virt.go b/engine/resources/virt.go index 5ae96ae68..500166a32 100644 --- a/engine/resources/virt.go +++ b/engine/resources/virt.go @@ -326,10 +326,7 @@ func (obj *VirtRes) Watch() error { } defer obj.conn.DomainEventDeregister(gaCallbackID) - // notify engine that we're running - if err := obj.init.Running(); err != nil { - return err // exit if requested - } + obj.init.Running() // when started, notify engine that we're running var send = false // send event? for { @@ -340,31 +337,26 @@ func (obj *VirtRes) Watch() error { switch event { case libvirt.DOMAIN_EVENT_DEFINED: if obj.Transient { - obj.init.Dirty() // dirty send = true } case libvirt.DOMAIN_EVENT_UNDEFINED: if !obj.Transient { - obj.init.Dirty() // dirty send = true } case libvirt.DOMAIN_EVENT_STARTED: fallthrough case libvirt.DOMAIN_EVENT_RESUMED: if obj.State != "running" { - obj.init.Dirty() // dirty send = true } case libvirt.DOMAIN_EVENT_SUSPENDED: if obj.State != "paused" { - obj.init.Dirty() // dirty send = true } case libvirt.DOMAIN_EVENT_STOPPED: fallthrough case libvirt.DOMAIN_EVENT_SHUTDOWN: if obj.State != "shutoff" { - obj.init.Dirty() // dirty send = true } processExited = true @@ -375,7 +367,6 @@ func (obj *VirtRes) Watch() error { // verify, detect and patch appropriately! fallthrough case libvirt.DOMAIN_EVENT_CRASHED: - obj.init.Dirty() // dirty send = true processExited = true // FIXME: is this okay for PMSUSPENDED ? } @@ -390,7 +381,6 @@ func (obj *VirtRes) Watch() error { if state == libvirt.CONNECT_DOMAIN_EVENT_AGENT_LIFECYCLE_STATE_CONNECTED { obj.guestAgentConnected = true - obj.init.Dirty() // dirty send = true obj.init.Logf("Guest agent connected") @@ -409,21 +399,14 @@ func (obj *VirtRes) Watch() error { case err := <-errorChan: return fmt.Errorf("unknown %s libvirt error: %s", obj, err) - case event, ok := <-obj.init.Events: - if !ok { - return nil - } - if err := obj.init.Read(event); err != nil { - return err - } + case <-obj.init.Done: // closed by the engine to signal shutdown + return nil } // do all our event sending all together to avoid duplicate msgs if send { send = false - if err := obj.init.Event(); err != nil { - return err // exit if requested - } + obj.init.Event() // notify engine of an event (this can block) } } } diff --git a/engine/reverse.go b/engine/reverse.go new file mode 100644 index 000000000..61befbf3d --- /dev/null +++ b/engine/reverse.go @@ -0,0 +1,65 @@ +// Mgmt +// Copyright (C) 2013-2018+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package engine + +import ( + "fmt" +) + +// ReversibleRes is an interface that a resource can implement if it wants to +// have some resource run when it disappears. A disappearance happens when a +// resource is defined in one instance of the graph, and is gone in the +// subsequent one. This is helpful for building robust programs with the engine. +// Default implementations for most of the methods declared in this interface +// can be obtained for your resource by anonymously adding the traits.Reversible +// struct to your resource implementation. +type ReversibleRes interface { + Res + + // ReversibleMeta lets you get or set meta params for the reversible + // trait. + ReversibleMeta() *ReversibleMeta + + // SetReversibleMeta lets you set all of the meta params for the + // reversible trait in a single call. + SetReversibleMeta(*ReversibleMeta) + + // Reversed returns the "reverse" or "reciprocal" resource. This is used + // to "clean" up after a previously defined resource has been removed. + // Interestingly, this returns the core Res interface instead of a + // ReversibleRes, because there is no requirement that the reverse of a + // Res be the same kind of Res, and the reverse might not be reversible! + Reversed() (Res, error) +} + +// ReversibleMeta provides some parameters specific to reversible resources. +type ReversibleMeta struct { + // Disabled specifies that reversing should be disabled for this + // resource. + Disabled bool + + // TODO: add options here, including whether to reverse edges, etc... +} + +// Cmp compares two ReversibleMeta structs and determines if they're equivalent. +func (obj *ReversibleMeta) Cmp(rm *ReversibleMeta) error { + if obj.Disabled != rm.Disabled { + return fmt.Errorf("values for Disabled are different") + } + return nil +} diff --git a/engine/traits/reverse.go b/engine/traits/reverse.go new file mode 100644 index 000000000..0ae44e13b --- /dev/null +++ b/engine/traits/reverse.go @@ -0,0 +1,48 @@ +// Mgmt +// Copyright (C) 2013-2018+ James Shubin and the project contributors +// Written by James Shubin and the project contributors +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +package traits + +import ( + "github.com/purpleidea/mgmt/engine" +) + +// Reversible contains a general implementation with most of the properties and +// methods needed to support reversing resources. It may be used as a starting +// point to avoid re-implementing the straightforward methods. +type Reversible struct { + meta *engine.ReversibleMeta + + // Bug5819 works around issue https://github.com/golang/go/issues/5819 + Bug5819 interface{} // XXX: workaround +} + +// ReversibleMeta lets you get or set meta params for the reversing trait. +func (obj *Reversible) ReversibleMeta() *engine.ReversibleMeta { + if obj.meta == nil { // set the defaults if previously empty + obj.meta = &engine.ReversibleMeta{ + Disabled: true, // by default we're disabled + } + } + return obj.meta +} + +// SetReversibleMeta lets you set all of the meta params for the reversing trait +// in a single call. +func (obj *Reversible) SetReversibleMeta(meta *engine.ReversibleMeta) { + obj.meta = meta +} diff --git a/lang/interpret_test.go b/lang/interpret_test.go index b5644d07c..a74468770 100644 --- a/lang/interpret_test.go +++ b/lang/interpret_test.go @@ -1387,13 +1387,15 @@ func TestAstInterpret0(t *testing.T) { stringptr := "this is meta" x.StringPtr = &stringptr m := &engine.MetaParams{ - Noop: true, // overwritten - Retry: -1, - Delay: 0, - Poll: 5, - Limit: 4.2, - Burst: 3, - Sema: []string{"foo:1", "bar:3"}, + Noop: true, // overwritten + Retry: -1, + Delay: 0, + Poll: 5, + Limit: 4.2, + Burst: 3, + Sema: []string{"foo:1", "bar:3"}, + Rewatch: false, + Realize: true, } x.SetMetaParams(m) graph.AddVertex(t1) @@ -1411,10 +1413,14 @@ func TestAstInterpret0(t *testing.T) { limit => 4.2, burst => 3, sema => ["foo:1", "bar:3",], + rewatch => false, + realize => true, + reverse => true, autoedge => true, autogroup => true, }, Meta:noop => true, + Meta:reverse => true, Meta:autoedge => true, Meta:autogroup => true, } diff --git a/lang/interpret_test/TestAstFunc1/metaparams0.graph b/lang/interpret_test/TestAstFunc1/metaparams0.graph index a045351a3..599993bc8 100644 --- a/lang/interpret_test/TestAstFunc1/metaparams0.graph +++ b/lang/interpret_test/TestAstFunc1/metaparams0.graph @@ -1,20 +1,26 @@ -Edge: bool(false) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); autoedge: bool(true); autogroup: bool(true)) # noop -Edge: bool(true) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); autoedge: bool(true); autogroup: bool(true)) # autoedge -Edge: bool(true) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); autoedge: bool(true); autogroup: bool(true)) # autogroup +Edge: bool(false) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # noop +Edge: bool(false) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # rewatch +Edge: bool(true) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # autoedge +Edge: bool(true) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # autogroup +Edge: bool(true) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # realize +Edge: bool(true) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # reverse Edge: bool(true) -> var(b) # b Edge: bool(true) -> var(b) # b -Edge: float(4.2) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); autoedge: bool(true); autogroup: bool(true)) # limit -Edge: int(-1) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); autoedge: bool(true); autogroup: bool(true)) # retry -Edge: int(0) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); autoedge: bool(true); autogroup: bool(true)) # delay -Edge: int(3) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); autoedge: bool(true); autogroup: bool(true)) # burst -Edge: int(5) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); autoedge: bool(true); autogroup: bool(true)) # poll -Edge: list(str(foo:1), str(bar:3)) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); autoedge: bool(true); autogroup: bool(true)) # sema +Edge: float(4.2) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # limit +Edge: int(-1) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # retry +Edge: int(0) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # delay +Edge: int(3) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # burst +Edge: int(5) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # poll +Edge: list(str(foo:1), str(bar:3)) -> struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) # sema Edge: str(bar:3) -> list(str(foo:1), str(bar:3)) # 1 Edge: str(foo:1) -> list(str(foo:1), str(bar:3)) # 0 Edge: str(hello world) -> call:fmt.printf(str(hello world)) # a Vertex: bool(false) Vertex: bool(false) Vertex: bool(false) +Vertex: bool(false) +Vertex: bool(true) +Vertex: bool(true) Vertex: bool(true) Vertex: bool(true) Vertex: bool(true) @@ -32,6 +38,6 @@ Vertex: str(bar:3) Vertex: str(foo:1) Vertex: str(greeting) Vertex: str(hello world) -Vertex: struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); autoedge: bool(true); autogroup: bool(true)) +Vertex: struct(noop: bool(false); retry: int(-1); delay: int(0); poll: int(5); limit: float(4.2); burst: int(3); sema: list(str(foo:1), str(bar:3)); rewatch: bool(false); realize: bool(true); reverse: bool(true); autoedge: bool(true); autogroup: bool(true)) Vertex: var(b) Vertex: var(b) diff --git a/lang/interpret_test/TestAstFunc1/metaparams0/main.mcl b/lang/interpret_test/TestAstFunc1/metaparams0/main.mcl index e3f7ccbe8..167f10d43 100644 --- a/lang/interpret_test/TestAstFunc1/metaparams0/main.mcl +++ b/lang/interpret_test/TestAstFunc1/metaparams0/main.mcl @@ -13,6 +13,9 @@ test "greeting" { limit => 4.2, burst => 3, sema => ["foo:1", "bar:3",], + rewatch => false, + realize => true, + reverse => true, autoedge => true, autogroup => true, }, diff --git a/lang/structs.go b/lang/structs.go index e41af8426..b85fec22c 100644 --- a/lang/structs.go +++ b/lang/structs.go @@ -638,6 +638,10 @@ func (obj *StmtRes) edges(resName string) ([]*interfaces.Edge, error) { func (obj *StmtRes) metaparams(res engine.Res) error { meta := engine.DefaultMetaParams.Copy() // defaults + var rm *engine.ReversibleMeta + if r, ok := res.(engine.ReversibleRes); ok { + rm = r.ReversibleMeta() // get a struct with the defaults + } var aem *engine.AutoEdgeMeta if r, ok := res.(engine.EdgeableRes); ok { aem = r.AutoEdgeMeta() // get a struct with the defaults @@ -706,6 +710,21 @@ func (obj *StmtRes) metaparams(res engine.Res) error { } meta.Sema = values + case "rewatch": + meta.Rewatch = v.Bool() // must not panic + + case "realize": + meta.Realize = v.Bool() // must not panic + + case "reverse": + if v.Type().Cmp(types.TypeBool) == nil { + if rm != nil { + rm.Disabled = !v.Bool() // must not panic + } + } else { + // TODO: read values from struct into rm.XXX + } + case "autoedge": if aem != nil { aem.Disabled = !v.Bool() // must not panic @@ -752,6 +771,19 @@ func (obj *StmtRes) metaparams(res engine.Res) error { } meta.Sema = values } + if val, exists := v.Struct()["rewatch"]; exists { + meta.Rewatch = val.Bool() // must not panic + } + if val, exists := v.Struct()["realize"]; exists { + meta.Realize = val.Bool() // must not panic + } + if val, exists := v.Struct()["reverse"]; exists && rm != nil { + if val.Type().Cmp(types.TypeBool) == nil { + rm.Disabled = !val.Bool() // must not panic + } else { + // TODO: read values from struct into rm.XXX + } + } if val, exists := v.Struct()["autoedge"]; exists && aem != nil { aem.Disabled = !val.Bool() // must not panic } @@ -765,6 +797,9 @@ func (obj *StmtRes) metaparams(res engine.Res) error { } res.SetMetaParams(meta) // set it! + if r, ok := res.(engine.ReversibleRes); ok { + r.SetReversibleMeta(rm) // set + } if r, ok := res.(engine.EdgeableRes); ok { r.SetAutoEdgeMeta(aem) // set } @@ -1139,6 +1174,9 @@ func (obj *StmtResMeta) Init(data *interfaces.Data) error { case "limit": case "burst": case "sema": + case "rewatch": + case "realize": + case "reverse": case "autoedge": case "autogroup": case MetaField: @@ -1225,50 +1263,83 @@ func (obj *StmtResMeta) Unify(kind string) ([]interfaces.Invariant, error) { } // add additional invariants based on what's in obj.Property !!! - var typ *types.Type + var invar interfaces.Invariant + static := func(typ *types.Type) interfaces.Invariant { + return &unification.EqualsInvariant{ + Expr: obj.MetaExpr, + Type: typ, + } + } switch p := strings.ToLower(obj.Property); p { // TODO: we could add these fields dynamically if we were fancy! case "noop": - typ = types.TypeBool + invar = static(types.TypeBool) case "retry": - typ = types.TypeInt + invar = static(types.TypeInt) case "delay": - typ = types.TypeInt + invar = static(types.TypeInt) case "poll": - typ = types.TypeInt + invar = static(types.TypeInt) case "limit": // rate.Limit - typ = types.TypeFloat + invar = static(types.TypeFloat) case "burst": - typ = types.TypeInt + invar = static(types.TypeInt) case "sema": - typ = types.NewType("[]str") + invar = static(types.NewType("[]str")) + + case "rewatch": + invar = static(types.TypeBool) + + case "realize": + invar = static(types.TypeBool) + + case "reverse": + ors := []interfaces.Invariant{} + + invarBool := static(types.TypeBool) + ors = append(ors, invarBool) + + // TODO: decide what fields we might want here + //invarStruct := static(types.NewType("struct{edges str}")) + //ors = append(ors, invarStruct) + + invar = &unification.ExclusiveInvariant{ + Invariants: ors, // one and only one of these should be true + } case "autoedge": - typ = types.TypeBool + invar = static(types.TypeBool) case "autogroup": - typ = types.TypeBool + invar = static(types.TypeBool) // autoedge and autogroup aren't part of the `MetaRes` interface, but we // can merge them in here for simplicity in the public user interface... case MetaField: // FIXME: allow partial subsets of this struct, and in any order // FIXME: we might need an updated unification engine to do this - typ = types.NewType("struct{noop bool; retry int; delay int; poll int; limit float; burst int; sema []str; autoedge bool; autogroup bool}") + wrap := func(reverse *types.Type) *types.Type { + return types.NewType(fmt.Sprintf("struct{noop bool; retry int; delay int; poll int; limit float; burst int; sema []str; rewatch bool; realize bool; reverse %s; autoedge bool; autogroup bool}", reverse.String())) + } + ors := []interfaces.Invariant{} + invarBool := static(wrap(types.TypeBool)) + ors = append(ors, invarBool) + // TODO: decide what fields we might want here + //invarStruct := static(wrap(types.NewType("struct{edges str}"))) + //ors = append(ors, invarStruct) + invar = &unification.ExclusiveInvariant{ + Invariants: ors, // one and only one of these should be true + } default: return nil, fmt.Errorf("unknown property: %s", p) } - invar := &unification.EqualsInvariant{ - Expr: obj.MetaExpr, - Type: typ, - } invariants = append(invariants, invar) return invariants, nil diff --git a/lib/main.go b/lib/main.go index 86867826f..233ee8a99 100644 --- a/lib/main.go +++ b/lib/main.go @@ -647,8 +647,10 @@ func (obj *Main) Run() error { // Start needs to be synchronous because we don't want // to loop around and cause a pause before we unpaused. - if err := obj.ge.Start(); err != nil { // sync - Logf("error starting graph: %+v", err) + // Commit already starts things, but we still need to + // resume anything that was pre-existing and was paused. + if err := obj.ge.Resume(); err != nil { // sync + Logf("error resuming graph: %+v", err) continue } converger.Resume() // after Start() diff --git a/pgraph/graphsync.go b/pgraph/graphsync.go index 6e1c207ac..8a6c553b1 100644 --- a/pgraph/graphsync.go +++ b/pgraph/graphsync.go @@ -40,10 +40,10 @@ func strEdgeCmpFn(e1, e2 Edge) (bool, error) { // GraphSync updates the Graph so that it matches the newGraph. It leaves // identical elements alone so that they don't need to be refreshed. // It tries to mutate existing elements into new ones, if they support this. -// This updates the Graph on success only. +// This updates the Graph on success only. If it fails, then the graph won't +// have been modified. // FIXME: should we do this with copies of the vertex resources? func (obj *Graph) GraphSync(newGraph *Graph, vertexCmpFn func(Vertex, Vertex) (bool, error), vertexAddFn func(Vertex) error, vertexRemoveFn func(Vertex) error, edgeCmpFn func(Edge, Edge) (bool, error)) error { - oldGraph := obj.Copy() // work on a copy of the old graph if oldGraph == nil { var err error @@ -69,8 +69,11 @@ func (obj *Graph) GraphSync(newGraph *Graph, vertexCmpFn func(Vertex, Vertex) (b var lookup = make(map[Vertex]Vertex) var vertexKeep []Vertex // list of vertices which are the same in new graph - var edgeKeep []Edge // list of vertices which are the same in new graph + var vertexDels []Vertex // list of vertices which are to be removed + var vertexAdds []Vertex // list of vertices which are to be added + var edgeKeep []Edge // list of edges which are the same in new graph + // XXX: run this as a topological sort or reverse topological sort? for v := range newGraph.Adjacency() { // loop through the vertices (resources) var vertex Vertex // step one, direct compare with res.Cmp @@ -92,27 +95,44 @@ func (obj *Graph) GraphSync(newGraph *Graph, vertexCmpFn func(Vertex, Vertex) (b // vertex = oldGraph.MutateMatch(res) //} + // run the removes BEFORE the adds, so don't do the add here... if vertex == nil { // no match found yet - if err := vertexAddFn(v); err != nil { - return errwrap.Wrapf(err, "vertexAddFn failed") - } + vertexAdds = append(vertexAdds, v) // append vertex = v - oldGraph.AddVertex(vertex) // call standalone in case not part of an edge } lookup[v] = vertex // used for constructing edges vertexKeep = append(vertexKeep, vertex) // append } - // get rid of any vertices we shouldn't keep (that aren't in new graph) for v := range oldGraph.Adjacency() { if !VertexContains(v, vertexKeep) { - if err := vertexRemoveFn(v); err != nil { - return errwrap.Wrapf(err, "vertexRemoveFn failed") - } - oldGraph.DeleteVertex(v) + vertexDels = append(vertexDels, v) // append + } + } + + // see if any of the add/remove functions actually fail first + // XXX: run this as a reverse topological sort or topological sort? + for _, vertex := range vertexDels { + if err := vertexRemoveFn(vertex); err != nil { + return errwrap.Wrapf(err, "vertexRemoveFn failed") + } + } + for _, vertex := range vertexAdds { + if err := vertexAddFn(vertex); err != nil { + return errwrap.Wrapf(err, "vertexAddFn failed") } } + // no add/remove functions failed, so we can actually modify the graph! + for _, vertex := range vertexDels { + oldGraph.DeleteVertex(vertex) + } + for _, vertex := range vertexAdds { + oldGraph.AddVertex(vertex) // call standalone in case not part of an edge + } + + // XXX: fixup this part so the CmpFn stuff fails early, and THEN we edit + // the graph at the end, if no errors happened... // compare edges for v1 := range newGraph.Adjacency() { // loop through the vertices (resources) for v2, e := range newGraph.Adjacency()[v1] {