Skip to content

Commit

Permalink
Ensure we get at least one netdump in each topic for tested EVE upgrade
Browse files Browse the repository at this point in the history
If the upgrade fails, the recorded netdumps will remain persisted and
available for retrieval from the older EVE version.

Signed-off-by: Milan Lenco <milan@zededa.com>
  • Loading branch information
milan-zededa committed Jul 6, 2023
1 parent 22ccc9d commit f6de6ea
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 11 deletions.
25 changes: 20 additions & 5 deletions pkg/pillar/cmd/zedagent/handleconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/lf-edge/eve/pkg/pillar/types"
"github.com/lf-edge/eve/pkg/pillar/utils"
fileutils "github.com/lf-edge/eve/pkg/pillar/utils/file"
"github.com/lf-edge/eve/pkg/pillar/zboot"
"github.com/lf-edge/eve/pkg/pillar/zedcloud"
uuid "github.com/satori/go.uuid"
"google.golang.org/protobuf/proto"
Expand Down Expand Up @@ -1081,14 +1082,28 @@ func isNettraceEnabled(ctx *zedagentContext) bool {
return true
}

// Function decides if the next call to SendOnAllIntf for /config request should be traced
// and netdump published at the end (see libs/nettrace and pkg/pillar/netdump).
func traceNextConfigReq(ctx *zedagentContext) bool {
// Function decides if the next HTTP request should be traced and netdump published.
func traceNextReq(ctx *zedagentContext, lastNetdump time.Time) bool {
if !isNettraceEnabled(ctx) {
return false
}
return ctx.lastConfigNetdumpPub.IsZero() ||
time.Since(ctx.lastConfigNetdumpPub) >= ctx.netdumpInterval
if lastNetdump.IsZero() {
// No netdump published yet.
return true
}
uptime := time.Since(ctx.startTime)
lastNetdumpAge := time.Since(lastNetdump)
// Ensure we get at least one netdump for the currently tested EVE upgrade.
if zboot.IsCurrentPartitionStateInProgress() && lastNetdumpAge > uptime {
return true
}
return lastNetdumpAge >= ctx.netdumpInterval
}

// traceNextConfigReq reports whether the upcoming SendOnAllIntf call for the /config
// request should be traced, with a netdump published once it finishes
// (see libs/nettrace and pkg/pillar/netdump).
func traceNextConfigReq(ctx *zedagentContext) bool {
	lastPub := ctx.lastConfigNetdumpPub
	return traceNextReq(ctx, lastPub)
}

// Publish netdump containing traces of executed config requests.
Expand Down
6 changes: 1 addition & 5 deletions pkg/pillar/cmd/zedagent/reportinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -1147,11 +1147,7 @@ func isUpdating(ctx *zedagentContext) bool {
// traceNextInfoReq decides if the next call to SendOnAllIntf for /info request should
// be traced and netdump published at the end (see libs/nettrace and pkg/pillar/netdump).
// The decision is delegated to traceNextReq, using the time of the last /info netdump
// publication (ctx.lastInfoNetdumpPub), so that /info requests follow the same rules
// as /config requests.
func traceNextInfoReq(ctx *zedagentContext) bool {
	return traceNextReq(ctx, ctx.lastInfoNetdumpPub)
}

// Publish netdump containing traces of executed /info requests.
Expand Down
2 changes: 2 additions & 0 deletions pkg/pillar/cmd/zedagent/zedagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ type zedagentContext struct {
netdumpInterval time.Duration
lastConfigNetdumpPub time.Time // last call to publishConfigNetdump
lastInfoNetdumpPub time.Time // last call to publishInfoNetdump
startTime time.Time
}

// AddAgentSpecificCLIFlags adds CLI options
Expand Down Expand Up @@ -315,6 +316,7 @@ func Run(ps *pubsub.PubSub, loggerArg *logrus.Logger, logArg *base.LogObject, ar
zedagentCtx.ps = ps
zedagentCtx.hangFlag = *zedagentCtx.hangPtr
zedagentCtx.fatalFlag = *zedagentCtx.fatalPtr
zedagentCtx.startTime = time.Now()

flowlogQueue := make(chan *flowlog.FlowMessage, flowlogQueueCap)
triggerDeviceInfo := make(chan destinationBitset, 1)
Expand Down
2 changes: 2 additions & 0 deletions pkg/pillar/dpcmanager/dpcmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ type DpcManager struct {
netDumper *netdump.NetDumper // nil if netdump is disabled
netdumpInterval time.Duration
lastNetdumpPub time.Time // last call to publishNetdump
startTime time.Time
}

// Watchdog : methods used by DpcManager to interact with Watchdog.
Expand Down Expand Up @@ -238,6 +239,7 @@ func (m *DpcManager) Init(ctx context.Context) error {

// Run DpcManager as a separate task with its own loop and a watchdog file.
func (m *DpcManager) Run(ctx context.Context) (err error) {
m.startTime = time.Now()
m.networkEvents = m.NetworkMonitor.WatchEvents(ctx, "dpc-reconciler")
m.wwanEvents, err = m.WwanWatcher.Watch(ctx)
if err != nil {
Expand Down
13 changes: 12 additions & 1 deletion pkg/pillar/dpcmanager/netdump.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

"github.com/lf-edge/eve/pkg/pillar/netdump"
"github.com/lf-edge/eve/pkg/pillar/zboot"
)

// Topic for netdumps of successful connectivity tests.
Expand All @@ -31,7 +32,17 @@ func (m *DpcManager) traceNextConnTest() bool {
if len(m.dpcList.PortConfigList) == 0 || m.dpcList.CurrentIndex != 0 {
return false
}
return m.lastNetdumpPub.IsZero() || time.Since(m.lastNetdumpPub) >= m.netdumpInterval
if m.lastNetdumpPub.IsZero() {
// No netdump published yet for DPC testing.
return true
}
uptime := time.Since(m.startTime)
lastNetdumpAge := time.Since(m.lastNetdumpPub)
// Ensure we get at least one netdump for the currently tested EVE upgrade.
if zboot.IsCurrentPartitionStateInProgress() && lastNetdumpAge > uptime {
return true
}
return lastNetdumpAge >= m.netdumpInterval
}

// Publish netdump containing traces of executed connectivity probes.
Expand Down

0 comments on commit f6de6ea

Please sign in to comment.