Skip to content
This repository has been archived by the owner on Oct 14, 2024. It is now read-only.

fix: ScanResults stuck in aborted state #427

Merged
1 commit merged into from
Jun 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions runtime_scan/pkg/orchestrator/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ const (

ScanResultPollingInterval = "SCAN_RESULT_POLLING_INTERVAL"
ScanResultReconcileTimeout = "SCAN_RESULT_RECONCILE_TIMEOUT"
ScanResultAbortTimeout = "SCAN_RESULT_ABORT_TIMEOUT"

ScanResultProcessorPollingInterval = "SCAN_RESULT_PROCESSOR_POLLING_INTERVAL"
ScanResultProcessorReconcileTimeout = "SCAN_RESULT_PROCESSOR_RECONCILE_TIMEOUT"
Expand Down Expand Up @@ -119,6 +120,7 @@ func setConfigDefaults(backendHost string, backendPort int, backendBaseURL strin
viper.SetDefault(DiscoveryInterval, discovery.DefaultInterval.String())
viper.SetDefault(ControllerStartupDelay, DefaultControllerStartupDelay.String())
viper.SetDefault(ProviderKind, DefaultProviderKind)
viper.SetDefault(ScanResultAbortTimeout, scanresultwatcher.DefaultAbortTimeout)

viper.AutomaticEnv()
}
Expand Down Expand Up @@ -155,6 +157,7 @@ func LoadConfig(backendHost string, backendPort int, baseURL string) (*Config, e
ScanResultWatcherConfig: scanresultwatcher.Config{
PollPeriod: viper.GetDuration(ScanResultPollingInterval),
ReconcileTimeout: viper.GetDuration(ScanResultReconcileTimeout),
AbortTimeout: viper.GetDuration(ScanResultAbortTimeout),
ScannerConfig: scanresultwatcher.ScannerConfig{
DeleteJobPolicy: scanresultwatcher.GetDeleteJobPolicyType(viper.GetString(DeleteJobPolicy)),
ScannerImage: viper.GetString(ScannerContainerImage),
Expand Down
2 changes: 2 additions & 0 deletions runtime_scan/pkg/orchestrator/scanresultwatcher/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
// Default timing values for the ScanResult watcher.
const (
// DefaultPollInterval is one minute.
// NOTE(review): presumably the default for Config.PollPeriod — confirm where defaults are applied.
DefaultPollInterval = time.Minute
// DefaultReconcileTimeout is five minutes.
// NOTE(review): presumably the default for Config.ReconcileTimeout — confirm where defaults are applied.
DefaultReconcileTimeout = 5 * time.Minute
// DefaultAbortTimeout is ten minutes. It is registered as the default for the
// SCAN_RESULT_ABORT_TIMEOUT setting, which is loaded into Config.AbortTimeout:
// how long a ScanResult may stay in the aborted state before the watcher
// moves it to the done state.
DefaultAbortTimeout = 10 * time.Minute
)

type Config struct {
Expand All @@ -33,6 +34,7 @@ type Config struct {
PollPeriod time.Duration
ReconcileTimeout time.Duration
ScannerConfig ScannerConfig
AbortTimeout time.Duration
}

func (c Config) WithBackendClient(b *backendclient.BackendClient) Config {
Expand Down
57 changes: 54 additions & 3 deletions runtime_scan/pkg/orchestrator/scanresultwatcher/watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func New(c Config) *Watcher {
scannerConfig: c.ScannerConfig,
pollPeriod: c.PollPeriod,
reconcileTimeout: c.ReconcileTimeout,
abortTimeout: c.AbortTimeout,
queue: common.NewQueue[ScanResultReconcileEvent](),
}
}
Expand All @@ -52,6 +53,7 @@ type Watcher struct {
scannerConfig ScannerConfig
pollPeriod time.Duration
reconcileTimeout time.Duration
abortTimeout time.Duration

queue *ScanResultQueue
}
Expand Down Expand Up @@ -159,11 +161,13 @@ func (w *Watcher) Reconcile(ctx context.Context, event ScanResultReconcileEvent)
return err
}
case models.TargetScanStateStateReadyToScan, models.TargetScanStateStateInProgress:
case models.TargetScanStateStateAborted:
// TODO(chrisgacsal): make sure that TargetScanResult state is set to ABORTED state once the TargetScanResult
// schema is extended with timeout field and the deadline is missed.
break
case models.TargetScanStateStateAborted, models.TargetScanStateStateNotScanned:
break
if err = w.reconcileAborted(ctx, &scanResult); err != nil {
return err
}
case models.TargetScanStateStateNotScanned:
case models.TargetScanStateStateDone:
if err = w.reconcileDone(ctx, &scanResult); err != nil {
return err
Expand Down Expand Up @@ -396,3 +400,50 @@ func (w *Watcher) cleanupResources(ctx context.Context, scanResult *models.Targe

return nil
}

// nolint:cyclop
func (w *Watcher) reconcileAborted(ctx context.Context, scanResult *models.TargetScanResult) error {
logger := log.GetLoggerFromContextOrDiscard(ctx)

scanResultID, ok := scanResult.GetID()
if !ok {
return errors.New("invalid ScanResult: ID is nil")
}

// Check if ScanResult is in aborted state for more time than the timeout allows
if scanResult.Status == nil || scanResult.Status.General == nil {
return errors.New("invalid ScanResult: Status or General is nil")
}

var transitionTimeToAbort time.Time
if scanResult.Status.General.LastTransitionTime != nil {
fishkerez marked this conversation as resolved.
Show resolved Hide resolved
transitionTimeToAbort = *scanResult.Status.General.LastTransitionTime
logger.Debugf("ScanResult moved to aborted state: %s", transitionTimeToAbort)
}

now := time.Now()
abortTimedOut := now.After(transitionTimeToAbort.Add(w.abortTimeout))
if !abortTimedOut {
logger.Tracef("ScanResult in aborted state is not timed out yet. TransitionTime=%s Timeout=%s",
transitionTimeToAbort, w.abortTimeout)
return nil
}
logger.Tracef("ScanResult in aborted state is timed out. TransitionTime=%s Timeout=%s",
transitionTimeToAbort, w.abortTimeout)

scanResult.Status.General.State = utils.PointerTo(models.TargetScanStateStateDone)
scanResult.Status.General.LastTransitionTime = utils.PointerTo(now)
scanResult.Status.General.Errors = utils.PointerTo([]string{
fmt.Sprintf("failed to wait for scanner to finish graceful shutdown on abort after: %s", w.abortTimeout),
})

scanResultPatch := models.TargetScanResult{
Status: scanResult.Status,
}
err := w.backend.PatchScanResult(ctx, scanResultPatch, scanResultID)
if err != nil {
return fmt.Errorf("failed to update ScanResult. ScanResult=%s: %w", scanResultID, err)
}

return nil
}