Skip to content

Commit

Permalink
WIP resolve test issues
Browse files Browse the repository at this point in the history
- Update TestConflictingDefinitions_NamespaceToRoot to expect
  a management conflict from the remediator, and for the
  remediator's conflict to replace the applier's conflict.
- De-dupe management conflict errors from remediator & applier.
- Fix a bug in the remediator that was dropping management
  conflict errors when using a watch filter. With the filter,
  changing the filter labels causes a DELETE event, instead of
  an UPDATE event, like it did before. The only way to
  double-check is to query the cluster, and we don't want to do
  that in the watch handler for performance reasons. So instead,
  update the remediator queue worker to query the cluster to
  confirm object deletion. Then also update the worker to be able
  to add management conflict errors to the same conflictHandler
  used by the watcher.
- Add HasConflictErrors() to the conflictHandler to replace the
  locked managementConflict bool in the watch.Manager. This will
  cause the retry run loop to re-apply if there's still a conflict.
  Is this desired behavior? Or do we only want to re-apply the
  first time? If so, we may need to cache the errors in the run
  loop so we can detect when new conflicts are added. Or maybe
  just expose the whole conflictHandler.
- Clean up parse.Run a little. Add resetRetryBackoff method to
  avoid leaking implementation details to the rest of the parser.
- Remove obsolete retry setting, since it wasn't exposed and was
  previously replaced with a backoff strategy.
- Fix the reconciler container CPU calculation to use milli-cpu.
- Add ConflictingObjectID to ManagementConflictError to allow
  de-duping by ID.
- Fix KptManagementConflictError to return a ManagementConflictError,
  not just a ResourceError. The currentManager will be UNKNOWN,
  but it only seems to be used by reportRootSyncConflicts, which
  only reports conflicts from the remediator, not the applier.
  • Loading branch information
karlkfi committed Apr 18, 2024
1 parent c6cba74 commit cd8b14b
Show file tree
Hide file tree
Showing 27 changed files with 371 additions and 255 deletions.
1 change: 0 additions & 1 deletion cmd/reconciler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ func main() {
ReconcilerScope: declared.Scope(*scope),
ResyncPeriod: *resyncPeriod,
PollingPeriod: *pollingPeriod,
RetryPeriod: configsync.DefaultReconcilerRetryPeriod,
StatusUpdatePeriod: configsync.DefaultReconcilerSyncStatusUpdatePeriod,
SourceRoot: absSourceDir,
RepoRoot: absRepoRoot,
Expand Down
109 changes: 49 additions & 60 deletions e2e/testcases/multi_sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,12 @@ func TestConflictingDefinitions_RootToNamespace(t *testing.T) {
ntopts.RepoSyncPermissions(policy.RBACAdmin()), // NS Reconciler manages Roles
)

// Increase reconciler logging
rootSyncObj := fake.RootSyncObjectV1Beta1(rootSyncNN.Name)
nt.Must(nt.KubeClient.MergePatch(rootSyncObj, `{"spec": {"override": {"logLevels": [{"containerName": "reconciler", "logLevel": 5}]}}}`))
repoSyncObj := fake.RepoSyncObjectV1Beta1(repoSyncNN.Namespace, repoSyncNN.Name)
nt.Must(nt.KubeClient.MergePatch(repoSyncObj, `{"spec": {"override": {"logLevels": [{"containerName": "reconciler", "logLevel": 5}]}}}`))

podRoleFilePath := fmt.Sprintf("acme/namespaces/%s/pod-role.yaml", testNs)
nt.T.Logf("Add a Role to root: %s", configsync.RootSyncName)
roleObj := rootPodRole()
Expand Down Expand Up @@ -395,123 +401,106 @@ func TestConflictingDefinitions_NamespaceToRoot(t *testing.T) {
ntopts.RepoSyncPermissions(policy.RBACAdmin()), // NS reconciler manages Roles
)

// Increase reconciler logging
rootSyncObj := fake.RootSyncObjectV1Beta1(rootSyncNN.Name)
nt.Must(nt.KubeClient.MergePatch(rootSyncObj, `{"spec": {"override": {"logLevels": [{"containerName": "reconciler", "logLevel": 5}]}}}`))
repoSyncObj := fake.RepoSyncObjectV1Beta1(repoSyncNN.Namespace, repoSyncNN.Name)
nt.Must(nt.KubeClient.MergePatch(repoSyncObj, `{"spec": {"override": {"logLevels": [{"containerName": "reconciler", "logLevel": 5}]}}}`))

podRoleFilePath := fmt.Sprintf("acme/namespaces/%s/pod-role.yaml", testNs)
nt.T.Logf("Add a Role to Namespace repo: %s", configsync.RootSyncName)
nt.T.Logf("Add a Role to Namespace repo: %v", repoSyncNN)
nsRoleObj := namespacePodRole()
nt.Must(nt.NonRootRepos[repoSyncNN].Add(podRoleFilePath, nsRoleObj))
nt.Must(nt.NonRootRepos[repoSyncNN].CommitAndPush("declare Role"))
if err := nt.WatchForAllSyncs(); err != nil {
nt.T.Fatal(err)
}
nt.Must(nt.WatchForAllSyncs())

err := nt.Validate("pods", testNs, &rbacv1.Role{},
// Validate Role is managed by the RepoSync reconciler
nt.Must(nt.Validate(nsRoleObj.Name, nsRoleObj.Namespace, &rbacv1.Role{},
roleHasRules(nsRoleObj.Rules),
testpredicates.IsManagedBy(nt.Scheme, declared.Scope(repoSyncNN.Namespace), repoSyncNN.Name))
if err != nil {
nt.T.Fatal(err)
}
testpredicates.IsManagedBy(nt.Scheme, declared.Scope(repoSyncNN.Namespace), repoSyncNN.Name)))

// Test Role managed by the RepoSync, NOT the RootSync
nt.MetricsExpectations.AddObjectApply(configsync.RepoSyncKind, repoSyncNN, nsRoleObj)

// Validate no errors from root reconciler.
err = nomostest.ValidateStandardMetricsForRootSync(nt, metrics.Summary{
nt.Must(nomostest.ValidateStandardMetricsForRootSync(nt, metrics.Summary{
Sync: rootSyncNN,
})
if err != nil {
nt.T.Fatal(err)
}
}))

// Validate no errors from namespace reconciler.
err = nomostest.ValidateStandardMetricsForRepoSync(nt, metrics.Summary{
nt.Must(nomostest.ValidateStandardMetricsForRepoSync(nt, metrics.Summary{
Sync: repoSyncNN,
})
if err != nil {
nt.T.Fatal(err)
}
}))

nt.T.Logf("Declare a conflicting Role in the Root repo: %s", configsync.RootSyncName)
rootRoleObj := rootPodRole()
nt.Must(nt.RootRepos[configsync.RootSyncName].Add(podRoleFilePath, rootRoleObj))
nt.Must(nt.RootRepos[configsync.RootSyncName].CommitAndPush("add conflicting pod role to Root"))

nt.T.Logf("The RootSync should update the Role")
err = nt.WatchForAllSyncs(nomostest.RootSyncOnly())
if err != nil {
nt.T.Fatal(err)
}
nt.Must(nt.WatchForAllSyncs(nomostest.RootSyncOnly()))

// The RepoSync remediator won't try to revert a change to a managed object
// if it was adopted by a RepoSync reconciler.
// So without a new commit or pod replacement, the reconciler won't
// automatically re-sync until the ~1hr resync period expires.
// nt.T.Logf("Push a new commit with an empty file to trigger re-sync by the RepoSync reconciler")
// nt.Must(nt.NonRootRepos[repoSyncNN].AddFile("acme/empty.txt", []byte{}))
// nt.Must(nt.NonRootRepos[repoSyncNN].CommitAndPush("declare Role"))

nt.T.Logf("The RepoSync %s reports a problem since it can't sync the declaration.", testNs)
nt.WaitForRepoSyncSyncError(repoSyncNN.Namespace, repoSyncNN.Name, status.ManagementConflictErrorCode, "declared in another repository")

nt.MetricsExpectations.AddObjectApply(configsync.RootSyncKind, rootSyncNN, rootRoleObj)

// Validate no errors from root reconciler.
err = nomostest.ValidateStandardMetricsForRootSync(nt, metrics.Summary{
nt.Must(nomostest.ValidateStandardMetricsForRootSync(nt, metrics.Summary{
Sync: rootSyncNN,
})
if err != nil {
nt.T.Fatal(err)
}
}))

// Validate reconciler error metric is emitted from namespace reconciler.
rootSyncLabels, err := nomostest.MetricLabelsForRepoSync(nt, repoSyncNN)
repoSyncLabels, err := nomostest.MetricLabelsForRepoSync(nt, repoSyncNN)
if err != nil {
nt.T.Fatal(err)
}
commitHash := nt.NonRootRepos[repoSyncNN].MustHash(nt.T)

err = nomostest.ValidateMetrics(nt,
nomostest.ReconcilerSyncError(nt, rootSyncLabels, commitHash),
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, commitHash, metrics.ErrorSummary{
Sync: 1,
}))
if err != nil {
nt.T.Fatal(err)
}
nt.Must(nomostest.ValidateMetrics(nt,
nomostest.ReconcilerSyncError(nt, repoSyncLabels, commitHash),
nomostest.ReconcilerErrorMetrics(nt, repoSyncLabels, commitHash, metrics.ErrorSummary{
Conflicts: 1, // from the remediator
Sync: 1, // remediator conflict error replaces applier conflict error
})))

nt.T.Logf("Ensure the Role matches the one in the Root repo %s", configsync.RootSyncName)
err = nt.Validate("pods", testNs, &rbacv1.Role{},
nt.Must(nt.Validate(nsRoleObj.Name, nsRoleObj.Namespace, &rbacv1.Role{},
roleHasRules(rootPodRole().Rules),
testpredicates.IsManagedBy(nt.Scheme, declared.RootReconciler, configsync.RootSyncName))
if err != nil {
nt.T.Fatal(err)
}
testpredicates.IsManagedBy(nt.Scheme, declared.RootReconciler, configsync.RootSyncName)))

nt.T.Logf("Remove the Role from the Namespace repo %s", repoSyncNN)
nt.Must(nt.NonRootRepos[repoSyncNN].Remove(podRoleFilePath))
nt.Must(nt.NonRootRepos[repoSyncNN].CommitAndPush("remove conflicting pod role from Namespace repo"))
if err := nt.WatchForAllSyncs(); err != nil {
nt.T.Fatal(err)
}
nt.Must(nt.WatchForAllSyncs())

nt.T.Logf("Ensure the Role still matches the one in the Root repo %s", configsync.RootSyncName)
err = nt.Validate("pods", testNs, &rbacv1.Role{},
nt.Must(nt.Validate(nsRoleObj.Name, nsRoleObj.Namespace, &rbacv1.Role{},
roleHasRules(rootPodRole().Rules),
testpredicates.IsManagedBy(nt.Scheme, declared.RootReconciler, configsync.RootSyncName))
if err != nil {
nt.T.Fatal(err)
}
testpredicates.IsManagedBy(nt.Scheme, declared.RootReconciler, configsync.RootSyncName)))

// Test Role managed by the RootSync, NOT the RepoSync
nt.MetricsExpectations.AddObjectApply(configsync.RootSyncKind, rootSyncNN, rootRoleObj)
// RepoSync won't delete the object, because it doesn't own it, due to the AdoptIfNoInventory strategy.
nt.MetricsExpectations.RemoveObject(configsync.RepoSyncKind, repoSyncNN, nsRoleObj)

// Validate no errors from root reconciler.
err = nomostest.ValidateStandardMetricsForRootSync(nt, metrics.Summary{
nt.Must(nomostest.ValidateStandardMetricsForRootSync(nt, metrics.Summary{
Sync: rootSyncNN,
})
if err != nil {
nt.T.Fatal(err)
}
}))

// Validate no errors from namespace reconciler.
err = nomostest.ValidateStandardMetricsForRepoSync(nt, metrics.Summary{
nt.Must(nomostest.ValidateStandardMetricsForRepoSync(nt, metrics.Summary{
Sync: repoSyncNN,
})
if err != nil {
nt.T.Fatal(err)
}
}))
}

func TestConflictingDefinitions_RootToRoot(t *testing.T) {
Expand Down
6 changes: 0 additions & 6 deletions pkg/api/configsync/register.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,6 @@ const (
// from source (even without a new commit).
DefaultReconcilerResyncPeriod = time.Hour

// DefaultReconcilerRetryPeriod is the time delay between polling the
// filesystem for source updates to sync, when the previous attempt errored.
// Note: This retry period is also used for watch updates.
// TODO: replace with retry-backoff strategy
DefaultReconcilerRetryPeriod = time.Second

// DefaultReconcilerSyncStatusUpdatePeriod is the time delay between async
// status updates by the reconciler. These updates report new management
// conflict errors from the remediator, if there are any.
Expand Down
9 changes: 6 additions & 3 deletions pkg/applier/management_conflict_err.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package applier

import (
"kpt.dev/configsync/pkg/core"
"kpt.dev/configsync/pkg/metadata"
"kpt.dev/configsync/pkg/status"
"sigs.k8s.io/controller-runtime/pkg/client"
Expand All @@ -23,10 +24,12 @@ import (
// KptManagementConflictError indicates that the passed resource is illegally
// declared in multiple repositories.
// TODO: merge with status.ManagementConflictError if cli-utils supports reporting the conflicting manager in InventoryOverlapError.
func KptManagementConflictError(resource client.Object) status.Error {
func KptManagementConflictError(resource client.Object) status.ManagementConflictError {
newManager := core.GetAnnotation(resource, metadata.ResourceManagerKey)
currentManager := "UNKNOWN"
return status.ManagementConflictErrorBuilder.
Sprintf("The %q reconciler cannot manage resources declared in another repository. "+
"Remove the declaration for this resource from either the current repository, or the managed repository.",
resource.GetAnnotations()[metadata.ResourceManagerKey]).
BuildWithResources(resource)
newManager).
BuildWithConflictingManagers(resource, newManager, currentManager)
}
10 changes: 6 additions & 4 deletions pkg/diff/precedence.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ func CanManage(scope declared.Scope, syncName string, obj client.Object, op admi
}
oldManager := core.GetAnnotation(obj, metadata.ResourceManagerKey)
newManager := declared.ResourceManager(scope, syncName)
reconciler, _ := reconcilerName(newManager)
reconciler, _ := ReconcilerNameFromManager(newManager)
err := ValidateManager(reconciler, oldManager, core.IDOf(obj), op)
if err != nil {
klog.V(3).Infof("diff.CanManage? %v", err)
klog.V(1).Infof("diff.CanManage? %v", err)
return false
}
return true
Expand All @@ -89,7 +89,7 @@ func ValidateManager(reconciler, manager string, id core.ID, op admissionv1.Oper
return nil
}

oldReconciler, syncScope := reconcilerName(manager)
oldReconciler, syncScope := ReconcilerNameFromManager(manager)

if err := declared.ValidateScope(string(syncScope)); err != nil {
// All managers are allowed to manage an object with an invalid manager.
Expand Down Expand Up @@ -145,7 +145,9 @@ func ValidateManager(reconciler, manager string, id core.ID, op admissionv1.Oper
return nil
}

func reconcilerName(manager string) (string, declared.Scope) {
// ReconcilerNameFromManager parses the manager annotation value and returns the
// reconciler deployment name and scope.
func ReconcilerNameFromManager(manager string) (string, declared.Scope) {
syncScope, syncName := declared.ManagerScopeAndName(manager)
var reconciler string
if syncScope == declared.RootReconciler {
Expand Down
1 change: 1 addition & 0 deletions pkg/parse/annotations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ func TestAddAnnotationsAndLabels(t *testing.T) {
expected: []ast.FileObject{fake.Role(
core.Namespace("foo"),
core.Label(metadata.ManagedByKey, metadata.ManagedByValue),
core.Label(metadata.OwningInventoryKey, applier.InventoryID("rs", "some-namespace")),
core.Annotation(metadata.ResourceManagementKey, "enabled"),
core.Annotation(metadata.ResourceManagerKey, "some-namespace_rs"),
core.Annotation(metadata.SyncTokenAnnotationKey, "1234567"),
Expand Down
3 changes: 0 additions & 3 deletions pkg/parse/opts.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,6 @@ type Options struct {
// (even without a new commit).
ResyncPeriod time.Duration

// RetryPeriod is how long the Parser waits between retries, after an error.
RetryPeriod time.Duration

// StatusUpdatePeriod is how long the Parser waits between updates of the
// sync status, to account for management conflict errors from the Remediator.
StatusUpdatePeriod time.Duration
Expand Down
Loading

0 comments on commit cd8b14b

Please sign in to comment.