Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add configsync_sync_generation metric resource tag #763

Merged
merged 1 commit into from
Jul 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 122 additions & 99 deletions e2e/nomostest/prometheus_metrics.go

Large diffs are not rendered by default.

8 changes: 3 additions & 5 deletions e2e/testcases/cluster_selectors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -853,17 +853,15 @@ func TestClusterSelectorAnnotationConflicts(t *testing.T) {
nt.Must(nt.RootRepos[configsync.RootSyncName].CommitAndPush("Add both cluster selector annotations to a role binding"))
nt.WaitForRootSyncSourceError(configsync.RootSyncName, selectors.ClusterSelectorAnnotationConflictErrorCode, "")

rootReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
nomostest.DefaultRootReconcilerName, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
rootSyncNN := nomostest.RootSyncNN(configsync.RootSyncName)
rootSyncLabels, err := nomostest.MetricLabelsForRootSync(nt, rootSyncNN)
if err != nil {
nt.T.Fatal(err)
}

commitHash := nt.RootRepos[configsync.RootSyncName].MustHash(nt.T)

err = nomostest.ValidateMetrics(nt,
nomostest.ReconcilerErrorMetrics(nt, rootReconcilerPod.Name, commitHash, metrics.ErrorSummary{
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, commitHash, metrics.ErrorSummary{
Source: 1,
}))
if err != nil {
Expand Down
8 changes: 2 additions & 6 deletions e2e/testcases/custom_resource_definitions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
nomostesting "kpt.dev/configsync/e2e/nomostest/testing"
"kpt.dev/configsync/e2e/nomostest/testpredicates"
"kpt.dev/configsync/e2e/nomostest/testwatcher"
"kpt.dev/configsync/pkg/api/configmanagement"
"kpt.dev/configsync/pkg/api/configsync"
"kpt.dev/configsync/pkg/importer/analyzer/validation/nonhierarchical"
"kpt.dev/configsync/pkg/kinds"
Expand Down Expand Up @@ -80,17 +79,14 @@ func mustRemoveCustomResourceWithDefinition(nt *nomostest.NT, crd client.Object)

nt.WaitForRootSyncSourceError(configsync.RootSyncName, nonhierarchical.UnsupportedCRDRemovalErrorCode, "")

rootReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
nomostest.DefaultRootReconcilerName, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
rootSyncLabels, err := nomostest.MetricLabelsForRootSync(nt, rootSyncNN)
if err != nil {
nt.T.Fatal(err)
}

commitHash := nt.RootRepos[configsync.RootSyncName].MustHash(nt.T)

err = nomostest.ValidateMetrics(nt,
nomostest.ReconcilerErrorMetrics(nt, rootReconcilerPod.Name, commitHash, metrics.ErrorSummary{
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, commitHash, metrics.ErrorSummary{
Source: 1,
}))
if err != nil {
Expand Down
16 changes: 5 additions & 11 deletions e2e/testcases/custom_resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
nomostesting "kpt.dev/configsync/e2e/nomostest/testing"
"kpt.dev/configsync/e2e/nomostest/testpredicates"
"kpt.dev/configsync/e2e/nomostest/testwatcher"
"kpt.dev/configsync/pkg/api/configmanagement"
"kpt.dev/configsync/pkg/api/configsync"
"kpt.dev/configsync/pkg/kinds"
"kpt.dev/configsync/pkg/status"
Expand Down Expand Up @@ -119,17 +118,14 @@ func TestCRDDeleteBeforeRemoveCustomResourceV1Beta1(t *testing.T) {

nt.WaitForRootSyncSourceError(configsync.RootSyncName, status.UnknownKindErrorCode, "")

rootReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
nomostest.DefaultRootReconcilerName, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
rootSyncLabels, err := nomostest.MetricLabelsForRootSync(nt, rootSyncNN)
if err != nil {
nt.T.Fatal(err)
}

commitHash := nt.RootRepos[configsync.RootSyncName].MustHash(nt.T)

err = nomostest.ValidateMetrics(nt,
nomostest.ReconcilerErrorMetrics(nt, rootReconcilerPod.Name, commitHash, metrics.ErrorSummary{
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, commitHash, metrics.ErrorSummary{
Source: 1,
}))
if err != nil {
Expand Down Expand Up @@ -234,20 +230,18 @@ func TestCRDDeleteBeforeRemoveCustomResourceV1(t *testing.T) {

nt.WaitForRootSyncSourceError(configsync.RootSyncName, status.UnknownKindErrorCode, "")

rootReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
nomostest.DefaultRootReconcilerName, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
rootSyncLabels, err := nomostest.MetricLabelsForRootSync(nt, rootSyncNN)
if err != nil {
nt.T.Fatal(err)
}

err = nomostest.ValidateMetrics(nt,
nomostest.ReconcilerErrorMetrics(nt, rootReconcilerPod.Name, firstCommitHash, metrics.ErrorSummary{
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, firstCommitHash, metrics.ErrorSummary{
// Remediator conflict after the first commit, because the declared
// Anvil was deleted by another client after successful sync.
Conflicts: 1,
}),
nomostest.ReconcilerErrorMetrics(nt, rootReconcilerPod.Name, secondCommitHash, metrics.ErrorSummary{
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, secondCommitHash, metrics.ErrorSummary{
// No remediator conflict after the second commit: the source error
// prevented the reconciler from being updated with the latest declared
// resources.
Expand Down
36 changes: 14 additions & 22 deletions e2e/testcases/invalid_git_branch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ import (
"kpt.dev/configsync/e2e/nomostest/metrics"
"kpt.dev/configsync/e2e/nomostest/ntopts"
nomostesting "kpt.dev/configsync/e2e/nomostest/testing"
"kpt.dev/configsync/pkg/api/configmanagement"
"kpt.dev/configsync/pkg/api/configsync"
"kpt.dev/configsync/pkg/core"
"kpt.dev/configsync/pkg/status"
"kpt.dev/configsync/pkg/testing/fake"
)
Expand All @@ -38,17 +36,15 @@ func TestInvalidRootSyncBranchStatus(t *testing.T) {

nt.WaitForRootSyncSourceError(configsync.RootSyncName, status.SourceErrorCode, "")

rootReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
nomostest.DefaultRootReconcilerName, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
rootSyncNN := nomostest.RootSyncNN(configsync.RootSyncName)
rootSyncLabels, err := nomostest.MetricLabelsForRootSync(nt, rootSyncNN)
if err != nil {
nt.T.Fatal(err)
}

commitHash := nt.RootRepos[configsync.RootSyncName].MustHash(nt.T)

err = nomostest.ValidateMetrics(nt,
nomostest.ReconcilerErrorMetrics(nt, rootReconcilerPod.Name, commitHash, metrics.ErrorSummary{
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, commitHash, metrics.ErrorSummary{
Source: 1,
}))
if err != nil {
Expand All @@ -69,10 +65,10 @@ func TestInvalidRootSyncBranchStatus(t *testing.T) {

func TestInvalidRepoSyncBranchStatus(t *testing.T) {
nt := nomostest.New(t, nomostesting.SyncSource, ntopts.NamespaceRepo(namespaceRepo, configsync.RepoSyncName))
nn := nomostest.RepoSyncNN(namespaceRepo, configsync.RepoSyncName)
rs := nomostest.RepoSyncObjectV1Beta1FromNonRootRepo(nt, nn)
rs.Spec.Branch = "invalid-branch"
nt.Must(nt.RootRepos[configsync.RootSyncName].Add(nomostest.StructuredNSPath(namespaceRepo, rs.Name), rs))
repoSyncNN := nomostest.RepoSyncNN(namespaceRepo, configsync.RepoSyncName)
repoSync := nomostest.RepoSyncObjectV1Beta1FromNonRootRepo(nt, repoSyncNN)
repoSync.Spec.Branch = "invalid-branch"
nt.Must(nt.RootRepos[configsync.RootSyncName].Add(nomostest.StructuredNSPath(namespaceRepo, repoSync.Name), repoSync))
nt.Must(nt.RootRepos[configsync.RootSyncName].CommitAndPush("Update RepoSync to invalid branch name"))

nt.WaitForRepoSyncSourceError(namespaceRepo, configsync.RepoSyncName, status.SourceErrorCode, "")
Expand All @@ -85,31 +81,27 @@ func TestInvalidRepoSyncBranchStatus(t *testing.T) {
nt.T.Fatal(err)
}

nsReconcilerName := core.NsReconcilerName(nn.Namespace, nn.Name)
nsReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
nsReconcilerName, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
repoSyncLabels, err := nomostest.MetricLabelsForRepoSync(nt, repoSyncNN)
if err != nil {
nt.T.Fatal(err)
}

commitHash := nt.RootRepos[configsync.RootSyncName].MustHash(nt.T)
commitHash := nt.NonRootRepos[repoSyncNN].MustHash(nt.T)

err = nomostest.ValidateMetrics(nt,
// Source error prevents apply, so don't wait for a sync with the current commit.
nomostest.ReconcilerErrorMetrics(nt, nsReconcilerPod.Name, commitHash, metrics.ErrorSummary{
nomostest.ReconcilerErrorMetrics(nt, repoSyncLabels, commitHash, metrics.ErrorSummary{
Source: 1,
}))
if err != nil {
nt.T.Fatal(err)
}

rs.Spec.Branch = gitproviders.MainBranch
nt.Must(nt.RootRepos[configsync.RootSyncName].Add(nomostest.StructuredNSPath(namespaceRepo, rs.Name), rs))
repoSync.Spec.Branch = gitproviders.MainBranch
nt.Must(nt.RootRepos[configsync.RootSyncName].Add(nomostest.StructuredNSPath(namespaceRepo, repoSync.Name), repoSync))
nt.Must(nt.RootRepos[configsync.RootSyncName].CommitAndPush("Update RepoSync to valid branch name"))

// Ensure RepoSync's active branch is checked out, so the correct commit is used for validation.
nt.Must(nt.NonRootRepos[nn].CheckoutBranch(gitproviders.MainBranch))
nt.Must(nt.NonRootRepos[repoSyncNN].CheckoutBranch(gitproviders.MainBranch))

if err := nt.WatchForAllSyncs(); err != nil {
nt.T.Fatal(err)
Expand All @@ -124,7 +116,7 @@ func TestInvalidRepoSyncBranchStatus(t *testing.T) {
}

err = nomostest.ValidateStandardMetricsForRepoSync(nt, metrics.Summary{
Sync: nn,
Sync: repoSyncNN,
ObjectCount: 0, // no additional managed objects
})
if err != nil {
Expand Down
31 changes: 11 additions & 20 deletions e2e/testcases/multi_sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import (
nomostesting "kpt.dev/configsync/e2e/nomostest/testing"
"kpt.dev/configsync/e2e/nomostest/testpredicates"
"kpt.dev/configsync/e2e/nomostest/testwatcher"
"kpt.dev/configsync/pkg/api/configmanagement"
"kpt.dev/configsync/pkg/api/configsync"
"kpt.dev/configsync/pkg/api/configsync/v1beta1"
"kpt.dev/configsync/pkg/applier"
Expand Down Expand Up @@ -243,6 +242,8 @@ func TestMultiSyncs_Unstructured_MixedControl(t *testing.T) {
validateReconcilerResource(nt, kinds.Secret(), map[string]string{metadata.SyncNamespaceLabel: testNs}, 5)
validateReconcilerResource(nt, kinds.Secret(), map[string]string{metadata.SyncNamespaceLabel: testNs2}, 1)
validateReconcilerResource(nt, kinds.Secret(), map[string]string{metadata.SyncNameLabel: nr1}, 2)

// TODO: validate sync-generation label
}

func validateReconcilerResource(nt *nomostest.NT, gvk schema.GroupVersionKind, labels map[string]string, expectedCount int) {
Expand Down Expand Up @@ -303,23 +304,19 @@ func TestConflictingDefinitions_RootToNamespace(t *testing.T) {
nt.WaitForRepoSyncSyncError(repoSyncNN.Namespace, repoSyncNN.Name, status.ManagementConflictErrorCode, "declared in another repository")

nt.T.Logf("Validate reconciler error metric is emitted from namespace reconciler %s", repoSyncNN)
nsReconcilerName := core.NsReconcilerName(repoSyncNN.Namespace, repoSyncNN.Name)
nsReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
nsReconcilerName, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
repoSyncLabels, err := nomostest.MetricLabelsForRepoSync(nt, repoSyncNN)
if err != nil {
nt.T.Fatal(err)
}

commitHash := nt.NonRootRepos[repoSyncNN].MustHash(nt.T)

err = nomostest.ValidateMetrics(nt,
// ManagementConflictErrorWrap is recorded by the remediator, while
// KptManagementConflictError is recorded by the applier, but they have
// similar error messages. So while there should be a ReconcilerError
// metric, there might not be a LastSyncTimestamp with status=error.
// nomostest.ReconcilerSyncError(nt, nsReconcilerPod.Name, commitHash),
nomostest.ReconcilerErrorMetrics(nt, nsReconcilerPod.Name, commitHash, metrics.ErrorSummary{
// nomostest.ReconcilerSyncError(nt, repoSyncLabels, commitHash),
nomostest.ReconcilerErrorMetrics(nt, repoSyncLabels, commitHash, metrics.ErrorSummary{
Sync: 1,
}))
if err != nil {
Expand Down Expand Up @@ -437,18 +434,15 @@ func TestConflictingDefinitions_NamespaceToRoot(t *testing.T) {
}

// Validate reconciler error metric is emitted from namespace reconciler.
nsReconcilerName := core.NsReconcilerName(repoSyncNN.Namespace, repoSyncNN.Name)
nsReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
nsReconcilerName, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
rootSyncLabels, err := nomostest.MetricLabelsForRepoSync(nt, repoSyncNN)
if err != nil {
nt.T.Fatal(err)
}
commitHash := nt.NonRootRepos[repoSyncNN].MustHash(nt.T)

err = nomostest.ValidateMetrics(nt,
nomostest.ReconcilerSyncError(nt, nsReconcilerPod.Name, commitHash),
nomostest.ReconcilerErrorMetrics(nt, nsReconcilerPod.Name, commitHash, metrics.ErrorSummary{
nomostest.ReconcilerSyncError(nt, rootSyncLabels, commitHash),
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, commitHash, metrics.ErrorSummary{
Sync: 1,
}))
if err != nil {
Expand Down Expand Up @@ -697,18 +691,15 @@ func TestConflictingDefinitions_NamespaceToNamespace(t *testing.T) {
nt.T.Fatal(err)
}
nt.T.Logf("Validate reconciler error metric is emitted from Namespace reconciler %s", repoSyncNN2)
nsReconciler2Name := core.NsReconcilerName(repoSyncNN2.Namespace, repoSyncNN2.Name)
nsReconciler2Pod, err := nt.KubeClient.GetDeploymentPod(
nsReconciler2Name, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
repoSync2Labels, err := nomostest.MetricLabelsForRepoSync(nt, repoSyncNN2)
if err != nil {
nt.T.Fatal(err)
}
commitHash := nt.NonRootRepos[repoSyncNN2].MustHash(nt.T)

err = nomostest.ValidateMetrics(nt,
nomostest.ReconcilerSyncError(nt, nsReconciler2Pod.Name, commitHash),
nomostest.ReconcilerErrorMetrics(nt, nsReconciler2Pod.Name, commitHash, metrics.ErrorSummary{
nomostest.ReconcilerSyncError(nt, repoSync2Labels, commitHash),
nomostest.ReconcilerErrorMetrics(nt, repoSync2Labels, commitHash, metrics.ErrorSummary{
Sync: 1,
}))
if err != nil {
Expand Down
16 changes: 5 additions & 11 deletions e2e/testcases/namespace_repo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
nomostesting "kpt.dev/configsync/e2e/nomostest/testing"
"kpt.dev/configsync/e2e/nomostest/testpredicates"
"kpt.dev/configsync/e2e/nomostest/testwatcher"
"kpt.dev/configsync/pkg/api/configmanagement"
v1 "kpt.dev/configsync/pkg/api/configmanagement/v1"
"kpt.dev/configsync/pkg/api/configsync"
"kpt.dev/configsync/pkg/api/configsync/v1beta1"
Expand Down Expand Up @@ -345,6 +344,7 @@ func checkRepoSyncResourcesNotPresent(nt *nomostest.NT, namespace string, secret

func TestDeleteNamespaceReconcilerDeployment(t *testing.T) {
bsNamespace := "bookstore"
rootSyncNN := nomostest.RootSyncNN(configsync.RootSyncName)
repoSyncNN := nomostest.RepoSyncNN(bsNamespace, configsync.RepoSyncName)
nt := nomostest.New(
t,
Expand All @@ -353,7 +353,6 @@ func TestDeleteNamespaceReconcilerDeployment(t *testing.T) {
ntopts.WithCentralizedControl,
)

rootReconciler := core.RootReconcilerName(configsync.RootSyncName)
nsReconciler := core.NsReconcilerName(bsNamespace, configsync.RepoSyncName)

// Validate status condition "Reconciling" and Stalled is set to "False" after
Expand Down Expand Up @@ -391,27 +390,22 @@ func TestDeleteNamespaceReconcilerDeployment(t *testing.T) {
nt.T.Errorf("RepoSync did not finish reconciling: %v", err)
}

rootSyncReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
rootReconciler, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
rootSyncLabels, err := nomostest.MetricLabelsForRootSync(nt, rootSyncNN)
if err != nil {
nt.T.Fatal(err)
}
repoSyncReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
nsReconciler, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
repoSyncLabels, err := nomostest.MetricLabelsForRepoSync(nt, repoSyncNN)
if err != nil {
nt.T.Fatal(err)
}

rootCommitHash := nt.RootRepos[configsync.RootSyncName].MustHash(nt.T)
nnCommitHash := nt.NonRootRepos[repoSyncNN].MustHash(nt.T)

// Skip sync & ops metrics and just validate reconciler-manager and reconciler errors.
err = nomostest.ValidateMetrics(nt,
nomostest.ReconcilerManagerMetrics(nt),
nomostest.ReconcilerErrorMetrics(nt, rootSyncReconcilerPod.Name, rootCommitHash, metrics.ErrorSummary{}),
nomostest.ReconcilerErrorMetrics(nt, repoSyncReconcilerPod.Name, nnCommitHash, metrics.ErrorSummary{}))
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, rootCommitHash, metrics.ErrorSummary{}),
nomostest.ReconcilerErrorMetrics(nt, repoSyncLabels, nnCommitHash, metrics.ErrorSummary{}))
if err != nil {
nt.T.Fatal(err)
}
Expand Down
13 changes: 5 additions & 8 deletions e2e/testcases/namespaces_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
"kpt.dev/configsync/e2e/nomostest/ntopts"
nomostesting "kpt.dev/configsync/e2e/nomostest/testing"
"kpt.dev/configsync/e2e/nomostest/testpredicates"
"kpt.dev/configsync/pkg/api/configmanagement"
"kpt.dev/configsync/pkg/api/configsync"
"kpt.dev/configsync/pkg/core"
"kpt.dev/configsync/pkg/kinds"
Expand Down Expand Up @@ -725,21 +724,19 @@ func TestDontDeleteAllNamespaces(t *testing.T) {
safetyNSObj := fake.NamespaceObject(nt.RootRepos[configsync.RootSyncName].SafetyNSName)
nt.MetricsExpectations.RemoveObject(configsync.RootSyncKind, rootSyncNN, safetyNSObj)

rootReconcilerPod, err := nt.KubeClient.GetDeploymentPod(
nomostest.DefaultRootReconcilerName, configmanagement.ControllerNamespace,
nt.DefaultWaitTimeout)
rootSyncLabels, err := nomostest.MetricLabelsForRootSync(nt, rootSyncNN)
if err != nil {
nt.T.Fatal(err)
}
commitHash := nt.RootRepos[configsync.RootSyncName].MustHash(nt.T)

err = nomostest.ValidateMetrics(nt,
nomostest.ReconcilerSyncError(nt, rootReconcilerPod.Name, commitHash),
nomostest.ReconcilerSourceMetrics(nt, rootReconcilerPod.Name, commitHash,
nomostest.ReconcilerSyncError(nt, rootSyncLabels, commitHash),
nomostest.ReconcilerSourceMetrics(nt, rootSyncLabels, commitHash,
nt.MetricsExpectations.ExpectedRootSyncObjectCount(configsync.RootSyncName)),
nomostest.ReconcilerOperationsMetrics(nt, rootReconcilerPod.Name,
nomostest.ReconcilerOperationsMetrics(nt, rootSyncLabels,
nt.MetricsExpectations.ExpectedRootSyncObjectOperations(configsync.RootSyncName)...),
nomostest.ReconcilerErrorMetrics(nt, rootReconcilerPod.Name, commitHash, metrics.ErrorSummary{
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, commitHash, metrics.ErrorSummary{
Sync: 1,
}))
if err != nil {
Expand Down
Loading