Skip to content
This repository has been archived by the owner on Jun 29, 2022. It is now read-only.

Commit

Permalink
Merge pull request #120 from kinvolk/surajssd/allow-scraping-node-exp…
Browse files Browse the repository at this point in the history
…orter

prometheus: Test if all the endpoints are scraped
  • Loading branch information
surajssd authored Apr 2, 2020
2 parents 9eb9f0e + 0603616 commit 49e8346
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ resource "aws_security_group_rule" "controller-node-exporter" {
source_security_group_id = aws_security_group.worker.id
}

# Allow Prometheus to scrape kube-proxy.
# Adds an ingress rule to the controller security group that accepts TCP
# traffic on port 10249 when it originates from the worker security group.
# NOTE(review): presumably Prometheus runs on worker nodes, which is why the
# worker security group is the source — confirm.
resource "aws_security_group_rule" "kube-proxy-metrics" {
# Security group the rule is attached to (the side receiving the traffic).
security_group_id = aws_security_group.controller.id

type = "ingress"
protocol = "tcp"
# from_port and to_port are equal, so the rule covers the single port 10249.
from_port = 10249
to_port = 10249
# Only traffic coming from members of the worker security group is allowed.
source_security_group_id = aws_security_group.worker.id
}

# Allow apiserver to access kubelets for exec, log, port-forward
resource "aws_security_group_rule" "controller-kubelet" {
security_group_id = aws_security_group.controller.id
Expand Down Expand Up @@ -211,6 +222,17 @@ resource "aws_security_group_rule" "worker-node-exporter" {
self = true
}

# Allow Prometheus to scrape kube-proxy.
# Adds an ingress rule to the worker security group that accepts TCP traffic
# on port 10249 from other members of the same security group.
resource "aws_security_group_rule" "worker-kube-proxy" {
# Security group the rule is attached to (the side receiving the traffic).
security_group_id = aws_security_group.worker.id

type = "ingress"
protocol = "tcp"
# from_port and to_port are equal, so the rule covers the single port 10249.
from_port = 10249
to_port = 10249
# self = true makes the worker security group itself the traffic source.
self = true
}

resource "aws_security_group_rule" "ingress-health" {
security_group_id = aws_security_group.worker.id

Expand Down
4 changes: 2 additions & 2 deletions pkg/assets/generated_assets.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion test/monitoring/components_alerts_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func testComponentAlerts(t *testing.T, v1api v1.API) {
}
}

func getComponentAlertRetryFunc(t *testing.T, v1api v1.API, tc alertTestCase) func() (done bool, err error) {
func getComponentAlertRetryFunc(t *testing.T, v1api v1.API, tc alertTestCase) wait.ConditionFunc {
return func() (done bool, err error) {
ctx, cancel := context.WithTimeout(context.Background(), contextTimeout*time.Second)
defer cancel()
Expand Down
4 changes: 4 additions & 0 deletions test/monitoring/monitoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ func TestPrometheus(t *testing.T) {
Name: "ComponentAlerts",
Func: testComponentAlerts,
},
{
Name: "ScrapeTargetReachability",
Func: testScrapeTargetRechability,
},
}

// Invoke the test functions passing them the test object and the prometheus client.
Expand Down
89 changes: 89 additions & 0 deletions test/monitoring/scrape_targets_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright 2020 The Lokomotive Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build aws packet
// +build poste2e

package monitoring

import (
"context"
"fmt"
"os"
"testing"
"text/tabwriter"
"time"

v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"k8s.io/apimachinery/pkg/util/wait"
)

// testScrapeTargetRechability checks that every active Prometheus scrape
// target reports a healthy state.
//
// It polls the Prometheus targets API through wait.PollImmediate, using
// retryInterval as the polling interval and timeout as the overall deadline.
// If polling ends with an error, the test is marked as failed and the table of
// targets that were unhealthy during the last poll attempt is printed to
// stdout.
func testScrapeTargetRechability(t *testing.T, v1api v1.API) {
	// The writer must be allocated here and shared by pointer with the retry
	// function. Passing a nil pointer and allocating inside the callee only
	// updates the callee's copy, which would leave this one nil and make the
	// Flush call below panic.
	w := new(tabwriter.Writer)

	err := wait.PollImmediate(retryInterval, timeout, getScrapeTargetRetryFunc(t, v1api, w))
	if err == nil {
		return
	}

	t.Errorf("%v", err)

	// Finally print the table of all the targets that are down.
	if err := w.Flush(); err != nil {
		t.Errorf("error printing the unreachable targets: %v", err)
	}
}

// getScrapeTargetRetryFunc returns a wait.ConditionFunc that lists the
// Prometheus scrape targets and reports done once all active targets are
// healthy.
//
// On every invocation the table held by w is rebuilt from scratch with one row
// per unhealthy target; nothing is written to stdout until the caller flushes
// w. A nil w is tolerated, but then the caller has no handle to flush the
// table. An error is returned only when listing the targets fails, which stops
// the polling; unhealthy targets yield (false, nil) so that polling continues.
func getScrapeTargetRetryFunc(t *testing.T, v1api v1.API, w *tabwriter.Writer) wait.ConditionFunc {
	if w == nil {
		w = new(tabwriter.Writer)
	}

	return func() (done bool, err error) {
		// Re-initialize the caller's writer in place instead of allocating a
		// new one. Init resets the writer, so rows buffered by a previous
		// attempt are discarded and the table to print the output in tabular
		// format only reflects the current attempt.
		w.Init(os.Stdout, 16, 8, 2, '\t', 0)

		ctx, cancel := context.WithTimeout(context.Background(), contextTimeout*time.Second)
		defer cancel()

		targets, err := v1api.Targets(ctx)
		if err != nil {
			return false, fmt.Errorf("error listing targets from prometheus: %w", err)
		}

		fmt.Fprintf(w, "\n")
		fmt.Fprintf(w, "Service\tHealth\n")
		fmt.Fprintf(w, "-------\t------\n")

		// Boolean used to identify if tests failed.
		var testsFailed bool

		for _, target := range targets.Active {
			if target.Health == v1.HealthGood {
				continue
			}

			// Mark the attempt as failed but keep iterating, because the table
			// must list all the targets that are not in UP state.
			testsFailed = true

			fmt.Fprintf(w, "%s/%s\t%s\n",
				target.Labels["namespace"], target.Labels["service"], target.Health)
		}

		fmt.Fprintf(w, "\n")

		if testsFailed {
			t.Logf("Some prometheus scrape targets are down. Retrying ...")
			return false, nil
		}

		return true, nil
	}
}

0 comments on commit 49e8346

Please sign in to comment.