Skip to content

Commit

Permalink
Added the alert for domain down
Browse files Browse the repository at this point in the history
  • Loading branch information
Aman Shah authored and ashish1099 committed Nov 18, 2024
1 parent 5ff03b4 commit 7c06efc
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
13 changes: 13 additions & 0 deletions argocd-helm-charts/prometheus-linuxaid/rules/domain.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# Prometheus alerting rules for HTTP domain probes.
#
# The single rule below evaluates two probe metrics
# (probe_http_status_code and probe_http_content_length) and keeps only
# the series whose `certname` also has an
# obmondo_monitoring{alert_id="monitor::domains::status"} sample > 0,
# i.e. the alert is opt-in per certname.
groups:
  - name: monitor::domains
    rules:
      - alert: monitor::domains::status
        # NOTE(review): as written, the left-hand side is true when the
        # probe reports a non-zero status code AND a positive content
        # length. The annotations below say the domain "is down", which
        # reads like the opposite condition - confirm the intended
        # semantics before relying on this alert. The expression is left
        # unchanged here because the unit test in ../tests/domain.yaml
        # asserts the current behaviour (status 200 => alert fires).
        expr: |
          probe_http_status_code != 0 and probe_http_content_length > 0
          and on(certname) obmondo_monitoring{alert_id="monitor::domains::status"} > 0
        labels:
          severity: critical
          alert_id: monitor::domains::status
        annotations:
          # `domain` and `certname` come from the labels of the probe
          # series on the left-hand side of the expression.
          summary: "For server **{{ $labels.certname }}**,this **{{ $labels.domain }}** domain is down"
          description: "Domain {{ $labels.domain }} is down for certname {{ $labels.certname }}. Please fix this."
28 changes: 28 additions & 0 deletions argocd-helm-charts/prometheus-linuxaid/tests/domain.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
# promtool unit test for the monitor::domains::status alert defined in
# ../rules/domain.yaml.
evaluation_interval: 12h

rule_files:
  - ../rules/domain.yaml

tests:
  - interval: 12h
    input_series:
      # dev01 has the monitoring gate series, so its probe is eligible.
      - series: 'obmondo_monitoring{certname="dev01.example", alert_id="monitor::domains::status"}'
        values: '1x1000'
      - series: 'probe_http_status_code{domain="example.com",certname="dev01.example"}'
        values: '200x100'
      - series: 'probe_http_content_length{domain="example.com",certname="dev01.example"}'
        values: '150x100'
      # dev02 is probed with the same values but has no obmondo_monitoring
      # series; the `and on(certname)` clause must drop it, so it must not
      # appear in exp_alerts below.
      - series: 'probe_http_status_code{domain="example.org",certname="dev02.example"}'
        values: '200x100'
      - series: 'probe_http_content_length{domain="example.org",certname="dev02.example"}'
        values: '150x100'

    alert_rule_test:
      - alertname: monitor::domains::status
        eval_time: 24h
        # exp_alerts is the complete list of expected firing alerts for
        # this alertname at eval_time: only dev01 is expected.
        exp_alerts:
          - exp_labels:
              severity: critical
              certname: dev01.example
              alert_id: monitor::domains::status
              domain: example.com
            exp_annotations:
              summary: "For server **dev01.example**,this **example.com** domain is down"
              description: "Domain example.com is down for certname dev01.example. Please fix this."

0 comments on commit 7c06efc

Please sign in to comment.