Skip to content

Commit

Permalink
feat: Argo CD mixin v1
Browse files Browse the repository at this point in the history
  • Loading branch information
adinhodovic committed Nov 5, 2023
0 parents commit 6b32a08
Show file tree
Hide file tree
Showing 24 changed files with 4,812 additions and 0 deletions.
83 changes: 83 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
---
# Continuous integration: every job checks out the repository, installs Go
# (the Makefile builds its tooling with `go build`) and runs one make target.
# `--always-make` forces every target to be rebuilt regardless of timestamps.
name: ci
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
env:
  # Quoted so YAML keeps it a string rather than parsing it as a float.
  golang-version: '1.17'
jobs:
  # Regenerates the outputs and fails if they differ from what is committed.
  generate:
    runs-on: ubuntu-latest
    name: Generate yaml
    steps:
      - uses: actions/checkout@v4
        with:
          persist-credentials: false
      - uses: actions/setup-go@v5
        with:
          go-version: ${{ env.golang-version }}
          # Newer setup-go releases cache modules by default; keep it off to
          # match the behaviour of the previously used v2 action.
          cache: false
      - run: make --always-make generate && git diff --exit-code

  jsonnet-lint:
    runs-on: ubuntu-latest
    name: Jsonnet linter
    steps:
      - uses: actions/checkout@v4
        with:
          persist-credentials: false
      - uses: actions/setup-go@v5
        with:
          go-version: ${{ env.golang-version }}
          cache: false
      - run: make --always-make jsonnet-lint

  dashboards-lint:
    runs-on: ubuntu-latest
    name: Grafana dashboard linter
    steps:
      - uses: actions/checkout@v4
        with:
          persist-credentials: false
      - uses: actions/setup-go@v5
        with:
          go-version: ${{ env.golang-version }}
          cache: false
      - run: make --always-make dashboards-lint

  alerts-lint:
    runs-on: ubuntu-latest
    name: Alerts linter
    steps:
      - uses: actions/checkout@v4
        with:
          persist-credentials: false
      - uses: actions/setup-go@v5
        with:
          go-version: ${{ env.golang-version }}
          cache: false
      - run: make --always-make alerts-lint

  # Reformats the sources and fails if the formatter changed anything.
  fmt:
    runs-on: ubuntu-latest
    name: Jsonnet formatter
    steps:
      - uses: actions/checkout@v4
        with:
          persist-credentials: false
      - uses: actions/setup-go@v5
        with:
          go-version: ${{ env.golang-version }}
          cache: false
      - run: make --always-make fmt && git diff --exit-code

  unit-tests:
    runs-on: ubuntu-latest
    name: Unit tests
    steps:
      - uses: actions/checkout@v4
        with:
          persist-credentials: false
      - uses: actions/setup-go@v5
        with:
          go-version: ${{ env.golang-version }}
          cache: false
      - run: make --always-make test
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Jsonnet dependencies installed by `jb install`.
vendor
# Scratch directory; the Makefile installs tool binaries under tmp/bin.
tmp
jsonnetfile.lock.json
# Copy of the linter configuration that the Makefile places next to the
# generated dashboards. gitignore does not strip a leading "./", so the
# pattern must be written relative to this file without it.
dashboards_out/.lint
4 changes: 4 additions & 0 deletions .lint
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Configuration for the Grafana dashboard linter. The Makefile copies this
# file to $(OUT_DIR)/.lint before running `dashboard-linter lint --strict`.
# NOTE(review): each key below is presumably a linter rule that is switched
# off for every dashboard - confirm against the dashboard-linter documentation.
exclusions:
  template-job-rule:
  panel-job-instance-rule:
  target-rate-interval-rule:
78 changes: 78 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Directory the tool binaries are built into. Overridable from the command
# line or environment. $(CURDIR) is make's own absolute working directory, so
# no shell is forked each time BIN_DIR is expanded (as with $(shell pwd)).
BIN_DIR ?= $(CURDIR)/tmp/bin

# Directory created by `jb install`; also the target name of that rule.
JSONNET_VENDOR := vendor

# Paths of the individual tool binaries. Simple (:=) assignment expands each
# value once instead of on every reference.
GRAFANA_DASHBOARD_LINTER_BIN := $(BIN_DIR)/dashboard-linter
JB_BIN := $(BIN_DIR)/jb
JSONNET_BIN := $(BIN_DIR)/jsonnet
JSONNETLINT_BIN := $(BIN_DIR)/jsonnet-lint
JSONNETFMT_BIN := $(BIN_DIR)/jsonnetfmt
PROMTOOL_BIN := $(BIN_DIR)/promtool

# Every binary produced by the shared tool-installation rule.
TOOLING := $(JB_BIN) $(JSONNETLINT_BIN) $(JSONNET_BIN) $(JSONNETFMT_BIN) $(PROMTOOL_BIN) $(GRAFANA_DASHBOARD_LINTER_BIN)

# Arguments passed to jsonnetfmt by the fmt target.
JSONNETFMT_ARGS := -n 2 --max-blank-lines 2 --string-style s --comment-style s

# Dashboard sources and the directory the rendered dashboards are written to.
SRC_DIR ?= dashboards
OUT_DIR ?= dashboards_out

# Default goal (first target in the file): format, generate, lint and test.
.PHONY: all
all: fmt generate lint test

# Build the alerts file and the rendered dashboards directory.
# prometheus_rules.yaml is deliberately left out (see the trailing comment).
.PHONY: generate
generate: prometheus_alerts.yaml $(OUT_DIR) # prometheus_rules.yaml

# Run jsonnet-bundler whenever jsonnetfile.json or the jb binary is newer than
# the vendor directory. NOTE(review): presumably `jb install` populates
# $(JSONNET_VENDOR) - confirm against the jsonnet-bundler documentation.
$(JSONNET_VENDOR): $(JB_BIN) jsonnetfile.json
	$(JB_BIN) install

# Format every .libsonnet/.jsonnet file in place, skipping anything below a
# directory named 'vendor'. Paths are passed NUL-delimited so that names
# containing whitespace or quotes reach jsonnetfmt intact.
.PHONY: fmt
fmt: $(JSONNETFMT_BIN)
	find . -name 'vendor' -prune -o -name '*.libsonnet' -print0 -o -name '*.jsonnet' -print0 | \
		xargs -0 -n 1 -- $(JSONNETFMT_BIN) $(JSONNETFMT_ARGS) -i

# Render the alerts file from lib/alerts.jsonnet. The output goes to a
# temporary file that is renamed only on success, so a failing jsonnet run
# never leaves a truncated but up-to-date-looking target behind.
prometheus_alerts.yaml: $(JSONNET_BIN) mixin.libsonnet lib/alerts.jsonnet alerts/*.libsonnet
	@$(JSONNET_BIN) -J vendor -S lib/alerts.jsonnet > $@.tmp || { rm -f $@.tmp; exit 1; }
	@mv -f $@.tmp $@

# Same as above for the recording rules (not part of `generate` at present).
prometheus_rules.yaml: $(JSONNET_BIN) mixin.libsonnet lib/rules.jsonnet rules/*.libsonnet
	@$(JSONNET_BIN) -J vendor -S lib/rules.jsonnet > $@.tmp || { rm -f $@.tmp; exit 1; }
	@mv -f $@.tmp $@

# Render one file per dashboard into $(OUT_DIR) (jsonnet -m).
# Overwriting existing files does not change a directory's mtime, so the
# directory is touched afterwards; otherwise the target would stay older than
# its sources and be rebuilt on every run.
$(OUT_DIR): $(JSONNET_BIN) $(JSONNET_VENDOR) mixin.libsonnet lib/dashboards.jsonnet $(SRC_DIR)/*.libsonnet
	@mkdir -p $@
	@$(JSONNET_BIN) -J vendor -m $@ lib/dashboards.jsonnet
	@touch $@

# Run all linters.
.PHONY: lint
lint: jsonnet-lint alerts-lint dashboards-lint

# Lint every .libsonnet/.jsonnet file, skipping anything below a directory
# named 'vendor'. Paths are passed NUL-delimited so that names containing
# whitespace or quotes reach the linter intact.
.PHONY: jsonnet-lint
jsonnet-lint: $(JSONNETLINT_BIN) $(JSONNET_VENDOR)
	@find . -name 'vendor' -prune -o -name '*.libsonnet' -print0 -o -name '*.jsonnet' -print0 | \
		xargs -0 -n 1 -- $(JSONNETLINT_BIN) -J vendor


# Validate the generated alerts file with promtool.
# The check for prometheus_rules.yaml is disabled together with its
# prerequisite; re-enable both when recording rules are generated again:
#   @$(PROMTOOL_BIN) check rules prometheus_rules.yaml
.PHONY: alerts-lint
alerts-lint: $(PROMTOOL_BIN) prometheus_alerts.yaml # prometheus_rules.yaml
	@$(PROMTOOL_BIN) check rules prometheus_alerts.yaml

# Copy the linter configuration next to the generated dashboards.
# The source file is a real prerequisite so edits to .lint are propagated.
# The directory is order-only: it must exist (and be up to date), but its
# changing mtime must not force a new copy.
$(OUT_DIR)/.lint: .lint | $(OUT_DIR)
	@cp $< $@

# Lint every generated dashboard in strict mode.
# The sed call replaces the string '1w' with $$__rate_interval to make
# dashboard-linter happy. It edits the generated JSON files in place, so
# after this target the files differ from what `make generate` produced.
.PHONY: dashboards-lint
dashboards-lint: $(GRAFANA_DASHBOARD_LINTER_BIN) $(OUT_DIR) $(OUT_DIR)/.lint
	@sed -i -e 's/1w/$$__rate_interval/g' $(OUT_DIR)/*.json
	@find $(OUT_DIR) -name '*.json' -print0 | xargs -n 1 -0 $(GRAFANA_DASHBOARD_LINTER_BIN) lint --strict


# Delete build products. `git clean -X` removes only files that git ignores,
# so tracked and untracked-but-not-ignored files are left alone.
.PHONY: clean
clean:
	# Remove all files and directories ignored by git.
	git clean -Xfd .

# Run the promtool rule unit tests described in tests.yaml. The alerts file is
# a prerequisite so it is regenerated first; prometheus_rules.yaml is
# currently left out (see the trailing comment).
.PHONY: test
test: $(PROMTOOL_BIN) prometheus_alerts.yaml # prometheus_rules.yaml
	@$(PROMTOOL_BIN) test rules tests.yaml

# Create the directory the tool binaries are written to.
$(BIN_DIR):
	mkdir -p $@

# Build every tool imported (under the `tools` build tag) by the Go package in
# scripts/ and place the binaries in $(BIN_DIR). One run builds all tools.
# $(BIN_DIR) is an order-only prerequisite: it has to exist, but its mtime
# changes whenever a binary is written into it and must not mark the tools
# that were built a moment earlier as out of date.
$(TOOLING): | $(BIN_DIR)
	@echo Installing tools from the Go module in scripts/
	@cd scripts && go list -mod=mod -tags tools -f '{{ range .Imports }}{{ printf "%s\n" .}}{{end}}' ./ | xargs -tI % go build -mod=mod -o $(BIN_DIR) %

66 changes: 66 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Prometheus Monitoring Mixin for Django

A set of Grafana dashboards and Prometheus alerts for Django. [Blog post](https://hodovi.cc/blog/django-monitoring-with-prometheus-and-grafana/) covering Django monitoring with Prometheus and Grafana.

## Dashboards

- [Django Overview](https://grafana.com/grafana/dashboards/17617-django-overview/) - Django Overview, a simple overview of the database, cache and requests.

![django-overview](images/django-overview.png)

- [Django Requests Overview](https://grafana.com/grafana/dashboards/17616-django-requests-overview/) - Django requests overview, providing insights into all requests, filterable by view and method. It has separate graphs for app and admin views, as well as weekly breakdowns of top templates, top exceptions by type, top exceptions by view and top responses by view.

![django-requests-overview](images/django-requests-overview.png)

- [Django Requests by View](https://grafana.com/grafana/dashboards/17613-django-requests-by-view/) - Django requests by view, a breakdown of requests by view that shows compute-expensive metrics, such as latency buckets, alongside requests, responses and status codes.

![django-requests-by-view](images/django-requests-by-view.png)

There are also generated dashboards in the `./dashboards_out` directory.

## How to use

This mixin is designed to be vendored into the repo with your infrastructure config.
To do this, use [jsonnet-bundler](https://github.com/jsonnet-bundler/jsonnet-bundler):

You then have three options for deploying your dashboards

1. Generate the config files and deploy them yourself
2. Use jsonnet to deploy this mixin along with Prometheus and Grafana
3. Use prometheus-operator to deploy this mixin

Alternatively, import the dashboards from the JSON files in `./dashboards_out`, or from the `Grafana.com` dashboard page.

## Generate config files

You can manually generate the alerts, dashboards and rules files, but first you
must install some tools:

```sh
go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest
brew install jsonnet
```

Then, grab the mixin and its dependencies:

```sh
git clone https://github.com/danihodovic/django-exporter
cd django-exporter/django-mixin
jb install
```

Finally, build the mixin:

```sh
make prometheus_alerts.yaml
make dashboards_out
```

The `prometheus_alerts.yaml` file then needs to be passed
to your Prometheus server, and the files in `dashboards_out` need to be imported
into your Grafana server. The exact details will depend on how you deploy your
monitoring stack.

## Alerts

The mixin follows the [monitoring-mixins guidelines](https://github.com/monitoring-mixins/docs#guidelines-for-alert-names-labels-and-annotations) for alerts.
118 changes: 118 additions & 0 deletions alerts/alerts.libsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
// Prometheus alerting rules contributed by this mixin.
// Expressions, thresholds, severities and dashboard URLs are filled in from
// $._config, which is not defined in this block.
{
  // Hidden field (::) that extends (+) any inherited prometheusAlerts value;
  // `groups+` appends the group below to groups that already exist.
  prometheusAlerts+:: {
    groups+: [
      {
        name: 'django',
        rules: [
          // Warning when the summed unapplied-migrations metric of a
          // namespace/job stays above zero for 15 minutes.
          {
            alert: 'DjangoMigrationsUnapplied',
            expr: |||
              sum(
                django_migrations_unapplied_total{
                  %(djangoSelector)s
                }
              ) by (namespace, job)
              > 0
            ||| % $._config,
            labels: {
              severity: 'warning',
            },
            'for': '15m',
            annotations: {
              summary: 'Django has unapplied migrations.',
              description: 'The job {{ $labels.job }} has unapplied migrations.',
              dashboard_url: $._config.overviewDashboardUrl + '?var-job={{ $labels.job }}',
            },
          },
          // Info-level alert when the database error counter increased within
          // the last 10 minutes, grouped by error type. No 'for' is set.
          {
            alert: 'DjangoDatabaseException',
            expr: |||
              sum (
                increase(
                  django_db_errors_total{
                    %(djangoSelector)s
                  }[10m]
                )
              ) by (type, namespace, job)
              > 0
            ||| % $._config,
            labels: {
              severity: 'info',
            },
            annotations: {
              summary: 'Django database exception.',
              description: 'The job {{ $labels.job }} has hit the database exception {{ $labels.type }}.',
              dashboard_url: $._config.overviewDashboardUrl + '?var-job={{ $labels.job }}',
            },
          },
          // Share of responses whose status matches 4xx, per
          // namespace/job/view, as a percentage of all responses over
          // django4xxInterval; fires above django4xxThreshold. Views matching
          // djangoIgnoredViews are excluded from both sides of the ratio.
          {
            alert: 'DjangoHighHttp4xxErrorRate',
            expr: |||
              sum(
                rate(
                  django_http_responses_total_by_status_view_method_total{
                    %(djangoSelector)s,
                    status=~"^4.*",
                    view!~"%(djangoIgnoredViews)s"
                  }[%(django4xxInterval)s]
                )
              ) by (namespace, job, view)
              /
              sum(
                rate(
                  django_http_responses_total_by_status_view_method_total{
                    %(djangoSelector)s,
                    view!~"%(djangoIgnoredViews)s"
                  }[%(django4xxInterval)s]
                )
              ) by (namespace, job, view)
              * 100 > %(django4xxThreshold)s
            ||| % $._config,
            'for': '1m',
            annotations: {
              summary: 'Django high HTTP 4xx error rate.',
              // '%%' produces a literal percent sign after formatting.
              description: 'More than %(django4xxThreshold)s%% HTTP requests with status 4xx for {{ $labels.job }}/{{ $labels.view }} the past %(django4xxInterval)s.' % $._config,
              dashboard_url: $._config.requestsByViewDashboardUrl + '?var-job={{ $labels.job }}&var-view={{ $labels.view }}',
            },
            labels: {
              severity: $._config.django4xxSeverity,
            },
          },
          // Same ratio as the 4xx alert, for statuses matching 5xx and using
          // the django5xx* settings.
          {
            alert: 'DjangoHighHttp5xxErrorRate',
            expr: |||
              sum(
                rate(
                  django_http_responses_total_by_status_view_method_total{
                    %(djangoSelector)s,
                    status=~"^5.*",
                    view!~"%(djangoIgnoredViews)s"
                  }[%(django5xxInterval)s]
                )
              ) by (namespace, job, view)
              /
              sum(
                rate(
                  django_http_responses_total_by_status_view_method_total{
                    %(djangoSelector)s,
                    view!~"%(djangoIgnoredViews)s"
                  }[%(django5xxInterval)s]
                )
              ) by (namespace, job, view)
              * 100 > %(django5xxThreshold)s
            ||| % $._config,
            'for': '1m',
            annotations: {
              summary: 'Django high HTTP 5xx error rate.',
              // '%%' produces a literal percent sign after formatting.
              description: 'More than %(django5xxThreshold)s%% HTTP requests with status 5xx for {{ $labels.job }}/{{ $labels.view }} the past %(django5xxInterval)s.' % $._config,
              dashboard_url: $._config.requestsByViewDashboardUrl + '?var-job={{ $labels.job }}&var-view={{ $labels.view }}',
            },
            labels: {
              severity: $._config.django5xxSeverity,
            },
          },
        ],
      },
    ],
  },
}
Loading

0 comments on commit 6b32a08

Please sign in to comment.