Skip to content

Commit

Permalink
WIP: Direct paging improvements (#3064)
Browse files Browse the repository at this point in the history
# What this PR does
* Create Direct Paging integration (with default route) when team is
created with bulk_update
* Create notification policies when user is created with bulk_update
* If user notification policies are empty change it to Email
* Minor markup and wording improvements
* Add grafana queue to helm chart
* Remove disabled commands for redis helm chart
* Improve Dockerfile caching

## Which issue(s) this PR fixes

## Checklist

- [ ] Unit, integration, and e2e (if applicable) tests updated
- [ ] Documentation added (or `pr:no public docs` PR label added if not
required)
- [ ] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not
required)
  • Loading branch information
iskhakov authored Sep 28, 2023
1 parent 9126f21 commit 5101473
Show file tree
Hide file tree
Showing 29 changed files with 318 additions and 186 deletions.
6 changes: 6 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Ignore everything
*

# Allow directories
!/engine
!/grafana-plugin
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fix shifts for current user internal endpoint to return the right shift PK ([#3036](https://github.com/grafana/oncall/pull/3036))
- Handle Slack ratelimit on alert group deletion by @vadimkerr ([#3038](https://github.com/grafana/oncall/pull/3038))

### Added

- Create Direct Paging integration by default for every team, create default E-Mail notification policy for every user ([#3064](https://github.com/grafana/oncall/pull/3064))

## v1.3.37 (2023-09-12)

### Added
Expand Down
138 changes: 86 additions & 52 deletions Tiltfile
Original file line number Diff line number Diff line change
@@ -1,82 +1,116 @@
running_under_parent_tiltfile = os.getenv('TILT_PARENT', 'false') == 'true'
running_under_parent_tiltfile = os.getenv("TILT_PARENT", "false") == "true"
# The user/pass that you will login to Grafana with
grafana_admin_user_pass = os.getenv('GRAFANA_ADMIN_USER_PASS', 'oncall')
grafana_admin_user_pass = os.getenv("GRAFANA_ADMIN_USER_PASS", "oncall")
# HELM_PREFIX must be "oncall-dev" as it is hardcoded in dev/helm-local.yml
HELM_PREFIX="oncall-dev"
HELM_PREFIX = "oncall-dev"
# Use docker registery generated by ctlptl (dev/kind-config.yaml)
DOCKER_REGISTRY="localhost:63628/"
DOCKER_REGISTRY = "localhost:63628/"

if not running_under_parent_tiltfile:
# Load the custom Grafana extensions
v1alpha1.extension_repo(name='grafana-tilt-extensions',
ref='main',
url='https://github.com/grafana/tilt-extensions')
v1alpha1.extension(name='grafana', repo_name='grafana-tilt-extensions', repo_path='grafana')
v1alpha1.extension_repo(
name="grafana-tilt-extensions",
ref="main",
url="https://github.com/grafana/tilt-extensions",
)
v1alpha1.extension(
name="grafana", repo_name="grafana-tilt-extensions", repo_path="grafana"
)

load('ext://grafana', 'grafana')
load('ext://configmap', 'configmap_create')
load("ext://grafana", "grafana")
load("ext://configmap", "configmap_create")
load("ext://docker_build_sub", "docker_build_sub")

# Tell ops-devenv/Tiltifle where our plugin.json file lives
plugin_file = os.path.abspath('grafana-plugin/src/plugin.json')
plugin_file = os.path.abspath("grafana-plugin/src/plugin.json")


def plugin_json():
    """Return the absolute path to the OnCall plugin.json.

    Consumed by the parent ops-devenv/Tiltfile (see comment above) to locate
    the Grafana plugin manifest; `plugin_file` is resolved once at load time.
    """
    return plugin_file


allow_k8s_contexts(["kind-kind"])

docker_build(
"localhost:63628/oncall/engine:dev",
"./engine",
target = 'prod',
live_update=[
sync('./engine/', '/etc/app'),
run('cd /etc/app && pip install -r requirements.txt',
trigger='./engine/requirements.txt'),
]
local_resource("download-cache", cmd="docker pull grafana/oncall:latest; docker tag grafana/oncall localhost:63628/grafana/oncall:latest")

# Build the image including frontend folder for pytest
docker_build_sub(
"localhost:63628/oncall/engine:dev",
context="./engine",
cache_from="localhost:63628/grafana/oncall:latest",
# only=["./engine", "./grafana-plugin"],
ignore=["./grafana-plugin/test-results/", "./grafana-plugin/dist/", "./grafana-plugin/e2e-tests/"],
child_context=".",
target="dev",
extra_cmds=["ADD ./grafana-plugin/src/plugin.json /etc/grafana-plugin/src/plugin.json"],
live_update=[
sync("./engine/", "/etc/app"),
run(
"cd /etc/app && pip install -r requirements.txt",
trigger="./engine/requirements.txt",
),
],
)

# Build the plugin in the background
local_resource('build-ui',
labels=['OnCallUI'],
cmd='cd grafana-plugin && yarn install && yarn build:dev',
serve_cmd='cd grafana-plugin && ONCALL_API_URL=http://oncall-dev-engine:8080 yarn watch',
allow_parallel=True)
local_resource(
"build-ui",
labels=["OnCallUI"],
cmd="cd grafana-plugin && yarn install && yarn build:dev",
serve_cmd="cd grafana-plugin && ONCALL_API_URL=http://oncall-dev-engine:8080 yarn watch",
allow_parallel=True,
)

yaml = helm(
'helm/oncall',
name=HELM_PREFIX,
values=['./dev/helm-local.yml'])
yaml = helm("helm/oncall", name=HELM_PREFIX, values=["./dev/helm-local.yml"])

k8s_yaml(yaml)

# Generate and load the grafana deploy yaml
configmap_create('grafana-oncall-app-provisioning',
namespace='default',
from_file='dev/grafana/provisioning/plugins/grafana-oncall-app-provisioning.yaml')
configmap_create(
"grafana-oncall-app-provisioning",
namespace="default",
from_file="dev/grafana/provisioning/plugins/grafana-oncall-app-provisioning.yaml",
)

k8s_resource(objects=['grafana-oncall-app-provisioning:configmap'],
new_name='grafana-oncall-app-provisioning-configmap',
resource_deps = ['build-ui', 'engine'],
labels=['Grafana'])
k8s_resource(
objects=["grafana-oncall-app-provisioning:configmap"],
new_name="grafana-oncall-app-provisioning-configmap",
resource_deps=["build-ui", "engine"],
labels=["Grafana"],
)

# Use separate grafana helm chart
if not running_under_parent_tiltfile:
grafana(context='grafana-plugin',
plugin_files = ['grafana-plugin/src/plugin.json'],
namespace='default',
deps = ['grafana-oncall-app-provisioning-configmap', 'build-ui', 'engine'],
extra_env={
'GF_SECURITY_ADMIN_PASSWORD': 'oncall',
'GF_SECURITY_ADMIN_USER': 'oncall',
'GF_AUTH_ANONYMOUS_ENABLED': 'false',
},
)

k8s_resource(workload='celery', resource_deps=['mariadb', 'redis-master'], labels=['OnCallBackend'])
k8s_resource(workload='engine', port_forwards=8080, resource_deps=['mariadb', 'redis-master'], labels=['OnCallBackend'])
k8s_resource(workload='redis-master', labels=['OnCallDeps'])
k8s_resource(workload='mariadb', labels=['OnCallDeps'])
grafana(
context="grafana-plugin",
plugin_files=["grafana-plugin/src/plugin.json"],
namespace="default",
deps=["grafana-oncall-app-provisioning-configmap", "build-ui", "engine"],
extra_env={
"GF_SECURITY_ADMIN_PASSWORD": "oncall",
"GF_SECURITY_ADMIN_USER": "oncall",
"GF_AUTH_ANONYMOUS_ENABLED": "false",
},
)

k8s_resource(
workload="celery",
resource_deps=["mariadb", "redis-master"],
labels=["OnCallBackend"],
)
k8s_resource(
workload="engine",
port_forwards=8080,
resource_deps=["mariadb", "redis-master"],
labels=["OnCallBackend"],
)
k8s_resource(workload="redis-master", labels=["OnCallDeps"])
k8s_resource(workload="mariadb", labels=["OnCallDeps"])


# name all tilt resources after the k8s object namespace + name
def resource_name(id):
    """Name each Tilt resource after its k8s object, minus the helm release prefix.

    E.g. with HELM_PREFIX == "oncall-dev", the workload "oncall-dev-engine"
    becomes the Tilt resource "engine". Registered below via
    workload_to_resource_function(). Note: the diff residue contained two
    return statements (pre- and post-reformat); only the current one is kept.
    """
    return id.name.replace(HELM_PREFIX + "-", "")


workload_to_resource_function(resource_name)
2 changes: 2 additions & 0 deletions dev/helm-local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ redis:
tag: 7.0.5
auth:
password: oncallpassword
master:
disableCommands: []
rabbitmq:
enabled: false
oncall:
Expand Down
5 changes: 1 addition & 4 deletions engine/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@ RUN apk add bash \

WORKDIR /etc/app
COPY ./requirements.txt ./
COPY ./pip/cache ./pip/cache
RUN if uname -m | grep -q "aarch64" ; then pip install pip/cache/grpcio-1.57.0-cp311-cp311-linux_aarch64.whl ; else echo "skip" ; fi
RUN pip install --upgrade pip
RUN pip install --upgrade setuptools wheel
COPY ./pip/cache /root/.cache/pip/wheels/
RUN pip install -r requirements.txt

# we intentionally have two COPY commands, this is to have the requirements.txt in a separate build step
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ def _get_notification_plan_for_user(self, user_to_notify, future_step=False, imp
# last passed step order + 1
notification_policy_order = last_user_log.notification_policy.order + 1

notification_policies = UserNotificationPolicy.objects.filter(user=user_to_notify, important=important)
notification_policies = user_to_notify.get_or_create_notification_policies(important=important)

for notification_policy in notification_policies:
future_notification = notification_policy.order >= notification_policy_order
Expand Down
3 changes: 2 additions & 1 deletion engine/apps/alerts/models/alert_receive_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,9 @@ def get_default_template_attribute(self, render_for, attr_name):

@classmethod
def create(cls, **kwargs):
organization = kwargs["organization"]
with transaction.atomic():
other_channels = cls.objects_with_deleted.select_for_update().filter(organization=kwargs["organization"])
other_channels = cls.objects_with_deleted.select_for_update().filter(organization=organization)
channel = cls(**kwargs)
smile_code = number_to_smiles_translator(other_channels.count())
verbal_name = (
Expand Down
6 changes: 2 additions & 4 deletions engine/apps/alerts/tasks/notify_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,8 @@ def notify_group_task(alert_group_pk, escalation_policy_snapshot_order=None):
if not user.is_notification_allowed:
continue

notification_policies = UserNotificationPolicy.objects.filter(
user=user,
important=escalation_policy_step == EscalationPolicy.STEP_NOTIFY_GROUP_IMPORTANT,
)
important = escalation_policy_step == EscalationPolicy.STEP_NOTIFY_GROUP_IMPORTANT
notification_policies = user.get_or_create_notification_policies(important=important)

if notification_policies:
usergroup_notification_plan += "\n_{} (".format(
Expand Down
2 changes: 1 addition & 1 deletion engine/apps/alerts/tasks/notify_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def notify_user_task(
user_has_notification = UserHasNotification.objects.filter(pk=user_has_notification.pk).select_for_update()[0]

if previous_notification_policy_pk is None:
notification_policy = UserNotificationPolicy.objects.filter(user=user, important=important).first()
notification_policy = user.get_or_create_notification_policies(important=important).first()
if notification_policy is None:
task_logger.info(
f"notify_user_task: Failed to notify. No notification policies. user_id={user_pk} alert_group_id={alert_group_pk} important={important}"
Expand Down
6 changes: 2 additions & 4 deletions engine/apps/api/views/user_notification_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,13 @@ def get_queryset(self):
except ValueError:
raise BadRequest(detail="Invalid user param")
if user_id is None or user_id == self.request.user.public_primary_key:
queryset = self.model.objects.filter(user=self.request.user, important=important)
target_user = self.request.user
else:
try:
target_user = User.objects.get(public_primary_key=user_id)
except User.DoesNotExist:
raise BadRequest(detail="User does not exist")

queryset = self.model.objects.filter(user=target_user, important=important)

queryset = target_user.get_or_create_notification_policies(important=important)
return self.serializer_class.setup_eager_loading(queryset)

def get_object(self):
Expand Down
23 changes: 2 additions & 21 deletions engine/apps/base/models/user_notification_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,7 @@ def create_default_policies_for_user(self, user: User) -> None:
if user.notification_policies.filter(important=False).exists():
return

model = self.model
policies_to_create = (
model(
user=user,
step=model.Step.NOTIFY,
notify_by=NotificationChannelOptions.DEFAULT_NOTIFICATION_CHANNEL,
order=0,
),
model(user=user, step=model.Step.WAIT, wait_delay=datetime.timedelta(minutes=15), order=1),
model(user=user, step=model.Step.NOTIFY, notify_by=model.NotificationChannel.PHONE_CALL, order=2),
)
policies_to_create = user.default_notification_policies_defaults

try:
super().bulk_create(policies_to_create)
Expand All @@ -92,16 +82,7 @@ def create_important_policies_for_user(self, user: User) -> None:
if user.notification_policies.filter(important=True).exists():
return

model = self.model
policies_to_create = (
model(
user=user,
step=model.Step.NOTIFY,
notify_by=model.NotificationChannel.PHONE_CALL,
important=True,
order=0,
),
)
policies_to_create = user.important_notification_policies_defaults

try:
super().bulk_create(policies_to_create)
Expand Down
44 changes: 41 additions & 3 deletions engine/apps/user_management/models/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

from django.conf import settings
from django.core.validators import MinLengthValidator
from django.db import models
from django.db import models, transaction

from apps.metrics_exporter.helpers import metrics_bulk_update_team_label_cache
from apps.alerts.models import AlertReceiveChannel, ChannelFilter
from apps.metrics_exporter.helpers import metrics_add_integration_to_cache, metrics_bulk_update_team_label_cache
from apps.metrics_exporter.metrics_cache_manager import MetricsCacheManager
from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length

Expand Down Expand Up @@ -51,7 +52,44 @@ def sync_for_organization(
for team in grafana_teams.values()
if team["id"] not in existing_team_ids
)
organization.teams.bulk_create(teams_to_create, batch_size=5000)

with transaction.atomic():
organization.teams.bulk_create(teams_to_create, batch_size=5000)
# Retrieve primary keys for the newly created teams
#
# If the model’s primary key is an AutoField, the primary key attribute can only be retrieved
# on certain databases (currently PostgreSQL, MariaDB 10.5+, and SQLite 3.35+).
# On other databases, it will not be set.
# https://docs.djangoproject.com/en/4.1/ref/models/querysets/#django.db.models.query.QuerySet.bulk_create
created_teams = organization.teams.exclude(team_id__in=existing_team_ids)
direct_paging_integrations_to_create = []
for team in created_teams:
alert_receive_channel = AlertReceiveChannel(
organization=organization,
team=team,
integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING,
verbal_name=f"Direct paging ({team.name if team else 'No'} team)",
)
direct_paging_integrations_to_create.append(alert_receive_channel)
AlertReceiveChannel.objects.bulk_create(direct_paging_integrations_to_create, batch_size=5000)
created_direct_paging_integrations = AlertReceiveChannel.objects.filter(
organization=organization,
integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING,
).exclude(team__team_id__in=existing_team_ids)
default_channel_filters_to_create = []
for integration in created_direct_paging_integrations:
channel_filter = ChannelFilter(
alert_receive_channel=integration,
filtering_term=None,
is_default=True,
order=0,
)
default_channel_filters_to_create.append(channel_filter)
ChannelFilter.objects.bulk_create(default_channel_filters_to_create, batch_size=5000)

# Add direct paging integrations to metrics cache
for integration in direct_paging_integrations_to_create:
metrics_add_integration_to_cache(integration)

# delete excess teams
team_ids_to_delete = existing_team_ids - grafana_teams.keys()
Expand Down
Loading

0 comments on commit 5101473

Please sign in to comment.