Skip to content

Commit

Permalink
WIP: Direct paging improvements (#3064)
Browse files Browse the repository at this point in the history
# What this PR does
* Create Direct Paging integration (with default route) when team is
created with bulk_update
* Create notification policies when user is created with bulk_update
* If user notification policies are empty change it to Email
* Minor markup and wording improvements
* Add grafana queue to helm chart
* Remove disabled commands for redis helm chart
* Improve Dockerfile caching

## Which issue(s) this PR fixes

## Checklist

- [ ] Unit, integration, and e2e (if applicable) tests updated
- [ ] Documentation added (or `pr:no public docs` PR label added if not
required)
- [ ] `CHANGELOG.md` updated (or `pr:no changelog` PR label added if not
required)
  • Loading branch information
iskhakov authored Sep 28, 2023
1 parent 9126f21 commit 5101473
Show file tree
Hide file tree
Showing 29 changed files with 318 additions and 186 deletions.
6 changes: 6 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Ignore everything
*

# Allow directories
!/engine
!/grafana-plugin
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fix shifts for current user internal endpoint to return the right shift PK ([#3036](https://github.com/grafana/oncall/pull/3036))
- Handle Slack ratelimit on alert group deletion by @vadimkerr ([#3038](https://github.com/grafana/oncall/pull/3038))

### Added

- Create Direct Paging integration by default for every team, create default E-Mail notification policy for every user ([#3064](https://github.com/grafana/oncall/pull/3064))

## v1.3.37 (2023-09-12)

### Added
Expand Down
138 changes: 86 additions & 52 deletions Tiltfile
Original file line number Diff line number Diff line change
@@ -1,82 +1,116 @@
running_under_parent_tiltfile = os.getenv('TILT_PARENT', 'false') == 'true'
running_under_parent_tiltfile = os.getenv("TILT_PARENT", "false") == "true"
# The user/pass that you will login to Grafana with
grafana_admin_user_pass = os.getenv('GRAFANA_ADMIN_USER_PASS', 'oncall')
grafana_admin_user_pass = os.getenv("GRAFANA_ADMIN_USER_PASS", "oncall")
# HELM_PREFIX must be "oncall-dev" as it is hardcoded in dev/helm-local.yml
HELM_PREFIX="oncall-dev"
HELM_PREFIX = "oncall-dev"
# Use docker registery generated by ctlptl (dev/kind-config.yaml)
DOCKER_REGISTRY="localhost:63628/"
DOCKER_REGISTRY = "localhost:63628/"

if not running_under_parent_tiltfile:
# Load the custom Grafana extensions
v1alpha1.extension_repo(name='grafana-tilt-extensions',
ref='main',
url='https://github.com/grafana/tilt-extensions')
v1alpha1.extension(name='grafana', repo_name='grafana-tilt-extensions', repo_path='grafana')
v1alpha1.extension_repo(
name="grafana-tilt-extensions",
ref="main",
url="https://github.com/grafana/tilt-extensions",
)
v1alpha1.extension(
name="grafana", repo_name="grafana-tilt-extensions", repo_path="grafana"
)

load('ext://grafana', 'grafana')
load('ext://configmap', 'configmap_create')
load("ext://grafana", "grafana")
load("ext://configmap", "configmap_create")
load("ext://docker_build_sub", "docker_build_sub")

# Tell ops-devenv/Tiltifle where our plugin.json file lives
plugin_file = os.path.abspath('grafana-plugin/src/plugin.json')
plugin_file = os.path.abspath("grafana-plugin/src/plugin.json")


def plugin_json():
    """Return the absolute path to the OnCall plugin.json.

    Consumed by the parent ops-devenv/Tiltfile (see comment above) to locate
    the Grafana plugin manifest; `plugin_file` is resolved once at load time.
    """
    return plugin_file


allow_k8s_contexts(["kind-kind"])

docker_build(
"localhost:63628/oncall/engine:dev",
"./engine",
target = 'prod',
live_update=[
sync('./engine/', '/etc/app'),
run('cd /etc/app && pip install -r requirements.txt',
trigger='./engine/requirements.txt'),
]
local_resource("download-cache", cmd="docker pull grafana/oncall:latest; docker tag grafana/oncall localhost:63628/grafana/oncall:latest")

# Build the image including frontend folder for pytest
docker_build_sub(
"localhost:63628/oncall/engine:dev",
context="./engine",
cache_from="localhost:63628/grafana/oncall:latest",
# only=["./engine", "./grafana-plugin"],
ignore=["./grafana-plugin/test-results/", "./grafana-plugin/dist/", "./grafana-plugin/e2e-tests/"],
child_context=".",
target="dev",
extra_cmds=["ADD ./grafana-plugin/src/plugin.json /etc/grafana-plugin/src/plugin.json"],
live_update=[
sync("./engine/", "/etc/app"),
run(
"cd /etc/app && pip install -r requirements.txt",
trigger="./engine/requirements.txt",
),
],
)

# Build the plugin in the background
local_resource('build-ui',
labels=['OnCallUI'],
cmd='cd grafana-plugin && yarn install && yarn build:dev',
serve_cmd='cd grafana-plugin && ONCALL_API_URL=http://oncall-dev-engine:8080 yarn watch',
allow_parallel=True)
local_resource(
"build-ui",
labels=["OnCallUI"],
cmd="cd grafana-plugin && yarn install && yarn build:dev",
serve_cmd="cd grafana-plugin && ONCALL_API_URL=http://oncall-dev-engine:8080 yarn watch",
allow_parallel=True,
)

yaml = helm(
'helm/oncall',
name=HELM_PREFIX,
values=['./dev/helm-local.yml'])
yaml = helm("helm/oncall", name=HELM_PREFIX, values=["./dev/helm-local.yml"])

k8s_yaml(yaml)

# Generate and load the grafana deploy yaml
configmap_create('grafana-oncall-app-provisioning',
namespace='default',
from_file='dev/grafana/provisioning/plugins/grafana-oncall-app-provisioning.yaml')
configmap_create(
"grafana-oncall-app-provisioning",
namespace="default",
from_file="dev/grafana/provisioning/plugins/grafana-oncall-app-provisioning.yaml",
)

k8s_resource(objects=['grafana-oncall-app-provisioning:configmap'],
new_name='grafana-oncall-app-provisioning-configmap',
resource_deps = ['build-ui', 'engine'],
labels=['Grafana'])
k8s_resource(
objects=["grafana-oncall-app-provisioning:configmap"],
new_name="grafana-oncall-app-provisioning-configmap",
resource_deps=["build-ui", "engine"],
labels=["Grafana"],
)

# Use separate grafana helm chart
if not running_under_parent_tiltfile:
grafana(context='grafana-plugin',
plugin_files = ['grafana-plugin/src/plugin.json'],
namespace='default',
deps = ['grafana-oncall-app-provisioning-configmap', 'build-ui', 'engine'],
extra_env={
'GF_SECURITY_ADMIN_PASSWORD': 'oncall',
'GF_SECURITY_ADMIN_USER': 'oncall',
'GF_AUTH_ANONYMOUS_ENABLED': 'false',
},
)

k8s_resource(workload='celery', resource_deps=['mariadb', 'redis-master'], labels=['OnCallBackend'])
k8s_resource(workload='engine', port_forwards=8080, resource_deps=['mariadb', 'redis-master'], labels=['OnCallBackend'])
k8s_resource(workload='redis-master', labels=['OnCallDeps'])
k8s_resource(workload='mariadb', labels=['OnCallDeps'])
grafana(
context="grafana-plugin",
plugin_files=["grafana-plugin/src/plugin.json"],
namespace="default",
deps=["grafana-oncall-app-provisioning-configmap", "build-ui", "engine"],
extra_env={
"GF_SECURITY_ADMIN_PASSWORD": "oncall",
"GF_SECURITY_ADMIN_USER": "oncall",
"GF_AUTH_ANONYMOUS_ENABLED": "false",
},
)

k8s_resource(
workload="celery",
resource_deps=["mariadb", "redis-master"],
labels=["OnCallBackend"],
)
k8s_resource(
workload="engine",
port_forwards=8080,
resource_deps=["mariadb", "redis-master"],
labels=["OnCallBackend"],
)
k8s_resource(workload="redis-master", labels=["OnCallDeps"])
k8s_resource(workload="mariadb", labels=["OnCallDeps"])


# name all tilt resources after the k8s object namespace + name
def resource_name(id):
    """Name each Tilt resource after its k8s object, minus the helm release prefix.

    E.g. with HELM_PREFIX == "oncall-dev", the workload "oncall-dev-engine"
    becomes the Tilt resource "engine". Registered below via
    workload_to_resource_function(). Note: the diff residue contained two
    return statements (pre- and post-reformat); only the current one is kept.
    """
    return id.name.replace(HELM_PREFIX + "-", "")


workload_to_resource_function(resource_name)
2 changes: 2 additions & 0 deletions dev/helm-local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ redis:
tag: 7.0.5
auth:
password: oncallpassword
master:
disableCommands: []
rabbitmq:
enabled: false
oncall:
Expand Down
5 changes: 1 addition & 4 deletions engine/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@ RUN apk add bash \

WORKDIR /etc/app
COPY ./requirements.txt ./
COPY ./pip/cache ./pip/cache
RUN if uname -m | grep -q "aarch64" ; then pip install pip/cache/grpcio-1.57.0-cp311-cp311-linux_aarch64.whl ; else echo "skip" ; fi
RUN pip install --upgrade pip
RUN pip install --upgrade setuptools wheel
COPY ./pip/cache /root/.cache/pip/wheels/
RUN pip install -r requirements.txt

# we intentionally have two COPY commands, this is to have the requirements.txt in a separate build step
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ def _get_notification_plan_for_user(self, user_to_notify, future_step=False, imp
# last passed step order + 1
notification_policy_order = last_user_log.notification_policy.order + 1

notification_policies = UserNotificationPolicy.objects.filter(user=user_to_notify, important=important)
notification_policies = user_to_notify.get_or_create_notification_policies(important=important)

for notification_policy in notification_policies:
future_notification = notification_policy.order >= notification_policy_order
Expand Down
3 changes: 2 additions & 1 deletion engine/apps/alerts/models/alert_receive_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,9 @@ def get_default_template_attribute(self, render_for, attr_name):

@classmethod
def create(cls, **kwargs):
organization = kwargs["organization"]
with transaction.atomic():
other_channels = cls.objects_with_deleted.select_for_update().filter(organization=kwargs["organization"])
other_channels = cls.objects_with_deleted.select_for_update().filter(organization=organization)
channel = cls(**kwargs)
smile_code = number_to_smiles_translator(other_channels.count())
verbal_name = (
Expand Down
6 changes: 2 additions & 4 deletions engine/apps/alerts/tasks/notify_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,8 @@ def notify_group_task(alert_group_pk, escalation_policy_snapshot_order=None):
if not user.is_notification_allowed:
continue

notification_policies = UserNotificationPolicy.objects.filter(
user=user,
important=escalation_policy_step == EscalationPolicy.STEP_NOTIFY_GROUP_IMPORTANT,
)
important = escalation_policy_step == EscalationPolicy.STEP_NOTIFY_GROUP_IMPORTANT
notification_policies = user.get_or_create_notification_policies(important=important)

if notification_policies:
usergroup_notification_plan += "\n_{} (".format(
Expand Down
2 changes: 1 addition & 1 deletion engine/apps/alerts/tasks/notify_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def notify_user_task(
user_has_notification = UserHasNotification.objects.filter(pk=user_has_notification.pk).select_for_update()[0]

if previous_notification_policy_pk is None:
notification_policy = UserNotificationPolicy.objects.filter(user=user, important=important).first()
notification_policy = user.get_or_create_notification_policies(important=important).first()
if notification_policy is None:
task_logger.info(
f"notify_user_task: Failed to notify. No notification policies. user_id={user_pk} alert_group_id={alert_group_pk} important={important}"
Expand Down
6 changes: 2 additions & 4 deletions engine/apps/api/views/user_notification_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,13 @@ def get_queryset(self):
except ValueError:
raise BadRequest(detail="Invalid user param")
if user_id is None or user_id == self.request.user.public_primary_key:
queryset = self.model.objects.filter(user=self.request.user, important=important)
target_user = self.request.user
else:
try:
target_user = User.objects.get(public_primary_key=user_id)
except User.DoesNotExist:
raise BadRequest(detail="User does not exist")

queryset = self.model.objects.filter(user=target_user, important=important)

queryset = target_user.get_or_create_notification_policies(important=important)
return self.serializer_class.setup_eager_loading(queryset)

def get_object(self):
Expand Down
23 changes: 2 additions & 21 deletions engine/apps/base/models/user_notification_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,7 @@ def create_default_policies_for_user(self, user: User) -> None:
if user.notification_policies.filter(important=False).exists():
return

model = self.model
policies_to_create = (
model(
user=user,
step=model.Step.NOTIFY,
notify_by=NotificationChannelOptions.DEFAULT_NOTIFICATION_CHANNEL,
order=0,
),
model(user=user, step=model.Step.WAIT, wait_delay=datetime.timedelta(minutes=15), order=1),
model(user=user, step=model.Step.NOTIFY, notify_by=model.NotificationChannel.PHONE_CALL, order=2),
)
policies_to_create = user.default_notification_policies_defaults

try:
super().bulk_create(policies_to_create)
Expand All @@ -92,16 +82,7 @@ def create_important_policies_for_user(self, user: User) -> None:
if user.notification_policies.filter(important=True).exists():
return

model = self.model
policies_to_create = (
model(
user=user,
step=model.Step.NOTIFY,
notify_by=model.NotificationChannel.PHONE_CALL,
important=True,
order=0,
),
)
policies_to_create = user.important_notification_policies_defaults

try:
super().bulk_create(policies_to_create)
Expand Down
44 changes: 41 additions & 3 deletions engine/apps/user_management/models/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

from django.conf import settings
from django.core.validators import MinLengthValidator
from django.db import models
from django.db import models, transaction

from apps.metrics_exporter.helpers import metrics_bulk_update_team_label_cache
from apps.alerts.models import AlertReceiveChannel, ChannelFilter
from apps.metrics_exporter.helpers import metrics_add_integration_to_cache, metrics_bulk_update_team_label_cache
from apps.metrics_exporter.metrics_cache_manager import MetricsCacheManager
from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length

Expand Down Expand Up @@ -51,7 +52,44 @@ def sync_for_organization(
for team in grafana_teams.values()
if team["id"] not in existing_team_ids
)
organization.teams.bulk_create(teams_to_create, batch_size=5000)

with transaction.atomic():
organization.teams.bulk_create(teams_to_create, batch_size=5000)
# Retrieve primary keys for the newly created teams
#
# If the model’s primary key is an AutoField, the primary key attribute can only be retrieved
# on certain databases (currently PostgreSQL, MariaDB 10.5+, and SQLite 3.35+).
# On other databases, it will not be set.
# https://docs.djangoproject.com/en/4.1/ref/models/querysets/#django.db.models.query.QuerySet.bulk_create
created_teams = organization.teams.exclude(team_id__in=existing_team_ids)
direct_paging_integrations_to_create = []
for team in created_teams:
alert_receive_channel = AlertReceiveChannel(
organization=organization,
team=team,
integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING,
verbal_name=f"Direct paging ({team.name if team else 'No'} team)",
)
direct_paging_integrations_to_create.append(alert_receive_channel)
AlertReceiveChannel.objects.bulk_create(direct_paging_integrations_to_create, batch_size=5000)
created_direct_paging_integrations = AlertReceiveChannel.objects.filter(
organization=organization,
integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING,
).exclude(team__team_id__in=existing_team_ids)
default_channel_filters_to_create = []
for integration in created_direct_paging_integrations:
channel_filter = ChannelFilter(
alert_receive_channel=integration,
filtering_term=None,
is_default=True,
order=0,
)
default_channel_filters_to_create.append(channel_filter)
ChannelFilter.objects.bulk_create(default_channel_filters_to_create, batch_size=5000)

# Add direct paging integrations to metrics cache
for integration in direct_paging_integrations_to_create:
metrics_add_integration_to_cache(integration)

# delete excess teams
team_ids_to_delete = existing_team_ids - grafana_teams.keys()
Expand Down
Loading

0 comments on commit 5101473

Please sign in to comment.