Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add responders improvements #3128

Merged
merged 48 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
4d6794b
WIP direct paging changes
joeyorlando Oct 5, 2023
4bfd384
Merge branch 'dev' into jorlando/direct-paging
joeyorlando Oct 5, 2023
0d8b632
update tests
joeyorlando Oct 5, 2023
4a74bff
WIP
joeyorlando Oct 6, 2023
dc058c2
WIP
joeyorlando Oct 6, 2023
3d80dbf
WIP
joeyorlando Oct 7, 2023
25c7912
WIP
joeyorlando Oct 17, 2023
c8581b0
WIP
joeyorlando Oct 17, 2023
748791c
WIP
joeyorlando Oct 19, 2023
50bd4e7
update some unit tests
joeyorlando Oct 19, 2023
cd64267
add some more tests
joeyorlando Oct 19, 2023
8146c47
add more tests
joeyorlando Oct 19, 2023
0f86f12
allow filtering users by is_currently_oncall
joeyorlando Oct 19, 2023
4bb90c3
allow filtering by is_currently_oncall=false
joeyorlando Oct 19, 2023
2f72920
WIP
joeyorlando Oct 20, 2023
2ba681a
WIP
joeyorlando Oct 20, 2023
0bf9323
WIP
joeyorlando Oct 20, 2023
a46aede
WIP
joeyorlando Oct 23, 2023
5d98949
WIP
joeyorlando Oct 23, 2023
3f30e76
style add responders popup
joeyorlando Oct 23, 2023
8899845
update backend unit tests
joeyorlando Oct 23, 2023
4bcc713
update some more backend tests
joeyorlando Oct 24, 2023
8a1c17b
add title attribute to direct paging endpoint
joeyorlando Oct 24, 2023
2019eee
add more backend tests
joeyorlando Oct 24, 2023
fd5f5d8
more tests + UI styling changes
joeyorlando Oct 24, 2023
2b932c8
Merge branch 'dev' into jorlando/direct-paging
joeyorlando Oct 24, 2023
f51c735
add more frontend unit tests
joeyorlando Oct 24, 2023
28f33f4
update changelog
joeyorlando Oct 24, 2023
d2c2ad5
address PR comments
joeyorlando Oct 25, 2023
fc89fe8
revert change to GForm
joeyorlando Oct 25, 2023
0ac4521
add teams to objects in paged_users
joeyorlando Oct 25, 2023
a05e691
update public documentation
joeyorlando Oct 26, 2023
3808099
Merge branch 'dev' into jorlando/direct-paging
joeyorlando Oct 26, 2023
88fbfc6
Merge branch 'jorlando/direct-paging' of github.com:grafana/oncall in…
joeyorlando Oct 26, 2023
a21492a
disable submit button if form is not valid or user
joeyorlando Oct 26, 2023
8d80c8e
simplify optional prop null check
joeyorlando Oct 26, 2023
d0b0266
final frontend changes + update e2e tests
joeyorlando Oct 26, 2023
7140308
remove test.only
joeyorlando Oct 26, 2023
9acab65
update swagger UI in a subsequent PR
joeyorlando Oct 26, 2023
c5ee288
Merge branch 'dev' into jorlando/direct-paging
joeyorlando Oct 26, 2023
51fdb26
address failing build
joeyorlando Oct 26, 2023
d0c5b0e
Merge branch 'jorlando/direct-paging' of github.com:grafana/oncall in…
joeyorlando Oct 26, 2023
02d73c3
add unit tests for DirectPagingStore
joeyorlando Oct 26, 2023
cc5388a
address PR comments
joeyorlando Oct 27, 2023
a3b69fc
address frontend PR comments
joeyorlando Oct 27, 2023
3ac88ad
address more PR comments
joeyorlando Oct 27, 2023
9855f6f
remove unused method
joeyorlando Oct 27, 2023
500a696
final PR comments
joeyorlando Oct 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/sources/open-source/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ features:
should_escape: false
- command: /escalate
url: <ONCALL_ENGINE_PUBLIC_URL>/slack/interactive_api_endpoint/
description: Direct page user(s) or schedule(s)
description: Direct page a team or user(s)
should_escape: false
oauth_config:
redirect_urls:
Expand Down
56 changes: 49 additions & 7 deletions engine/apps/alerts/models/alert_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ class LogRecordUser(typing.TypedDict):
avatar_full: str


class PagedUser(typing.TypedDict):
username: str
name: str
pk: str
avatar: str
avatar_full: str
important: bool


class LogRecords(typing.TypedDict):
time: str # humanized delta relative to now
action: str # human-friendly description
Expand Down Expand Up @@ -509,22 +518,55 @@ def declare_incident_link(self) -> str:
def happened_while_maintenance(self):
return self.root_alert_group is not None and self.root_alert_group.maintenance_uuid is not None

def get_paged_users(self) -> QuerySet[User]:
def get_paged_users(self) -> typing.List[PagedUser]:
from apps.alerts.models import AlertGroupLogRecord

users_ids = set()
for log_record in self.log_records.filter(
user_ids: typing.Set[str] = set()
users: typing.List[PagedUser] = []

log_records = self.log_records.filter(
type__in=(AlertGroupLogRecord.TYPE_DIRECT_PAGING, AlertGroupLogRecord.TYPE_UNPAGE_USER)
):
)

for log_record in log_records:
# filter paging events, track still active escalations
info = log_record.get_step_specific_info()
user_id = info.get("user") if info else None
important = info.get("important") if info else None

if user_id is not None:
users_ids.add(
user_ids.add(
user_id
) if log_record.type == AlertGroupLogRecord.TYPE_DIRECT_PAGING else users_ids.discard(user_id)
) if log_record.type == AlertGroupLogRecord.TYPE_DIRECT_PAGING else user_ids.discard(user_id)

user_instances = User.objects.filter(public_primary_key__in=user_ids)
user_map = {u.public_primary_key: u for u in user_instances}

return User.objects.filter(public_primary_key__in=users_ids)
# mostly doing this second loop to avoid having to query each user individually in the first loop
for log_record in log_records:
# filter paging events, track still active escalations
info = log_record.get_step_specific_info()
user_id = info.get("user") if info else None
important = info.get("important") if info else False

if user_id is not None and (user := user_map.get(user_id)) is not None:
if log_record.type == AlertGroupLogRecord.TYPE_DIRECT_PAGING:
# add the user
users.append(
{
"pk": user.public_primary_key,
"name": user.name,
"username": user.username,
"avatar": user.avatar_url,
"avatar_full": user.avatar_full_url,
"important": important,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `important` key here is really the only reason I modified this method to return `typing.List[PagedUser]` instead of the original `QuerySet[User]` (the prior approach had no way to carry the "important" context).

}
)
else:
# user was unpaged at some point, remove them
users = [u for u in users if u["pk"] != user_id]

return users

def _get_response_time(self):
"""Return response_time based on current alert group status."""
Expand Down
156 changes: 21 additions & 135 deletions engine/apps/alerts/paging.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,20 @@
import enum
import typing
from uuid import uuid4

from django.db import transaction
from django.db.models import Q

from apps.alerts.models import (
Alert,
AlertGroup,
AlertGroupLogRecord,
AlertReceiveChannel,
ChannelFilter,
EscalationChain,
UserHasNotification,
)
from apps.alerts.tasks.notify_user import notify_user_task
from apps.schedules.ical_utils import list_users_to_notify_from_ical
from apps.schedules.models import OnCallSchedule
from apps.user_management.models import Organization, Team, User


class PagingError(enum.StrEnum):
USER_HAS_NO_NOTIFICATION_POLICY = "USER_HAS_NO_NOTIFICATION_POLICY"
USER_IS_NOT_ON_CALL = "USER_IS_NOT_ON_CALL"


# notifications: (User|Schedule, important)
UserNotifications = list[tuple[User, bool]]
ScheduleNotifications = list[tuple[OnCallSchedule, bool]]


class NoNotificationPolicyWarning(typing.TypedDict):
error: typing.Literal[PagingError.USER_HAS_NO_NOTIFICATION_POLICY]
data: typing.Dict


ScheduleWarnings = typing.Dict[str, typing.List[str]]


class _NotOnCallWarningData(typing.TypedDict):
schedules: ScheduleWarnings


class NotOnCallWarning(typing.TypedDict):
error: typing.Literal[PagingError.USER_IS_NOT_ON_CALL]
data: _NotOnCallWarningData


AvailabilityWarning = NoNotificationPolicyWarning | NotOnCallWarning


class DirectPagingAlertGroupResolvedError(Exception):
Expand All @@ -56,6 +23,12 @@ class DirectPagingAlertGroupResolvedError(Exception):
DETAIL = "Cannot add responders for a resolved alert group" # Returned in BadRequest responses and Slack warnings


class DirectPagingUserTeamValidationError(Exception):
"""Raised when trying to use direct paging and no team or user is specified."""

DETAIL = "No team or user(s) specified" # Returned in BadRequest responses and Slack warnings


class _OnCall(typing.TypedDict):
title: str
message: str
Expand All @@ -68,14 +41,7 @@ class DirectPagingAlertPayload(typing.TypedDict):
oncall: _OnCall


def _trigger_alert(
organization: Organization,
team: Team | None,
title: str,
message: str,
from_user: User,
escalation_chain: EscalationChain = None,
) -> AlertGroup:
def _trigger_alert(organization: Organization, team: Team | None, message: str, from_user: User) -> AlertGroup:
"""Trigger manual integration alert from params."""
alert_receive_channel = AlertReceiveChannel.get_or_create_manual_integration(
organization=organization,
Expand All @@ -87,37 +53,24 @@ def _trigger_alert(
"verbal_name": f"Direct paging ({team.name if team else 'No'} team)",
},
)

channel_filter = None
if alert_receive_channel.default_channel_filter is None:
ChannelFilter.objects.create(
channel_filter = ChannelFilter.objects.create(
alert_receive_channel=alert_receive_channel,
notify_in_slack=True,
is_default=True,
)

channel_filter = None
if escalation_chain is not None:
channel_filter, _ = ChannelFilter.objects.get_or_create(
alert_receive_channel=alert_receive_channel,
escalation_chain=escalation_chain,
is_default=False,
defaults={
"filtering_term": f"escalate to {escalation_chain.name}",
"notify_in_slack": True,
},
)

permalink = None
if not title:
title = "Message from {}".format(from_user.username)

title = "Direct page from {}".format(from_user.username)
payload: DirectPagingAlertPayload = {
# Custom oncall property in payload to simplify rendering
"oncall": {
"title": title,
"message": message,
"uid": str(uuid4()), # avoid grouping
"author_username": from_user.username,
"permalink": permalink,
"permalink": None,
},
}

Expand All @@ -134,107 +87,40 @@ def _trigger_alert(
return alert.group


def check_user_availability(user: User) -> typing.List[AvailabilityWarning]:
"""Check user availability to be paged.

Return a warnings list indicating `error` and any additional related `data`.
"""
warnings: typing.List[AvailabilityWarning] = []
if not user.notification_policies.exists():
warnings.append(
{
"error": PagingError.USER_HAS_NO_NOTIFICATION_POLICY,
"data": {},
}
)

is_on_call = False
schedules = OnCallSchedule.objects.filter(
Q(cached_ical_file_primary__contains=user.username) | Q(cached_ical_file_primary__contains=user.email),
organization=user.organization,
)
schedules_data: ScheduleWarnings = {}
for s in schedules:
# keep track of schedules and on call users to suggest if needed
oncall_users = list_users_to_notify_from_ical(s)
schedules_data[s.name] = set(u.public_primary_key for u in oncall_users)
if user in oncall_users:
is_on_call = True
break

if not is_on_call:
# user is not on-call
# TODO: check working hours
warnings.append(
{
"error": PagingError.USER_IS_NOT_ON_CALL,
"data": {"schedules": schedules_data},
}
)

return warnings

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We no longer need this (nor the related API endpoint), since there is now an attribute on the User model, `is_currently_oncall`, that provides the same functionality.


def direct_paging(
organization: Organization,
team: Team | None,
from_user: User,
title: str = None,
message: str = None,
message: str,
team: Team | None = None,
users: UserNotifications | None = None,
schedules: ScheduleNotifications | None = None,
escalation_chain: EscalationChain | None = None,
alert_group: AlertGroup | None = None,
) -> AlertGroup | None:
"""Trigger escalation targeting given users/schedules.
"""Trigger escalation targeting given team/users.

If an alert group is given, update escalation to include the specified users.
Otherwise, create a new alert using given title and message.

Otherwise, create a new alert using given message.
"""

if users is None:
users = []

if schedules is None:
schedules = []

if escalation_chain is not None and alert_group is not None:
raise ValueError("Cannot change an existing alert group escalation chain")
if not users and team is None:
raise DirectPagingUserTeamValidationError

# Cannot add responders to a resolved alert group
if alert_group and alert_group.resolved:
raise DirectPagingAlertGroupResolvedError

# create alert group if needed
if alert_group is None:
alert_group = _trigger_alert(organization, team, title, message, from_user, escalation_chain=escalation_chain)

# initialize direct paged users (without a schedule)
users = [(u, important, None) for u, important in users]

# get on call users, add log entry for each schedule
for s, important in schedules:
oncall_users = list_users_to_notify_from_ical(s)
users += [(u, important, s) for u in oncall_users]
alert_group.log_records.create(
type=AlertGroupLogRecord.TYPE_DIRECT_PAGING,
author=from_user,
reason=f"{from_user.username} paged schedule {s.name}",
step_specific_info={"schedule": s.public_primary_key},
)
alert_group = _trigger_alert(organization, team, message, from_user)

for u, important, schedule in users:
reason = f"{from_user.username} paged user {u.username}"
if schedule:
reason += f" (from schedule {schedule.name})"
for u, important in users:
alert_group.log_records.create(
type=AlertGroupLogRecord.TYPE_DIRECT_PAGING,
author=from_user,
reason=reason,
reason=f"{from_user.username} paged user {u.username}",
step_specific_info={
"user": u.public_primary_key,
"schedule": schedule.public_primary_key if schedule else None,
"important": important,
},
)
Expand Down
45 changes: 45 additions & 0 deletions engine/apps/alerts/tests/test_alert_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,3 +482,48 @@ def test_alert_group_log_record_action_source(
alert_group.un_attach_by_user(user, action_source=action_source)
log_record = alert_group.log_records.last()
assert (log_record.type, log_record.action_source) == (AlertGroupLogRecord.TYPE_UNATTACHED, action_source)


@pytest.mark.django_db
def test_alert_group_get_paged_users(
    make_organization_and_user,
    make_user_for_organization,
    make_alert_receive_channel,
    make_alert_group,
):
    """Check that get_paged_users reflects paging/unpaging log records.

    Covers two scenarios:
    1. Users that were directly paged are returned, together with the
       ``important`` flag stored in the log record.
    2. A user that was paged and later unpaged is no longer returned, while
       other paged users still are.
    """
    organization, user = make_organization_and_user()
    other_user = make_user_for_organization(organization)
    alert_receive_channel = make_alert_receive_channel(organization)

    def _make_log_record(alert_group, user, log_type, important=False):
        # Mimic the log record created when a user is paged/unpaged
        alert_group.log_records.create(
            type=log_type,
            author=user,
            reason="paged user",
            step_specific_info={
                "user": user.public_primary_key,
                "important": important,
            },
        )

    # user was paged - also check that important is persisted/available
    alert_group = make_alert_group(alert_receive_channel)
    _make_log_record(alert_group, user, AlertGroupLogRecord.TYPE_DIRECT_PAGING)
    _make_log_record(alert_group, other_user, AlertGroupLogRecord.TYPE_DIRECT_PAGING, True)

    paged_users = {u["pk"]: u["important"] for u in alert_group.get_paged_users()}

    assert user.public_primary_key in paged_users
    assert paged_users[user.public_primary_key] is False

    assert other_user.public_primary_key in paged_users
    assert paged_users[other_user.public_primary_key] is True

    # user was paged and then unpaged
    alert_group = make_alert_group(alert_receive_channel)
    _make_log_record(alert_group, user, AlertGroupLogRecord.TYPE_DIRECT_PAGING)
    _make_log_record(alert_group, user, AlertGroupLogRecord.TYPE_UNPAGE_USER)

    _make_log_record(alert_group, other_user, AlertGroupLogRecord.TYPE_DIRECT_PAGING)

    # The original line was a bare comparison without `assert`, so it could never
    # fail. Assert on the full list of pks so we verify both that the unpaged
    # user is gone and that the other user is still present.
    remaining_pks = [u["pk"] for u in alert_group.get_paged_users()]
    assert remaining_pks == [other_user.public_primary_key]
Loading
Loading