Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added PodEvicted default event #1497

Merged
merged 17 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
76b06ff
* Added on_pod_evicted_enricher action
itisallgood Jul 11, 2024
a648d82
* Moved pod_row function dublicate
itisallgood Jul 11, 2024
01b7cfa
*Fixed importing of pod_row
itisallgood Jul 11, 2024
c63d645
*Fixed pod_row import in node_enrichment
itisallgood Jul 11, 2024
f48648b
Merge branch 'master' into add-visibility-out-of-the-box-into-evictions
itisallgood Jul 11, 2024
abeb23e
*Updated import of the pod_row method
itisallgood Jul 11, 2024
800d623
*Added node_enrichment_utils and refactored node related playbooks
itisallgood Jul 12, 2024
7aae9a6
Merge branch 'add-visibility-out-of-the-box-into-evictions' of github…
itisallgood Jul 12, 2024
35c2401
Merge branch 'master' into add-visibility-out-of-the-box-into-evictions
itisallgood Jul 12, 2024
e2e70e3
*Removed log_enrichment from PodEvicted because it can't retrieve logs
itisallgood Jul 12, 2024
2d14494
Merge branch 'master' into add-visibility-out-of-the-box-into-evictions
itisallgood Jul 15, 2024
a4812a3
Added tests for node_enrichment_utils, fixed typing for node_enrichme…
itisallgood Jul 15, 2024
b67fef3
Merge branch 'add-visibility-out-of-the-box-into-evictions' of github…
itisallgood Jul 15, 2024
86bc134
Merge branch 'master' into add-visibility-out-of-the-box-into-evictions
itisallgood Jul 15, 2024
6c7e48f
*Refactored enrich_pod_with_node_events to use pods to get events ins…
itisallgood Jul 16, 2024
12fe317
Merge branch 'add-visibility-out-of-the-box-into-evictions' of github…
itisallgood Jul 16, 2024
e97a166
Merge branch 'master' into add-visibility-out-of-the-box-into-evictions
itisallgood Jul 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions helm/robusta/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,15 @@ builtinPlaybooks:
- image_pull_backoff_reporter: {}

# playbooks for non-prometheus based monitoring that use prometheus for enrichment
- name: "PodEvicted"
triggers:
- on_pod_evicted: {}
actions:
- on_pod_evicted_enricher: {}
itisallgood marked this conversation as resolved.
Show resolved Hide resolved
- pod_events_enricher: {}
- enrich_pod_with_node_events: {}
- logs_enricher: {}
itisallgood marked this conversation as resolved.
Show resolved Hide resolved

- name: "PodOOMKill"
triggers:
- on_pod_oom_killed:
Expand Down
28 changes: 28 additions & 0 deletions playbooks/robusta_playbooks/event_enrichments.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,34 @@ def pod_events_enricher(event: PodEvent, params: EventEnricherParams):
)


@action
def enrich_pod_with_node_events(event: PodEvent, params: EventEnricherParams):
    """
    Given a Kubernetes pod, fetch related events in the near past for its node

    The node is looked up by the pod's spec.nodeName. When an events table is
    produced it is attached to the event as a "Node Events" enrichment. If the
    pod or its node cannot be resolved, an error is logged and nothing is added.
    """
    pod = event.get_pod()
    if not pod:
        logging.error(f"cannot run enrich_pod_with_node_events on event with no pod: {event}")
        return

    # The node lookup may raise (e.g. the node no longer exists); log and bail out
    # instead of failing the whole playbook run.
    try:
        node: Node = Node.readNode(pod.spec.nodeName).obj
    except Exception as e:
        logging.error(f"Failed to read pod's node information: {e}")
        return

    if not node:
        logging.error(f"cannot run enrich_pod_with_node_events on alert with no node object: {event}")
        return

    events_table_block = get_resource_events_table(
        "*Node events:*",
        node.kind,
        node.metadata.name,
        node.metadata.namespace,
        included_types=params.included_types,
        max_events=params.max_events,
    )
    # get_resource_events_table may return nothing; only add an enrichment when there is a table
    if events_table_block:
        event.add_enrichment(
            [events_table_block],
            {SlackAnnotations.ATTACHMENT: True},
            enrichment_type=EnrichmentType.k8s_events,
            title="Node Events",
        )


@action
def deployment_events_enricher(event: DeploymentEvent, params: ExtendedEventEnricherParams):
"""
Expand Down
27 changes: 11 additions & 16 deletions playbooks/robusta_playbooks/node_enrichments.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
from typing import List

from hikaru.model.rel_1_26 import Pod, PodList
from robusta_playbooks.playbook_utils import pod_row

from robusta.api import (
BaseBlock,
EnrichmentType,
FileBlock,
Finding,
FindingSeverity,
Expand All @@ -19,19 +22,9 @@
TableBlock,
action,
create_node_graph_enrichment,
EnrichmentType
)


def pod_row(pod: Pod) -> List[str]:
    """
    Build a table row for a pod: [namespace, name, status of its "Ready" condition]

    The third element is "Unknown" when the pod reports no "Ready" condition,
    including when the pod has no status or no conditions at all.
    """
    # status / conditions may be unset; fall back to an empty list rather than iterating over None
    conditions = (pod.status.conditions if pod.status else None) or []
    ready_status = next((condition.status for condition in conditions if condition.type == "Ready"), "Unknown")
    return [
        pod.metadata.namespace,
        pod.metadata.name,
        ready_status,
    ]


def has_resource_request(pod: Pod, resource_type: str) -> bool:
for container in pod.spec.containers:
try:
Expand Down Expand Up @@ -85,7 +78,7 @@ def node_running_pods_enricher(event: NodeEvent):

effected_pods_rows = [pod_row(pod) for pod in pod_list.items]
block_list.append(
TableBlock(effected_pods_rows, ["namespace", "name", "ready"], table_name=f"Pods running on the node")
TableBlock(effected_pods_rows, ["namespace", "name", "ready"], table_name="Pods running on the node")
)
event.add_enrichment(block_list)

Expand Down Expand Up @@ -127,7 +120,7 @@ def node_status_enricher(event: NodeEvent):
logging.error(f"node_status_enricher was called on event without node : {event}")
return

logging.info(f"node_status_enricher is depricated, use status_enricher instead")
logging.info("node_status_enricher is depricated, use status_enricher instead")

event.add_enrichment(
[
Expand All @@ -154,8 +147,9 @@ def node_dmesg_enricher(event: NodeEvent, params: PodRunningParams):
)
if exec_result:
event.add_enrichment(
[FileBlock(f"dmesg.log", exec_result.encode())], enrichment_type=EnrichmentType.text_file,
title="DMESG Info"
[FileBlock("dmesg.log", exec_result.encode())],
enrichment_type=EnrichmentType.text_file,
title="DMESG Info",
)


Expand Down Expand Up @@ -189,8 +183,9 @@ def node_health_watcher(event: NodeChangeEvent):
subject=KubeObjFindingSubject(event.obj),
)
event.add_finding(finding)
event.add_enrichment([KubernetesDiffBlock([], event.old_obj,
event.obj, event.obj.metadata.name, kind=event.obj.kind)])
event.add_enrichment(
[KubernetesDiffBlock([], event.old_obj, event.obj, event.obj.metadata.name, kind=event.obj.kind)]
)
node_status_enricher(event)


Expand Down
12 changes: 12 additions & 0 deletions playbooks/robusta_playbooks/playbook_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from typing import List

from hikaru.model.rel_1_26 import Pod


def pod_row(pod: Pod) -> List[str]:
    """
    Build a table row for a pod: [namespace, name, status of its "Ready" condition]

    The third element is "Unknown" when the pod reports no "Ready" condition,
    including when the pod has no status or no conditions at all.
    """
    # status / conditions may be unset; fall back to an empty list rather than iterating over None
    conditions = (pod.status.conditions if pod.status else None) or []
    ready_status = next((condition.status for condition in conditions if condition.type == "Ready"), "Unknown")
    return [
        pod.metadata.namespace,
        pod.metadata.name,
        ready_status,
    ]
80 changes: 80 additions & 0 deletions playbooks/robusta_playbooks/pod_evicted_enrichments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import logging
from typing import List

from hikaru.model.rel_1_26 import Node, PodList
from robusta_playbooks.playbook_utils import pod_row

from robusta.api import (
BaseBlock,
EnrichmentType,
Finding,
FindingSeverity,
PodEvent,
PodFindingSubject,
TableBlock,
action,
)


@action
def on_pod_evicted_enricher(event: PodEvent):
    """
    Retrieves pod and node information for an evicted pod

    Creates a "PodEvictedTriggered" finding enriched with the node's name,
    status conditions and allocatable resources, then adds a table of the pods
    running on that node to the event. If the pod or its node cannot be
    resolved, an error is logged and nothing is added.
    """
    pod = event.get_pod()
    if not pod:
        logging.error(f"cannot run on_pod_evicted_enricher on event with no pod: {event}")
        return

    # Read the node exactly once, inside the guard: every node table below depends on it.
    try:
        node: Node = Node.readNode(pod.spec.nodeName).obj
    except Exception as e:
        logging.error(f"Failed to read pod's node information: {e}")
        return

    if not node:
        logging.error(f"cannot run on_pod_evicted_enricher on event with no node object: {event}")
        return

    finding = Finding(
        title=f"Pod {pod.metadata.name} in namespace {pod.metadata.namespace} was Evicted",
        aggregation_key="PodEvictedTriggered",
        severity=FindingSeverity.HIGH,
        subject=PodFindingSubject(pod),
    )

    node_labels = [("Node Name", pod.spec.nodeName)]
    node_info_block = TableBlock(
        [[k, v] for k, v in node_labels],
        headers=["Field", "Value"],
        table_name="*Node general info:*",
    )
    node_status_block = TableBlock(
        [[condition.type, condition.status] for condition in node.status.conditions],
        headers=["Type", "Status"],
        table_name="*Node status details:*",
    )
    allocatable_resources_block = TableBlock(
        [[resource, value] for resource, value in node.status.allocatable.items()],
        headers=["Resource", "Value"],
        table_name="*Node Allocatable Resources:*",
    )

    finding.add_enrichment(
        [node_info_block, node_status_block, allocatable_resources_block],
        enrichment_type=EnrichmentType.node_info,
        title="Node Info",
    )

    # The finding is registered before the pod listing, so it is kept even if the listing fails.
    event.add_finding(finding)

    try:
        pod_list = PodList.listPodForAllNamespaces(field_selector=f"spec.nodeName={node.metadata.name}").obj
    except Exception as e:
        logging.error(f"Failed to list pods for node {node.metadata.name}: {e}")
        return

    # Use a distinct loop name so the evicted `pod` above is not shadowed.
    effected_pods_rows = [pod_row(node_pod) for node_pod in pod_list.items]
    block_list: List[BaseBlock] = [
        TableBlock(effected_pods_rows, ["namespace", "name", "ready"], table_name="Pods running on the node")
    ]
    event.add_enrichment(block_list)
Loading