Skip to content

Commit

Permalink
Collect framework_offers and allocator metrics in mesos input (influx…
Browse files Browse the repository at this point in the history
  • Loading branch information
branden authored and idohalevi committed Sep 23, 2020
1 parent 05e9e99 commit 391010b
Show file tree
Hide file tree
Showing 3 changed files with 400 additions and 204 deletions.
72 changes: 72 additions & 0 deletions plugins/inputs/mesos/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ For more information, please check the [Mesos Observability Metrics](http://meso
"system",
"agents",
"frameworks",
"framework_offers",
"tasks",
"messages",
"evqueue",
"registrar",
"allocator",
]
## A list of Mesos slaves, default is []
# slaves = []
Expand Down Expand Up @@ -100,6 +102,10 @@ Mesos master metric groups
- master/slaves_connected
- master/slaves_disconnected
- master/slaves_inactive
- master/slave_unreachable_canceled
- master/slave_unreachable_completed
- master/slave_unreachable_scheduled
- master/slaves_unreachable

- frameworks
- master/frameworks_active
Expand All @@ -108,6 +114,22 @@ Mesos master metric groups
- master/frameworks_inactive
- master/outstanding_offers

- framework_offers
- master/frameworks/subscribed
- master/frameworks/calls_total
- master/frameworks/calls
- master/frameworks/events_total
- master/frameworks/events
- master/frameworks/operations_total
- master/frameworks/operations
- master/frameworks/tasks/active
- master/frameworks/tasks/terminal
- master/frameworks/offers/sent
- master/frameworks/offers/accepted
- master/frameworks/offers/declined
- master/frameworks/offers/rescinded
- master/frameworks/roles/suppressed

- tasks
- master/tasks_error
- master/tasks_failed
Expand All @@ -117,6 +139,11 @@ Mesos master metric groups
- master/tasks_running
- master/tasks_staging
- master/tasks_starting
- master/tasks_dropped
- master/tasks_gone
- master/tasks_gone_by_operator
- master/tasks_killing
- master/tasks_unreachable

- messages
- master/invalid_executor_to_framework_messages
Expand Down Expand Up @@ -155,11 +182,17 @@ Mesos master metric groups
- master/task_lost/source_master/reason_slave_removed
- master/task_lost/source_slave/reason_executor_terminated
- master/valid_executor_to_framework_messages
- master/invalid_operation_status_update_acknowledgements
- master/messages_operation_status_update_acknowledgement
- master/messages_reconcile_operations
- master/messages_suppress_offers
- master/valid_operation_status_update_acknowledgements

- evqueue
- master/event_queue_dispatches
- master/event_queue_http_requests
- master/event_queue_messages
- master/operator_event_stream_subscribers

- registrar
- registrar/state_fetch_ms
Expand All @@ -172,6 +205,45 @@ Mesos master metric groups
- registrar/state_store_ms/p99
- registrar/state_store_ms/p999
- registrar/state_store_ms/p9999
- registrar/state_store_ms/count
- registrar/log/ensemble_size
- registrar/log/recovered
- registrar/queued_operations
- registrar/registry_size_bytes

- allocator
- allocator/allocation_run_ms
- allocator/allocation_run_ms/count
- allocator/allocation_run_ms/max
- allocator/allocation_run_ms/min
- allocator/allocation_run_ms/p50
- allocator/allocation_run_ms/p90
- allocator/allocation_run_ms/p95
- allocator/allocation_run_ms/p99
- allocator/allocation_run_ms/p999
- allocator/allocation_run_ms/p9999
- allocator/allocation_runs
- allocator/allocation_run_latency_ms
- allocator/allocation_run_latency_ms/count
- allocator/allocation_run_latency_ms/max
- allocator/allocation_run_latency_ms/min
- allocator/allocation_run_latency_ms/p50
- allocator/allocation_run_latency_ms/p90
- allocator/allocation_run_latency_ms/p95
- allocator/allocation_run_latency_ms/p99
- allocator/allocation_run_latency_ms/p999
- allocator/allocation_run_latency_ms/p9999
- allocator/roles/shares/dominant
- allocator/event_queue_dispatches
- allocator/offer_filters/roles/active
- allocator/quota/roles/resources/offered_or_allocated
- allocator/quota/roles/resources/guarantee
- allocator/resources/cpus/offered_or_allocated
- allocator/resources/cpus/total
- allocator/resources/disk/offered_or_allocated
- allocator/resources/disk/total
- allocator/resources/mem/offered_or_allocated
- allocator/resources/mem/total

Mesos slave metric groups
- resources
Expand Down
54 changes: 50 additions & 4 deletions plugins/inputs/mesos/mesos.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ type Mesos struct {
}

// allMetrics maps each role (MASTER, SLAVE) to the names of its metric groups.
// NOTE(review): this is a diff view, so the MASTER entry appears twice — the
// first line looks like the pre-change value and the second its replacement,
// which adds "framework_offers" and "allocator". Presumably only the second
// belongs in the resulting file; confirm against the actual source.
var allMetrics = map[Role][]string{
MASTER: {"resources", "master", "system", "agents", "frameworks", "tasks", "messages", "evqueue", "registrar"},
MASTER: {"resources", "master", "system", "agents", "frameworks", "framework_offers", "tasks", "messages", "evqueue", "registrar", "allocator"},
SLAVE: {"resources", "agent", "system", "executors", "tasks", "messages"},
}

Expand All @@ -58,10 +58,12 @@ var sampleConfig = `
"system",
"agents",
"frameworks",
"framework_offers",
"tasks",
"messages",
"evqueue",
"registrar",
"allocator",
]
## A list of Mesos slaves, default is []
# slaves = []
Expand Down Expand Up @@ -305,6 +307,10 @@ func getMetrics(role Role, group string) []string {
"master/slaves_connected",
"master/slaves_disconnected",
"master/slaves_inactive",
"master/slave_unreachable_canceled",
"master/slave_unreachable_completed",
"master/slave_unreachable_scheduled",
"master/slaves_unreachable",
}

m["frameworks"] = []string{
Expand All @@ -315,6 +321,12 @@ func getMetrics(role Role, group string) []string {
"master/outstanding_offers",
}

// framework_offers and allocator metrics have unpredictable names, so they can't be listed here.
// These empty groups are included to prevent the "unknown metrics group" info log below.
// filterMetrics() filters these metrics by looking for names with the corresponding prefix.
m["framework_offers"] = []string{}
m["allocator"] = []string{}

m["tasks"] = []string{
"master/tasks_error",
"master/tasks_failed",
Expand All @@ -324,6 +336,11 @@ func getMetrics(role Role, group string) []string {
"master/tasks_running",
"master/tasks_staging",
"master/tasks_starting",
"master/tasks_dropped",
"master/tasks_gone",
"master/tasks_gone_by_operator",
"master/tasks_killing",
"master/tasks_unreachable",
}

m["messages"] = []string{
Expand Down Expand Up @@ -363,12 +380,18 @@ func getMetrics(role Role, group string) []string {
"master/task_lost/source_master/reason_slave_removed",
"master/task_lost/source_slave/reason_executor_terminated",
"master/valid_executor_to_framework_messages",
"master/invalid_operation_status_update_acknowledgements",
"master/messages_operation_status_update_acknowledgement",
"master/messages_reconcile_operations",
"master/messages_suppress_offers",
"master/valid_operation_status_update_acknowledgements",
}

m["evqueue"] = []string{
"master/event_queue_dispatches",
"master/event_queue_http_requests",
"master/event_queue_messages",
"master/operator_event_stream_subscribers",
}

m["registrar"] = []string{
Expand All @@ -382,6 +405,11 @@ func getMetrics(role Role, group string) []string {
"registrar/state_store_ms/p99",
"registrar/state_store_ms/p999",
"registrar/state_store_ms/p9999",
"registrar/log/ensemble_size",
"registrar/log/recovered",
"registrar/queued_operations",
"registrar/registry_size_bytes",
"registrar/state_store_ms/count",
}
} else if role == SLAVE {
m["resources"] = []string{
Expand Down Expand Up @@ -477,9 +505,27 @@ func (m *Mesos) filterMetrics(role Role, metrics *map[string]interface{}) {
}

for _, k := range metricsDiff(role, selectedMetrics) {
for _, v := range getMetrics(role, k) {
if _, ok = (*metrics)[v]; ok {
delete((*metrics), v)
switch k {
// allocator and framework_offers metrics have unpredictable names, so we have to identify them by name prefix.
case "allocator":
for m := range *metrics {
if strings.HasPrefix(m, "allocator/") {
delete((*metrics), m)
}
}
case "framework_offers":
for m := range *metrics {
if strings.HasPrefix(m, "master/frameworks/") || strings.HasPrefix(m, "frameworks/") {
delete((*metrics), m)
}
}

// All other metrics have predictable names. We can use getMetrics() to retrieve them.
default:
for _, v := range getMetrics(role, k) {
if _, ok = (*metrics)[v]; ok {
delete((*metrics), v)
}
}
}
}
Expand Down
Loading

0 comments on commit 391010b

Please sign in to comment.