forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
workqueue: Add pwq->stats[] and a monitoring script
Currently, the only way to peer into workqueue operations is through tracing. While possible, it isn't easy or convenient to monitor per-workqueue behaviors over time this way. Let's add pwq->stats[] that track relevant events and a drgn monitoring script - tools/workqueue/wq_monitor.py. It's arguable whether this needs to be configurable. However, it currently only has several counters and the runtime overhead shouldn't be noticeable given that they're on pwq's which are per-cpu on per-cpu workqueues and per-numa-node on unbound ones. Let's keep it simple for the time being. v2: Patch reordered to earlier with fewer fields. Field will be added back gradually. Help message improved. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Lai Jiangshan <jiangshanlai@gmail.com>
- Loading branch information
Showing
3 changed files
with
205 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
#!/usr/bin/env drgn | ||
# | ||
# Copyright (C) 2023 Tejun Heo <tj@kernel.org> | ||
# Copyright (C) 2023 Meta Platforms, Inc. and affiliates. | ||
|
||
desc = """ | ||
This is a drgn script to monitor workqueues. For more info on drgn, visit | ||
https://github.com/osandov/drgn. | ||
total Total number of work items executed by the workqueue. | ||
infl The number of currently in-flight work items. | ||
CMwake The number of concurrency-management wake-ups while executing a | ||
work item of the workqueue. | ||
mayday The number of times the rescuer was requested while waiting for | ||
new worker creation. | ||
rescued The number of work items executed by the rescuer. | ||
""" | ||
|
||
import sys | ||
import signal | ||
import os | ||
import re | ||
import time | ||
import json | ||
|
||
import drgn | ||
from drgn.helpers.linux.list import list_for_each_entry,list_empty | ||
from drgn.helpers.linux.cpumask import for_each_possible_cpu | ||
|
||
import argparse | ||
parser = argparse.ArgumentParser(description=desc, | ||
formatter_class=argparse.RawTextHelpFormatter) | ||
parser.add_argument('workqueue', metavar='REGEX', nargs='*', | ||
help='Target workqueue name patterns (all if empty)') | ||
parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1, | ||
help='Monitoring interval (0 to print once and exit)') | ||
parser.add_argument('-j', '--json', action='store_true', | ||
help='Output in json') | ||
args = parser.parse_args() | ||
|
||
def err(s): | ||
print(s, file=sys.stderr, flush=True) | ||
sys.exit(1) | ||
|
||
workqueues = prog['workqueues'] | ||
|
||
WQ_UNBOUND = prog['WQ_UNBOUND'] | ||
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM'] | ||
|
||
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED'] # work items started execution | ||
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed execution | ||
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups | ||
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer | ||
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED'] # linked work items executed by rescuer | ||
PWQ_NR_STATS = prog['PWQ_NR_STATS'] | ||
|
||
class WqStats: | ||
def __init__(self, wq): | ||
self.name = wq.name.string_().decode() | ||
self.unbound = wq.flags & WQ_UNBOUND != 0 | ||
self.mem_reclaim = wq.flags & WQ_MEM_RECLAIM != 0 | ||
self.stats = [0] * PWQ_NR_STATS | ||
for pwq in list_for_each_entry('struct pool_workqueue', wq.pwqs.address_of_(), 'pwqs_node'): | ||
for i in range(PWQ_NR_STATS): | ||
self.stats[i] += int(pwq.stats[i]) | ||
|
||
def dict(self, now): | ||
return { 'timestamp' : now, | ||
'name' : self.name, | ||
'unbound' : self.unbound, | ||
'mem_reclaim' : self.mem_reclaim, | ||
'started' : self.stats[PWQ_STAT_STARTED], | ||
'completed' : self.stats[PWQ_STAT_COMPLETED], | ||
'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP], | ||
'mayday' : self.stats[PWQ_STAT_MAYDAY], | ||
'rescued' : self.stats[PWQ_STAT_RESCUED], } | ||
|
||
def table_header_str(): | ||
return f'{"":>24} {"total":>8} {"infl":>5} {"CMwake":>7} {"mayday":>7} {"rescued":>7}' | ||
|
||
def table_row_str(self): | ||
cm_wakeup = '-' | ||
mayday = '-' | ||
rescued = '-' | ||
|
||
if not self.unbound: | ||
cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP]) | ||
|
||
if self.mem_reclaim: | ||
mayday = str(self.stats[PWQ_STAT_MAYDAY]) | ||
rescued = str(self.stats[PWQ_STAT_RESCUED]) | ||
|
||
out = f'{self.name[-24:]:24} ' \ | ||
f'{self.stats[PWQ_STAT_STARTED]:8} ' \ | ||
f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \ | ||
f'{cm_wakeup:>7} ' \ | ||
f'{mayday:>7} ' \ | ||
f'{rescued:>7} ' | ||
return out.rstrip(':') | ||
|
||
exit_req = False | ||
|
||
def sigint_handler(signr, frame): | ||
global exit_req | ||
exit_req = True | ||
|
||
def main(): | ||
# handle args | ||
table_fmt = not args.json | ||
interval = args.interval | ||
|
||
re_str = None | ||
if args.workqueue: | ||
for r in args.workqueue: | ||
if re_str is None: | ||
re_str = r | ||
else: | ||
re_str += '|' + r | ||
|
||
filter_re = re.compile(re_str) if re_str else None | ||
|
||
# monitoring loop | ||
signal.signal(signal.SIGINT, sigint_handler) | ||
|
||
while not exit_req: | ||
now = time.time() | ||
|
||
if table_fmt: | ||
print() | ||
print(WqStats.table_header_str()) | ||
|
||
for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'): | ||
stats = WqStats(wq) | ||
if filter_re and not filter_re.search(stats.name): | ||
continue | ||
if table_fmt: | ||
print(stats.table_row_str()) | ||
else: | ||
print(stats.dict(now)) | ||
|
||
if interval == 0: | ||
break | ||
time.sleep(interval) | ||
|
||
if __name__ == "__main__": | ||
main() |