Skip to content

Commit

Permalink
CAD-2907 workbench: extensions supporting block propagation analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
deepfire committed Jun 26, 2021
1 parent 82a1fe7 commit deb5763
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 19 deletions.
67 changes: 60 additions & 7 deletions nix/workbench/analyse.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
## usage_analyse: print help for the 'wb analyse' subcommand.
##
## Passes the subcommand name ("analyse"), a one-line summary, and -- on
## stdin, via the heredoc below -- the list of supported operations to the
## 'usage' helper.
## NOTE(review): 'usage' is defined outside this view; its output format and
## exit behaviour are not confirmed here.
usage_analyse() {
usage "analyse" "Analyse cluster runs" <<EOF
machine RUN-NAME MACH-NAME
Analyse logs for MACH-NAME
block-propagation RUN-NAME
Block propagation analysis for the entire cluster
whole-cluster RUN-NAME
Analyse logs for the entire cluster run
machine-timeline RUN-NAME MACH-NAME
Produce a general performance timeline for MACH-NAME
EOF
}
Expand All @@ -13,7 +13,59 @@ analyse() {
local op=${1:-$(usage_analyse)}; shift

case "$op" in
machine | mach | m )
block-propagation | bp )
  ## Block propagation analysis over every node of a run.
  ##
  ## Arguments: $1 - run name (default: 'current')
  ## Steps:
  ##   0. build a keyfile of log substrings of interest,
  ##   1. per node directory, grep its stdout* logs against the keyfile and
  ##      post-process the matches through jq (one background job per node),
  ##   2. hand all filtered logs to 'locli analyse block-propagation'.
  ## Outputs: files under RUN-DIR/analysis/.
  local usage="USAGE: wb analyse $op [RUN-NAME=current]"
  local name=${1:-current}
  ## Declaration is split from assignment, so that a failing 'run get' is not
  ## masked by 'local' and we never go on to operate on "/analysis".
  local dir
  dir=$(run get "$name") || return
  local adir=$dir/analysis

  mkdir -p "$adir"

  ## 0. subset what we care about into the keyfile
  local keyfile=$adir/substring-keys
  locli analyse substring-keys | grep -v 'Temporary modify' > "$keyfile"
  cat >>"$keyfile" <<EOF
TraceForgedBlock
AddedToCurrentChain
TraceChainSyncServerReadBlocked.AddBlock
TraceChainSyncServerRead.AddBlock
TraceBlockFetchServerSendBlock
TraceDownloadedHeader
CompletedBlockFetch
EOF
  ## 1. enumerate logs, filter by keyfile & consolidate
  local logdirs=("$dir"/node-*/)

  msg "filtering logs in: $dir/node-* "
  local jq_args=(
    --sort-keys
    --compact-output
    ## Deliberately unquoted: the backend prints several whitespace-separated
    ## jq arguments on a single line, which must be split into words here.
    # shellcheck disable=SC2046
    $(wb backend lostream-fixup-jqargs "$dir")
    ' delpaths([["app"],["env"],["loc"],["msg"],["ns"],["sev"]])
    '"$(wb backend lostream-fixup-jqexpr)"
  )
  local d i stdouts ordered
  for d in "${logdirs[@]}"
  do ## TODO: supervisor-specific logfile layout
     ## Glob instead of parsing 'ls': safe for any characters in paths.
     stdouts=("$d"/stdout*)
     if [[ ! -e "${stdouts[0]}" ]]
     then ## Without any file arguments grep would silently read stdin.
          msg "WARNING: no stdout* logs in $d, skipping"
          continue; fi
     ## Feed the logs in reverse glob order (what 'ls | tac' used to do).
     ordered=()
     for ((i = ${#stdouts[@]} - 1; i >= 0; i--))
     do ordered+=("${stdouts[i]}"); done
     grep -hFf "$keyfile" "${ordered[@]}" | jq "${jq_args[@]}" > \
       "$adir"/logs-"$(basename "$d")".flt.json &
  done
  wait

  local filtered=("$adir"/*.flt.json)
  msg "log sizes: (files: ${#filtered[@]}, lines: $(cat "${filtered[@]}" | wc -l))"

  msg "analysing.."
  local locli_args=(
    --genesis "$dir"/genesis/genesis.json
    --run-metafile "$dir"/meta.json
    ## ->
    # --logobjects-json "$adir"/logs-cluster.logobjects.json
    --analysis-json "$adir"/block-event-stream.json
  )

  locli 'analyse' 'block-propagation' \
    "${locli_args[@]}" "${filtered[@]}";;

machine-timeline | machine | mt )
local usage="USAGE: wb analyse $op [RUN-NAME=current] [MACH-NAME=node-1]"
local name=${1:-current}
local mach=${2:-node-1}
Expand All @@ -24,7 +76,7 @@ case "$op" in

## 0. subset what we care about into the keyfile
local keyfile=$adir/substring-keys
locli analyse substring-keys > "$keyfile"
locli analyse substring-keys | grep -v 'Temporary modify' > "$keyfile"

## 1. enumerate logs, filter by keyfile & consolidate
local logs=("$dir"/$mach/stdout) consolidated="$adir"/logs-$mach.json
Expand All @@ -34,6 +86,7 @@ case "$op" in
local locli_args=(
--genesis "$dir"/genesis/genesis.json
--run-metafile "$dir"/meta.json
## ->
--logobjects-json "$adir"/logs-$mach.logobjects.json
--slotstats-json "$adir"/logs-$mach.slotstats.json
--timeline-pretty "$adir"/logs-$mach.timeline.txt
Expand All @@ -45,7 +98,7 @@ case "$op" in
# --derived-vectors-1-csv "$adir"/logs-$mach.derived.1.csv
)

locli 'analyse' 'perf-timeline' \
locli 'analyse' 'machine-timeline' \
"${locli_args[@]}" "$consolidated";;

* ) usage_analyse;; esac
Expand Down
15 changes: 12 additions & 3 deletions nix/workbench/backend.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@ usage_backend() {
Given a run directory, print the node socket path
for 'cardano-cli'
start RUNDIR Start an allocated run
record-extended-env-config ENV-JSON [ENV-CONFIG-OPTS..]
Extend the environment JSON file with backend-specific
environment config
describe-run RUNDIR
pre-run-hook RUNDIR
lostream-fixup-jqargs RUNDIR
lostream-fixup-jqexpr
start-run RUNDIR Start an allocated run
assert-is BACKEND-NAME
Check that the current backend is as expected
Expand All @@ -25,11 +30,15 @@ local op=${1:-$(usage_backend)} # No need to shift -- backends will use the op.
case "${op}" in
is-running ) $WORKBENCH_BACKEND "$@";;
get-node-socket-path ) $WORKBENCH_BACKEND "$@";;
wait-for-local-node-socket ) $WORKBENCH_BACKEND "$@";;
record-extended-env-config ) $WORKBENCH_BACKEND "$@";;
describe-run ) $WORKBENCH_BACKEND "$@";;
pre-run-hook ) $WORKBENCH_BACKEND "$@";;
start-run ) $WORKBENCH_BACKEND "$@";;
lostream-fixup-jqargs ) $WORKBENCH_BACKEND "$@";;
lostream-fixup-jqexpr ) $WORKBENCH_BACKEND "$@";;

## Handle non-generic calls:
passthrough | pass ) $WORKBENCH_BACKEND "$@";;

assert-is )
local usage="USAGE: wb run $op BACKEND-NAME"
Expand Down
64 changes: 56 additions & 8 deletions nix/workbench/supervisor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@ usage_supervisor() {
get-node-socket-path STATE-DIR
Given a state dir, print the default node socket path
for 'cardano-cli'
record-extended-env-config ENV-JSON [ENV-CONFIG-OPTS..]
Extend the env JSON with the backend-specific
environment config options
describe-run RUN-DIR
pre-run-hook RUN-DIR
start-run RUN-DIR
Supervisor-specific:
save-pids RUN-DIR
EOF
}

Expand All @@ -18,9 +30,11 @@ case "$op" in
name )
echo 'supervisor';;

passthrough | pass )
$0 "$@";;

is-running )
test "$(sleep 0.5s; netstat -pltn 2>/dev/null | grep ':9001 ' | wc -l)" != "0"
;;
test "$(sleep 0.5s; netstat -pltn 2>/dev/null | grep ':9001 ' | wc -l)" != "0";;

get-node-socket-path )
usage="USAGE: wb supervisor $op STATE-DIR"
Expand Down Expand Up @@ -78,6 +92,34 @@ EOF
then echo "workbench ERROR: state directory exists, but is not a symlink -- please remove it or choose another: $dir"; exit 1; fi
;;

save-pids )
  ## Record the process IDs of the supervised cardano-node processes.
  ##
  ## Arguments: $1 - run directory
  ## Writes, under RUN-DIR/supervisor/:
  ##   ps.tree            - 'pstree -Ap' output rooted at the supervisord pid
  ##   cardano-node.pids  - one cardano-node pid per line
  ##   node2pid.map       - JSON object: node name -> pid (number)
  ##   pid2node.map       - JSON object: pid (string key) -> node name
  usage="USAGE: wb supervisor $op RUN-DIR"
  dir=${1:?$usage}; shift

  svpid=$dir/supervisor/supervisord.pid pstree=$dir/supervisor/ps.tree
  pstree -Ap "$(<"$svpid")" > "$pstree"

  ## Used with 'sed -n': only lines where a pid was actually extracted are
  ## printed, so unmatched pstree lines can no longer leak into the outputs.
  pid_sed='s/^.*-+-cardano-node(\([0-9]*\))-.*$/\1/p'

  pidsfile="$dir"/supervisor/cardano-node.pids
  grep 'cabal.*cardano-node' "$pstree" |
    sed -n -e "$pid_sed" \
    > "$pidsfile"

  mapn2p="$dir"/supervisor/node2pid.map; echo '{}' > "$mapn2p"
  mapp2n="$dir"/supervisor/pid2node.map; echo '{}' > "$mapp2n"
  ## Word-splitting of the node list is intended here.
  for node in $(jq_tolist keys "$dir"/node-specs.json)
  do cabalpid=$(supervisorctl pid "$node")
     pid=$(grep -F -e "-cabal($cabalpid)-" "$pstree" |
           sed -n -e "$pid_sed")
     ## Anything but a single run of digits (no match, several matches)
     ## would yield an invalid jq program below -- skip such nodes.
     case "$pid" in
       '' | *[!0-9]* )
         msg "supervisor: WARNING: cannot determine cardano-node pid for $node (cabal pid: $cabalpid)"
         continue;;
     esac
     jq_fmutate "$mapn2p" '. * { "'"$node"'": '"$pid"' }'
     jq_fmutate "$mapp2n" '. * { "'"$pid"'": "'"$node"'" }'
  done

  msg "supervisor: pid file: $svpid"
  msg "supervisor: process tree: $pstree"
  msg "supervisor: node pids: $pidsfile"
  msg "supervisor: node pid maps: $mapn2p $mapp2n"
  ;;

start-run )
usage="USAGE: wb supervisor $op RUN-DIR"
dir=${1:?$usage}; shift
Expand All @@ -104,11 +146,17 @@ EOF
sleep 5
done

msg "supervisor: pid file: $dir/supervisor/supervisord.pid"
pstree -Ap "$(cat "$dir"/supervisor/supervisord.pid)" |
grep 'cabal.*cardano-node' |
sed -e 's/^.*-+-{\?cardano-node}\?(\([0-9]*\))$/\1/' \
> "$dir"/supervisor/cardano-node.pids
;;
$0 save-pids "$dir";;

lostream-fixup-jqargs )
  ## Print, on one line, the extra jq command-line arguments for log stream
  ## fixup: compact output, plus the run's pid->node map slurped into the
  ## jq variable $mapp2n.
  ## Arguments: $1 - run directory (mandatory)
  usage="USAGE: wb supervisor $op RUN-DIR"
  dir=${1:?$usage}

  printf '%s %s %s %s\n' \
    --compact-output --slurpfile mapp2n "$dir"/supervisor/pid2node.map
  ;;

lostream-fixup-jqexpr )
  ## Print the jq expression suffix for log stream fixup: it merges a 'host'
  ## field into each object, looked up in $mapp2n[0] by the object's .pid.
  usage="USAGE: wb supervisor $op"

  printf '%s\n' '| $mapp2n[0] as $map | . * { host: $map[.pid] }'
  ;;

* ) usage_supervisor;; esac
2 changes: 1 addition & 1 deletion nix/workbench/topology.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ case "${op}" in

args=(--slurpfile profile "$profile_json"
--slurpfile topology "$topo_dir"/topology-nixops.json
--null-input
--null-input --compact-output
)
jq ' $topology[0] as $topo
| $topo.coreNodes
Expand Down

0 comments on commit deb5763

Please sign in to comment.