-
Notifications
You must be signed in to change notification settings - Fork 724
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
2888: CAD-2907 workbench: block time budgeting support r=deepfire a=deepfire Workbench & `locli` improvements, primarily aimed at block time budgeting analysis support - assorted fixes & cleanups - `10-*` profiles, for a local 10-node cluster - `wb reanalyse *` re-analysis invocation fast path - locli: drop `Seq`, reverting to lists - block propagation event CDFs - generic, data-driven rendering of summaries and timelines - module reorganisation - block propagation timeline - thorough log anomaly processing: accumulation and attributed reporting - block propagation event filtering - cluster latency probe script - support processing of the legacy, AWS benchmark runs - support side-loaded alternate benchmark run roots, for direct on-AWS processing - parallel processing for the machine performance timeline analysis Co-authored-by: Kosyrev Serge <serge.kosyrev@iohk.io>
- Loading branch information
Showing
26 changed files
with
2,116 additions
and
606 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/usr/bin/env bash
## Probe pairwise inter-node latency on a nixops-deployed cluster.
##
## Builds a hostname -> public IP map from last-meta.json, then has every
## node ping every listed host and emit one compact JSON latency record
## per (source, target) pair on stdout.

## hostname -> public_ip map, e.g. { "node-0": "1.2.3.4", ... }
hostmap=$(jq '
  .public_ip
  | values
  | map({ "\(.hostname)": .public_ip})
  | add' last-meta.json)

echo 'obtaining latency matrix for the hostmap:'
jq . -C <<<$hostmap

## The remote script below runs on every node.  Escaped \$ expansions are
## evaluated remotely; the only local substitution is ${hostmap@Q}, which
## embeds the host map shell-quoted for safe transport.
nixops ssh-for-each --parallel -- "
  self=\$(hostname)
  function probe() {
    local host=\$1 ip=\$2
    ## 21 adaptive pings; the grep/sed chain flattens ping's summary into a
    ## bare list of numbers, which jq --slurp folds into a single record.
    ping -qAc21 \$ip |
    grep 'rtt\|transmitted' |
    sed 's_, \|/_\n_g' |
    sed 's_ packets transmitted\| received\| packet loss\|time \|rtt min\|avg\|max\|mdev = \|ipg\|ewma \| ms\|ms\|%__g' |
    grep -v '^$' |
    jq '{ source: \"'\$self'\"
        , target: \"'\$host'\"
        , sent:          .[0]
        , received:      .[1]
        , percents_lost: .[2]
        , duration_ms:   .[3]
        , ipg:           .[8]
        , ewma:          .[9]
        , rtt: { min: .[4], avg: .[5], max: .[6], mdev: .[7] }
        }' --slurp --compact-output
  }
  hostmap=${hostmap@Q}
  for host in \$(jq 'keys | .[]' <<<\$hostmap --raw-output)
  do ip=\$(jq '.[\$host]' --arg host \$host <<<\$hostmap --raw-output)
     ## NOTE(review): dropped the stray '\ ' that previously trailed \$ip --
     ## it escaped the space and glued a literal blank onto the IP argument.
     probe \$host \$ip &
  done
  ## Reap all backgrounded probes before the ssh session closes; otherwise
  ## their output can be lost when the remote shell exits.
  wait"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,105 +1,177 @@ | ||
## Print the usage text for the 'analyse' sub-command group.
## Delegates to the workbench-wide 'usage' helper (defined elsewhere).
usage_analyse() {
    usage "analyse" "Analyse cluster runs" <<EOF
    block-propagation RUN-NAME
                          Block propagation analysis for the entire cluster.

    machine-timeline RUN-NAME MACH-NAME
                          Produce a general performance timeline for MACH-NAME

    Options of 'analyse' command:

       --reanalyse         Skip the preparatory steps and launch 'locli' directly
       --time              Time the 'locli' executable runs
EOF
}
|
||
## Top-level 'wb analyse' dispatcher.
##
## Options (consumed before the sub-command):
##   --reanalyse | --re   skip log filtering/consolidation when filtered
##                        outputs already exist, and run 'locli' directly
##   --dump-logobjects    also emit the intermediate logobjects JSON
##   --time               wrap 'locli' invocations in 'time'
##
## Relies on workbench helpers defined elsewhere:  usage, run, msg, fail,
## wb, locli.
analyse() {
    local skip_preparation= time= dump_logobjects=
    while test $# -gt 0
    do case "$1" in
       --reanalyse | --re ) skip_preparation='true';;
       --dump-logobjects )  dump_logobjects='true';;
       --time )             time='eval time';;
       * ) break;; esac; shift; done

    ## No sub-command given -> print usage.
    local op=${1:-$(usage_analyse)}; shift

    case "$op" in
    block-propagation | bp )
        local usage="USAGE: wb analyse $op [RUN-NAME=current]"
        local name=${1:-current}
        local dir=$(run get "$name")
        local adir=$dir/analysis
        if test -z "$dir"
        then fail "malformed run: $name"; fi

        msg "analysing run $(jq .meta.tag "$dir"/meta.json --raw-output)"
        mkdir -p "$adir"

        ## 0. subset what we care about into the keyfile
        local keyfile=$adir/substring-keys
        locli analyse substring-keys | grep -v 'Temporary modify' > "$keyfile"
        ## Extra trace kinds needed by block-propagation analysis:
        cat >>"$keyfile" <<EOF
TraceForgedBlock
AddedToCurrentChain
TraceChainSyncServerReadBlocked.AddBlock
TraceChainSyncServerRead.AddBlock
TraceBlockFetchServerSendBlock
TraceDownloadedHeader
CompletedBlockFetch
EOF
        ## 1. enumerate logs, filter by keyfile & consolidate.
        ## Both live node dirs and side-loaded roots under analysis/:
        local logdirs=("$dir"/node-*/ "$dir"/analysis/node-*/)

        ## Skip the (expensive) filtering pass on --reanalyse, unless no
        ## filtered logs exist yet.
        if test -z "$skip_preparation" -o -z "$(ls "$adir"/logs-node-*.flt.json 2>/dev/null)"
        then
            msg "filtering logs in:  $dir/node-* "
            local jq_args=(
                --sort-keys
                --compact-output
                $(wb backend lostream-fixup-jqargs "$dir")
                ' delpaths([["app"],["env"],["loc"],["msg"],["ns"],["sev"]])
                '"$(wb backend lostream-fixup-jqexpr)"
            )
            for d in "${logdirs[@]}"
            do ## TODO: supervisor-specific logfile layout
               local logfiles="$(ls "$d"/stdout* 2>/dev/null | tac) $(ls "$d"/node-*.json 2>/dev/null)"
               if test -z "$logfiles"
               then msg "no logs in $d, skipping.."; fi
               grep -hFf "$keyfile" $logfiles |
                   jq "${jq_args[@]}" --arg dirHostname "$(basename "$d")" \
                      > "$adir"/logs-$(basename "$d").flt.json &
            done
            wait
        fi

        msg "log sizes:  (files: $(ls "$adir"/*.flt.json 2>/dev/null | wc -l), lines: $(cat "$adir"/*.flt.json | wc -l))"

        msg "analysing.."
        local locli_args=(
            --genesis         "$dir"/genesis.json
            --run-metafile    "$dir"/meta.json
            ## ->
            --timeline-pretty "$adir"/block-propagation.txt
            --analysis-json   "$adir"/block-propagation.json
        )
        if test -n "$dump_logobjects"; then
            locli_args+=(--logobjects-json "$adir"/logs-cluster.logobjects.json); fi

        ${time} locli 'analyse' 'block-propagation' \
            "${locli_args[@]}" "$adir"/*.flt.json;;

    grep-filtered-logs | grep | g )
        local usage="USAGE: wb analyse $op BLOCK [MACHSPEC=*] [RUN-NAME=current]"
        local expr=$1
        local mach=${2:-*}
        local name=${3:-current}
        local dir=$(run get "$name")
        local adir=$dir/analysis

        grep -h "$expr" "$adir"/logs-$mach.flt.json;;

    list-blocks | blocks | bs )
        local usage="USAGE: wb analyse $op [RUN-NAME=current]"
        local name=${1:-current}
        local dir=$(run get "$name")
        local adir=$dir/analysis

        ## One line per distinct forged block, across all machines.
        fgrep -h "TraceForgedBlock" "$adir"/*.flt.json |
            jq '{ at: .at, host: .host } * .data | del(.peer) | del(.slot)' -c |
            sort | uniq;;

    block-propagation-block | bpb )
        local usage="USAGE: wb analyse $op BLOCK [RUN-NAME=current]"
        local block=$1
        local name=${2:-current}
        local dir=$(run get "$name")
        local adir=$dir/analysis

        ## Trace a single block's propagation events across the cluster.
        grep -h "$block" "$adir"/*.flt.json |
            grep 'AddBlock\|TraceForgedBlock\|AddedToCurrentChain' |
            jq '{ at: .at, host: .host } * .data | del(.peer) | del(.slot)' -c |
            sort --stable | uniq;;

    machine-timeline | machine | mt )
        local usage="USAGE: wb analyse $op [RUN-NAME=current] [MACH-NAME=node-1]"
        local name=${1:-current}
        local mach=${2:-node-1}
        local dir=$(run get "$name")
        local adir=$dir/analysis

        msg "analysing run $(jq .meta.tag "$dir"/meta.json --raw-output)"
        mkdir -p "$adir"

        ## 0. subset what we care about into the keyfile
        local keyfile=$adir/substring-keys
        locli analyse substring-keys | grep -v 'Temporary modify' > "$keyfile"

        ## 'all' fans out over every host of the run.
        if test "$mach" = 'all'
        then local machs=($(wb run list-hosts $name))
        else local machs=($mach); fi

        local num_jobs="\j"   # PS-style prompt escape: current job count
        local num_threads=$(grep processor /proc/cpuinfo | wc -l)
        for mach in ${machs[*]}
        do ## Limit parallelism, keeping a few cores free:
           sleep 0.5s
           while ((${num_jobs@P} >= num_threads - 4))
           do wait -n; sleep 0.$(((RANDOM % 5) + 1))s; done
           (
             ## 1. enumerate logs, filter by keyfile & consolidate
             local logs=($(ls "$dir"/$mach/stdout* 2>/dev/null | tac) $(ls "$dir"/$mach/node-*.json 2>/dev/null) $(ls "$dir"/analysis/$mach/node-*.json 2>/dev/null)) consolidated="$adir"/logs-$mach.json

             if test -z "${logs[*]}"
             ## 'exit 0' rather than 'continue':  this body runs in a
             ## backgrounded subshell, where 'continue' is not in a loop
             ## and cannot skip the outer iteration anyway.
             then msg "no logs for $mach in run $name"; exit 0; fi

             if test -z "$skip_preparation" -o -z "$(ls "$adir"/logs-$mach.json 2>/dev/null)"
             then grep -hFf "$keyfile" "${logs[@]}" > "$consolidated"; fi

             msg "analysing logs of:  $mach  (lines: $(wc -l "$consolidated"))"
             local locli_args=(
                 --genesis         "$dir"/genesis.json
                 --run-metafile    "$dir"/meta.json
                 ## ->
                 --timeline-pretty "$adir"/logs-$mach.timeline.txt
                 --stats-csv       "$adir"/logs-$mach.stats.csv
                 --analysis-json   "$adir"/logs-$mach.analysis.json
                 # --slotstats-json "$adir"/logs-$mach.slotstats.json
                 # --timeline-csv            "$adir"/logs-$mach.timeline.csv
                 # --cpu-spans-histogram-png "$adir"/logs-"$mach".cpu85-span-lens.png
                 # --derived-vectors-0-csv   "$adir"/logs-$mach".derived.1.csv
                 # --derived-vectors-1-csv   "$adir"/logs-$mach.derived.1.csv
             )
             if test -n "$dump_logobjects"; then
                 locli_args+=(--logobjects-json "$adir"/logs-$mach.logobjects.json); fi

             ${time} locli 'analyse' 'machine-timeline' \
                 "${locli_args[@]}" "$consolidated"
           ) &
        done

        wait
        msg "analysis machine-timeline:  All done.";;

    * ) usage_analyse;; esac
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.