diff --git a/misc/benchmarking_20221111/README.md b/misc/benchmarking_20221111/README.md new file mode 100644 index 0000000..32c26d8 --- /dev/null +++ b/misc/benchmarking_20221111/README.md @@ -0,0 +1,50 @@ +## Benchmarking + +If we use linux-flow-exporter without fine-tuning, it may not perform well. For +example, if you use multiple hooks, each hooks do the memory-copy and +command-execution for each, which may slow down the performance. The following +figure shows a specific example of slow performance depending on the number of +hook chains. + +![n_hook_performance](./n_hook_performance.png) + +There are two potential causes, one is caused by the hook type and the other is +caused by the hook execution. There are two hook types, shell and command, and +since command only executes and script generates tempolary shell-script file on +demand. Therefore, if file operations are the primary cause, it should be +improved by simply changing shell to command. As you can see from the figure, +these are not affected. It seems realistic to improve the performance by +improving the hook execution. + +The number of hooks can be reduced by using a single command, although the +number of pipes remains the same, but the number of child command executions can +be reduced. We show how much performance can be improved by combining the above +tasks into a single hook. Furthermore, if we are using jq or other such queries +in a split format, aggregating the queries themselves may also improve the +performance by reducing the number of pipes. The following figure shows how much +performance can be improved by aggregating multiple hooks into single hook. + +![optimization](./optimization.png) + +The goal of the linux-flow-exporter is to process 6M flows per second. This +number is set based on the session processing performance of commercial firewall +products, assuming that the number of flowlog discharge opportunities This +number is set based on the session processing performance of commercial firewall +products when assuming that the number of flowlog discharge opportunities is +about the same as the number of new sessions. (Note that session and flow are +different units.) + +* Reference: Paloalto Firewall appliance spec + * PA5280 new session: 3.5M session/sec + * PA5280 max sessions: 100M session + * PA-7080 new session: 6M session/sec + * PA-7080 max sessions: 416M session +* [Reference: Hitachi Solutions Paloalto Catalog](https://www.hitachi-solutions.co.jp/paloalto/sp/download/thanks/pr01_6746.html) +* [Reference: Paloalto Spec Sheet](https://www.paloaltonetworks.com/resources/datasheets/product-summary-specsheet) + +To improve this performance issue, change the frequency of hook execution that +it will be independent of the number of flow-data. The following figure shows +the performance difference as the number of flows increases. By introducing +batch hook mode, it is really improved. 98+% latency is eliminated. + +![n_flow_performance](./n_flow_performance.png) diff --git a/misc/benchmarking_20221111/benchmark_logs.md b/misc/benchmarking_20221111/benchmark_logs.md new file mode 100644 index 0000000..73b8d85 --- /dev/null +++ b/misc/benchmarking_20221111/benchmark_logs.md @@ -0,0 +1,281 @@ +## Appendix + +test command +``` +> iperf3 -c 10.2.0.2 -P128 -t5 +``` + +fixed configuration +``` +maxIpfixMessageLen: 100 +timerFinishedDrainSeconds: 1 +timerForceDrainSeconds: 100000 +timerTemplateFlushSeconds: 100000 +``` + +fixed commands +``` +#!/bin/sh +mkdir -p /var/run/flowctl +echo "#!/bin/sh\njq '. + {foo1: \"foo1\"}'" > /var/run/flowctl/hook1.sh +echo "#!/bin/sh\njq '. + {foo2: \"foo2\"}'" > /var/run/flowctl/hook2.sh +echo "#!/bin/sh\njq '. + {foo3: \"foo3\"}'" > /var/run/flowctl/hook3.sh +echo "#!/bin/sh\njq '. + {foo4: \"foo4\"}'" > /var/run/flowctl/hook4.sh +echo "#!/bin/sh\njq '. + {foo5: \"foo5\"}'" > /var/run/flowctl/hook5.sh +echo "#!/bin/sh\njq '. + {foo6: \"foo6\"}'" > /var/run/flowctl/hook6.sh +echo "#!/bin/sh\njq '. + {foo7: \"foo7\"}'" > /var/run/flowctl/hook7.sh +echo "#!/bin/sh\njq '. + {foo8: \"foo8\"}'" > /var/run/flowctl/hook8.sh +cat < /var/run/flowctl/hook_all1.sh +#!/bin/sh +jq '. + {foo1: "foo1"}' \ + | jq '. + {foo2: "foo2"}' \ + | jq '. + {foo3: "foo3"}' \ + | jq '. + {foo4: "foo4"}' \ + | jq '. + {foo5: "foo5"}' \ + | jq '. + {foo6: "foo6"}' \ + | jq '. + {foo7: "foo7"}' \ + | jq '. + {foo8: "foo8"}' +EOF +cat < /var/run/flowctl/hook_all2.sh +#!/bin/sh +jq '.+{foo1:"foo1",foo2:"foo2",foo3:"foo3",foo4:"foo4",foo5:"foo5",foo6:"foo6",foo7:"foo7",foo8:"foo8"}' +EOF +chmod +x /var/run/flowctl/hook*.sh +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + +{"level":"info","msg":"drain finished flows","usec":4618,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":5086,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":7614,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - shell: "#!/bin/sh\njq '. + {foo1: \"foo1\"}'" + +{"level":"info","msg":"drain finished flows","usec":8347875,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":8343217,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":8315474,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - shell: "#!/bin/sh\njq '. + {foo1: \"foo1\"}'" + - shell: "#!/bin/sh\njq '. + {foo2: \"foo2\"}'" + +{"level":"info","msg":"drain finished flows","usec":16466085,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":16467925,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":16415695,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - shell: "#!/bin/sh\njq '. + {foo1: \"foo1\"}'" + - shell: "#!/bin/sh\njq '. + {foo2: \"foo2\"}'" + - shell: "#!/bin/sh\njq '. + {foo3: \"foo3\"}'" + - shell: "#!/bin/sh\njq '. + {foo4: \"foo4\"}'" + - shell: "#!/bin/sh\njq '. + {foo5: \"foo5\"}'" + - shell: "#!/bin/sh\njq '. + {foo6: \"foo6\"}'" + - shell: "#!/bin/sh\njq '. + {foo7: \"foo7\"}'" + - shell: "#!/bin/sh\njq '. + {foo8: \"foo8\"}'" + + +{"level":"info","msg":"drain finished flows","usec":65414095,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":65104397,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":65380369,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - command: /var/run/flowctl/hook1.sh + +{"level":"info","msg":"drain finished flows","usec":8116680,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":8073259,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":8144323,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - command: /var/run/flowctl/hook1.sh + - command: /var/run/flowctl/hook2.sh + + +{"level":"info","msg":"drain finished flows","usec":16311184,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":16360520,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":16176635,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - command: /var/run/flowctl/hook1.sh + - command: /var/run/flowctl/hook2.sh + - command: /var/run/flowctl/hook3.sh + - command: /var/run/flowctl/hook4.sh + - command: /var/run/flowctl/hook5.sh + - command: /var/run/flowctl/hook6.sh + - command: /var/run/flowctl/hook7.sh + - command: /var/run/flowctl/hook8.sh + +{"level":"info","msg":"drain finished flows","usec":64781848,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":65212016,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":64988219,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - command: /var/run/flowctl/hook_all1.sh + +{"level":"info","msg":"drain finished flows","usec":2085080,"nFlows":34} +{"level":"info","msg":"drain finished flows","usec":2090360,"nFlows":34} + +{"level":"info","msg":"drain finished flows","usec":4068813,"nFlows":66} +{"level":"info","msg":"drain finished flows","usec":4074041,"nFlows":66} +{"level":"info","msg":"drain finished flows","usec":4069566,"nFlows":66} + +{"level":"info","msg":"drain finished flows","usec":7909099,"nFlows":130} +{"level":"info","msg":"drain finished flows","usec":7975187,"nFlows":130} +{"level":"info","msg":"drain finished flows","usec":8010269,"nFlows":130} + +{"level":"info","msg":"drain finished flows","usec":15793583,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":15694148,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":15797545,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - command: /var/run/flowctl/hook_all2.sh + +{"level":"info","msg":"force drain current flows","usec":1079169,"nFlows":34} +{"level":"info","msg":"force drain current flows","usec":1077704,"nFlows":34} +{"level":"info","msg":"force drain current flows","usec":1070113,"nFlows":34} + +(2106597+2086915)/2 +(4105261+4095091+4131419)/3 + +{"level":"info","msg":"drain finished flows","usec":8125250,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":8160731,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":8215767,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - shell: | + #!/bin/sh + jq '[.[] | . + {foo1:"foo1"}]' \ + | jq '[.[] | . + {foo2:"foo2"}]' \ + | jq '[.[] | . + {foo3:"foo3"}]' \ + | jq '[.[] | . + {foo4:"foo4"}]' \ + | jq '[.[] | . + {foo5:"foo5"}]' \ + | jq '[.[] | . + {foo6:"foo6"}]' \ + | jq '[.[] | . + {foo7:"foo7"}]' \ + | jq '[.[] | . + {foo8:"foo8"}]' + +258: (129003+131560+130863)/3 + +{"level":"info","msg":"drain finished flows","usec":129003,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":131560,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":130863,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - command: /var/run/flowctl/hookbatch_all1.sh #many-jq + +34: (69832+74551+70318)/3 +{"level":"info","msg":"force drain current flows","usec":69832,"nFlows":34} +{"level":"info","msg":"force drain current flows","usec":74551,"nFlows":34} +{"level":"info","msg":"force drain current flows","usec":70318,"nFlows":34} + +66: (75263+78929+78031)/3 +{"level":"info","msg":"drain finished flows","usec":75263,"nFlows":66} +{"level":"info","msg":"drain finished flows","usec":78929,"nFlows":66} +{"level":"info","msg":"drain finished flows","usec":78031,"nFlows":66} + +130: (99978+96594+95671)/3 +{"level":"info","msg":"drain finished flows","usec":99978,"nFlows":130} +{"level":"info","msg":"drain finished flows","usec":96594,"nFlows":130} +{"level":"info","msg":"drain finished flows","usec":95671,"nFlows":130} + +258: (130711+127055+129459)/3 +{"level":"info","msg":"drain finished flows","usec":130711,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":127055,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":129459,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - shell: | + #!/bin/sh + jq '[.[] | . + {foo1:"foo1",foo2:"foo2",foo3:"foo3",foo4:"foo4",foo5:"foo5",foo6:"foo6",foo7:"foo7",foo8:"foo8"}]' + +258: (57903+52242+51003)/3 + +{"level":"info","msg":"drain finished flows","usec":57903,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":52242,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":51003,"nFlows":258} +``` + +``` +outputs: +- log: + file: /tmp/flowlog.json + hooks: + - command: /var/run/flowctl/hookbatch_all2.sh #one-jq + +34: (36970+38420+34403)/3 +{"level":"info","msg":"force drain current flows","usec":36970,"nFlows":34} +{"level":"info","msg":"force drain current flows","usec":38420,"nFlows":34} +{"level":"info","msg":"force drain current flows","usec":34403,"nFlows":34} + +66: (40202+39713+39580)/3 +{"level":"info","msg":"force drain current flows","usec":40202,"nFlows":66} +{"level":"info","msg":"force drain current flows","usec":39713,"nFlows":66} +{"level":"info","msg":"force drain current flows","usec":39580,"nFlows":66} + +130: (44911+45686+47030)/3 +{"level":"info","msg":"force drain current flows","usec":44911,"nFlows":130} +{"level":"info","msg":"force drain current flows","usec":45686,"nFlows":130} +{"level":"info","msg":"force drain current flows","usec":47030,"nFlows":130} + +258: (52168+56735+51616)/3 +{"level":"info","msg":"drain finished flows","usec":52168,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":56735,"nFlows":258} +{"level":"info","msg":"drain finished flows","usec":51616,"nFlows":258} +``` diff --git a/misc/benchmarking_20221111/generate_scripts.sh b/misc/benchmarking_20221111/generate_scripts.sh new file mode 100644 index 0000000..b39998c --- /dev/null +++ b/misc/benchmarking_20221111/generate_scripts.sh @@ -0,0 +1,41 @@ +#!/bin/sh +mkdir -p /var/run/flowctl +echo "#!/bin/sh\njq '. + {foo1: \"foo1\"}'" > /var/run/flowctl/hook1.sh +echo "#!/bin/sh\njq '. + {foo2: \"foo2\"}'" > /var/run/flowctl/hook2.sh +echo "#!/bin/sh\njq '. + {foo3: \"foo3\"}'" > /var/run/flowctl/hook3.sh +echo "#!/bin/sh\njq '. + {foo4: \"foo4\"}'" > /var/run/flowctl/hook4.sh +echo "#!/bin/sh\njq '. + {foo5: \"foo5\"}'" > /var/run/flowctl/hook5.sh +echo "#!/bin/sh\njq '. + {foo6: \"foo6\"}'" > /var/run/flowctl/hook6.sh +echo "#!/bin/sh\njq '. + {foo7: \"foo7\"}'" > /var/run/flowctl/hook7.sh +echo "#!/bin/sh\njq '. + {foo8: \"foo8\"}'" > /var/run/flowctl/hook8.sh +cat < /var/run/flowctl/hook_all1.sh +#!/bin/sh +jq '. + {foo1: "foo1"}' \ + | jq '. + {foo2: "foo2"}' \ + | jq '. + {foo3: "foo3"}' \ + | jq '. + {foo4: "foo4"}' \ + | jq '. + {foo5: "foo5"}' \ + | jq '. + {foo6: "foo6"}' \ + | jq '. + {foo7: "foo7"}' \ + | jq '. + {foo8: "foo8"}' +EOF +cat < /var/run/flowctl/hook_all2.sh +#!/bin/sh +jq '.+{foo1:"foo1",foo2:"foo2",foo3:"foo3",foo4:"foo4",foo5:"foo5",foo6:"foo6",foo7:"foo7",foo8:"foo8"}' +EOF +cat < /var/run/flowctl/hookbatch_all1.sh +#!/bin/sh +jq '[.[] | . + {foo1:"foo1"}]' \\ + | jq '[.[] | . + {foo2:"foo2"}]' \\ + | jq '[.[] | . + {foo3:"foo3"}]' \\ + | jq '[.[] | . + {foo4:"foo4"}]' \\ + | jq '[.[] | . + {foo5:"foo5"}]' \\ + | jq '[.[] | . + {foo6:"foo6"}]' \\ + | jq '[.[] | . + {foo7:"foo7"}]' \\ + | jq '[.[] | . + {foo8:"foo8"}]' +EOF +cat < /var/run/flowctl/hookbatch_all2.sh +#!/bin/sh +jq '[.[] | . + {foo1:"foo1",foo2:"foo2",foo3:"foo3",foo4:"foo4",foo5:"foo5",foo6:"foo6",foo7:"foo7",foo8:"foo8"}]' +EOF +chmod +x /var/run/flowctl/hook*.sh diff --git a/misc/benchmarking_20221111/n_flow_performance.png b/misc/benchmarking_20221111/n_flow_performance.png new file mode 100644 index 0000000..ce159b0 Binary files /dev/null and b/misc/benchmarking_20221111/n_flow_performance.png differ diff --git a/misc/benchmarking_20221111/n_flow_performance.py b/misc/benchmarking_20221111/n_flow_performance.py new file mode 100755 index 0000000..8ff77fc --- /dev/null +++ b/misc/benchmarking_20221111/n_flow_performance.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +import numpy as np +import matplotlib.pyplot as plt + +labels = ['34', '66', '130', '258'] +nobatch_many_jq = [2087720, 4070806, 7964851, 15761758] +nobatch_one_jq = [1075662, 2096756, 4110590, 8167249] +batch_many_jq = [(69832+74551+70318)/3, + (75263+78929+78031)/3, + (99978+96594+95671)/3, + (130711+127055+129459)/3] +batch_one_jq = [(36970+38420+34403)/3, + (40202+39713+39580)/3, + (44911+45686+47030)/3, + (52168+56735+51616)/3] + +x = np.arange(len(labels)) +width = 0.2 +fig, ax = plt.subplots() +rects1 = ax.bar(x - 1.5*width/1, nobatch_many_jq, width, label='nobatch-1shell-8jq') +rects2 = ax.bar(x - width/2, nobatch_one_jq, width, label='nobatch-1shell-1jq') +rects3 = ax.bar(x + width/2, batch_many_jq, width, label='batch-1shell-8jq') +rects4 = ax.bar(x + 1.5*width/1, batch_one_jq, width, label='batch-1shell-1jq') + +ax.set_ylabel('latency [usec]') +ax.set_xticks(x, labels) +ax.set_title('#Flows Performance') +ax.legend() + +ax.bar_label(rects1, padding=3) +ax.bar_label(rects2, padding=3) +ax.bar_label(rects3, padding=3) +ax.bar_label(rects4, padding=3) +fig.tight_layout() +plt.savefig("n_flow_performance.png") diff --git a/misc/benchmarking_20221111/n_hook_performance.png b/misc/benchmarking_20221111/n_hook_performance.png new file mode 100644 index 0000000..7cda344 Binary files /dev/null and b/misc/benchmarking_20221111/n_hook_performance.png differ diff --git a/misc/benchmarking_20221111/n_hook_performance.py b/misc/benchmarking_20221111/n_hook_performance.py new file mode 100755 index 0000000..8cca451 --- /dev/null +++ b/misc/benchmarking_20221111/n_hook_performance.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +import numpy as np +import matplotlib.pyplot as plt + +labels = ['nothing', '1 hook', '2 hooks', '8 hooks'] +shell_means = [5772, 8335522, 16449901, 65299620] +command_means = [5772, 8111420, 16282779, 64994027] + +x = np.arange(len(labels)) +width = 0.35 +fig, ax = plt.subplots() +rects1 = ax.bar(x - width/2, shell_means, width, label='shell') +rects2 = ax.bar(x + width/2, command_means, width, label='command') + +ax.set_ylabel('latency [usec]') +ax.set_xticks(x, labels) +ax.set_title('#Hooks Performance') +ax.legend() + +ax.bar_label(rects1, padding=3) +ax.bar_label(rects2, padding=3) +fig.tight_layout() +plt.savefig("n_hook_performance.png") diff --git a/misc/benchmarking_20221111/optimization.png b/misc/benchmarking_20221111/optimization.png new file mode 100644 index 0000000..53a2157 Binary files /dev/null and b/misc/benchmarking_20221111/optimization.png differ diff --git a/misc/benchmarking_20221111/optimization.py b/misc/benchmarking_20221111/optimization.py new file mode 100755 index 0000000..b1ea903 --- /dev/null +++ b/misc/benchmarking_20221111/optimization.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +import numpy as np +import matplotlib.pyplot as plt + +labels = ['(shell-1jq)x8', '(shell-8jq)x1', '(shell-1jq)x1'] +shell_means = [65299620, 15761758, 8167249] + +x = np.arange(len(labels)) +width = 0.35 +fig, ax = plt.subplots() +rects1 = ax.bar(x, shell_means, width, label='shell') + +ax.set_ylabel('latency [usec]') +ax.set_xticks(x, labels) +ax.set_title('Optimize Shell Hooks') +ax.legend() + +ax.bar_label(rects1, padding=3) +#ax.bar_label(rects2, padding=3) +fig.tight_layout() +plt.savefig("optimization.png")