From 7188d4a14cbe64e34cc0b3d22eb78beb9c503cc5 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Wed, 14 Aug 2024 17:34:11 -0700 Subject: [PATCH 01/24] Update existing benchmarks workflow to copy, upload and inject PGO profile. --- .github/workflows/benchmarks.yml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index ed1435ddfdf..2a2ee96d1de 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -25,6 +25,7 @@ on: env: PNG_REPORT_FILE: out.png + BENCHMARK_CPU_OUT: default.pgo BENCHMARK_RESULT: benchmark-result.txt WORKING_DIRECTORY: testing/benchmark @@ -156,6 +157,32 @@ jobs: path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} if-no-files-found: error + # The next section injects CPU profile collected by apmbench into the build. + # By copying the profile, uploading it to the artifacts and pushing it + # via a PR to update default.pgo. + + - name: Copy CPU profile + run: make cp-cpuprof + + - name: Upload CPU profile + uses: actions/upload-artifact@v4 + with: + name: cpu-profile + path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} + if-no-files-found: error + + - name: Open update PGO PR + run: | + git config user.username "apm-managed-service github actions" + git config user.email "obs-ds-intake-services-team@elastic.co" + BRANCH="update-pgo-$(date +%s)" + git checkout -b "$BRANCH" + git add default.pgo + git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." + git push -u origin "$BRANCH" + gh auth status + gh pr create -B main -H "$BRANCH" -f -R elastic/apm-server + - name: Tear down benchmark environment if: always() run: make destroy From 39ca00b1c94b4e672d524169f4b21246673ae73a Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Wed, 21 Aug 2024 16:44:09 -0700 Subject: [PATCH 02/24] Only upload benchmarks result from main branch. 
--- .github/workflows/benchmarks.yml | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 2a2ee96d1de..e7a27c866ab 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -119,13 +119,20 @@ jobs: - name: Run benchmarks autotuned if: ${{ inputs.benchmarkAgents == '' }} - run: make run-benchmark-autotuned index-benchmark-results + run: make run-benchmark-autotuned - name: Run benchmarks self tuned if: ${{ inputs.benchmarkAgents != '' }} - run: make run-benchmark index-benchmark-results + run: make run-benchmark + + # Results are only indexed and uploaded if the run happens on the main branch. + + - name: Index benchmarks result + if: github.ref == 'refs/heads/main' + run: make index-benchmark-results - name: Download PNG + if: github.ref == 'refs/heads/main' run: >- ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh ${{ secrets.KIBANA_BENCH_ENDPOINT }} @@ -134,6 +141,7 @@ jobs: $PNG_REPORT_FILE - name: Upload PNG + if: github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 with: name: kibana-png-report @@ -141,6 +149,7 @@ jobs: if-no-files-found: error - name: Upload PNG to AWS S3 + if: github.ref == 'refs/heads/main' id: s3-upload-png env: AWS_DEFAULT_REGION: us-east-1 @@ -150,8 +159,8 @@ jobs: echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" - name: Upload benchmark result + if: github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 - if: always() with: name: benchmark-result path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} @@ -173,8 +182,8 @@ jobs: - name: Open update PGO PR run: | - git config user.username "apm-managed-service github actions" - git config user.email "obs-ds-intake-services-team@elastic.co" + git config user.username "apm-server github actions" + git config user.email "apm-server@elastic.co" 
BRANCH="update-pgo-$(date +%s)" git checkout -b "$BRANCH" git add default.pgo From 2ac9c68f5585f61a947f25dbc72bbf26334fd72f Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Thu, 22 Aug 2024 14:17:45 -0700 Subject: [PATCH 03/24] Test benchmarks open PGO action. --- .github/workflows/benchmarks.yml | 342 ++++++++++++++++--------------- 1 file changed, 172 insertions(+), 170 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index e7a27c866ab..b5a7b2597e7 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -56,209 +56,211 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: actions/setup-go@v5 - with: - go-version-file: 'go.mod' + # - uses: actions/setup-go@v5 + # with: + # go-version-file: 'go.mod' - - uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302 + # - uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302 - - name: Set up env - run: | - SLUGGED_BRANCH_NAME=${{ env.GITHUB_HEAD_REF_SLUG || env.GITHUB_REF_SLUG }} - CREATED_AT=$(date +%s) - echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV" - echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV" - echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" + # - name: Set up env + # run: | + # SLUGGED_BRANCH_NAME=${{ env.GITHUB_HEAD_REF_SLUG || env.GITHUB_REF_SLUG }} + # CREATED_AT=$(date +%s) + # echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV" + # echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV" + # echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" - if [ ! -z "${{ inputs.benchmarkAgents }}" ]; then - echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV" - fi - if [ ! -z "${{ inputs.benchmarkRun }}" ]; then - echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV" - fi + # if [ ! 
-z "${{ inputs.benchmarkAgents }}" ]; then + # echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV" + # fi + # if [ ! -z "${{ inputs.benchmarkRun }}" ]; then + # echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV" + # fi - - name: Log in to the Elastic Container registry - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 - with: - registry: ${{ secrets.ELASTIC_DOCKER_REGISTRY }} - username: ${{ secrets.ELASTIC_DOCKER_USERNAME }} - password: ${{ secrets.ELASTIC_DOCKER_PASSWORD }} + # - name: Log in to the Elastic Container registry + # uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 + # with: + # registry: ${{ secrets.ELASTIC_DOCKER_REGISTRY }} + # username: ${{ secrets.ELASTIC_DOCKER_USERNAME }} + # password: ${{ secrets.ELASTIC_DOCKER_PASSWORD }} - - uses: elastic/oblt-actions/google/auth@v1 + # - uses: elastic/oblt-actions/google/auth@v1 - - uses: elastic/oblt-actions/aws/auth@v1 - with: - role-duration-seconds: 18000 # 5 hours + # - uses: elastic/oblt-actions/aws/auth@v1 + # with: + # role-duration-seconds: 18000 # 5 hours - - uses: google-github-actions/get-secretmanager-secrets@95a0b09b8348ef3d02c68c6ba5662a037e78d713 # v2.1.4 - with: - export_to_environment: true - secrets: |- - EC_API_KEY:elastic-observability/elastic-cloud-observability-team-pro-api-key + # - uses: google-github-actions/get-secretmanager-secrets@95a0b09b8348ef3d02c68c6ba5662a037e78d713 # v2.1.4 + # with: + # export_to_environment: true + # secrets: |- + # EC_API_KEY:elastic-observability/elastic-cloud-observability-team-pro-api-key - - uses: hashicorp/setup-terraform@v3 - with: - terraform_version: 1.3.7 - terraform_wrapper: false + # - uses: hashicorp/setup-terraform@v3 + # with: + # terraform_version: 1.3.7 + # terraform_wrapper: false - - name: Build apmbench - run: make apmbench $SSH_KEY terraform.tfvars + # - name: Build apmbench + # run: make apmbench $SSH_KEY terraform.tfvars - - name: Override 
docker committed version - if: ${{ ! inputs.runOnStable }} - run: make docker-override-committed-version + # - name: Override docker committed version + # if: ${{ ! inputs.runOnStable }} + # run: make docker-override-committed-version - - name: Spin up benchmark environment - id: deploy - run: | - make init apply - admin_console_url=$(terraform output -raw admin_console_url) - echo "admin_console_url=$admin_console_url" >> "$GITHUB_OUTPUT" - echo "-> infra setup done" + # - name: Spin up benchmark environment + # id: deploy + # run: | + # make init apply + # admin_console_url=$(terraform output -raw admin_console_url) + # echo "admin_console_url=$admin_console_url" >> "$GITHUB_OUTPUT" + # echo "-> infra setup done" - - name: Run benchmarks autotuned - if: ${{ inputs.benchmarkAgents == '' }} - run: make run-benchmark-autotuned + # - name: Run benchmarks autotuned + # if: ${{ inputs.benchmarkAgents == '' }} + # run: make run-benchmark-autotuned - - name: Run benchmarks self tuned - if: ${{ inputs.benchmarkAgents != '' }} - run: make run-benchmark + # - name: Run benchmarks self tuned + # if: ${{ inputs.benchmarkAgents != '' }} + # run: make run-benchmark - # Results are only indexed and uploaded if the run happens on the main branch. + # # Results are only indexed and uploaded if the run happens on the main branch. 
- - name: Index benchmarks result - if: github.ref == 'refs/heads/main' - run: make index-benchmark-results + # - name: Index benchmarks result + # if: github.ref == 'refs/heads/main' + # run: make index-benchmark-results - - name: Download PNG - if: github.ref == 'refs/heads/main' - run: >- - ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh - ${{ secrets.KIBANA_BENCH_ENDPOINT }} - ${{ secrets.KIBANA_BENCH_USERNAME }} - ${{ secrets.KIBANA_BENCH_PASSWORD }} - $PNG_REPORT_FILE + # - name: Download PNG + # if: github.ref == 'refs/heads/main' + # run: >- + # ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh + # ${{ secrets.KIBANA_BENCH_ENDPOINT }} + # ${{ secrets.KIBANA_BENCH_USERNAME }} + # ${{ secrets.KIBANA_BENCH_PASSWORD }} + # $PNG_REPORT_FILE - - name: Upload PNG - if: github.ref == 'refs/heads/main' - uses: actions/upload-artifact@v4 - with: - name: kibana-png-report - path: ${{ env.WORKING_DIRECTORY }}/${{ env.PNG_REPORT_FILE }} - if-no-files-found: error + # - name: Upload PNG + # if: github.ref == 'refs/heads/main' + # uses: actions/upload-artifact@v4 + # with: + # name: kibana-png-report + # path: ${{ env.WORKING_DIRECTORY }}/${{ env.PNG_REPORT_FILE }} + # if-no-files-found: error - - name: Upload PNG to AWS S3 - if: github.ref == 'refs/heads/main' - id: s3-upload-png - env: - AWS_DEFAULT_REGION: us-east-1 - run: | - DEST_NAME="github-run-id-${{ github.run_id }}.png" - aws s3 --debug cp ${{ env.PNG_REPORT_FILE }} s3://elastic-apm-server-benchmark-reports/${DEST_NAME} - echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" + # - name: Upload PNG to AWS S3 + # if: github.ref == 'refs/heads/main' + # id: s3-upload-png + # env: + # AWS_DEFAULT_REGION: us-east-1 + # run: | + # DEST_NAME="github-run-id-${{ github.run_id }}.png" + # aws s3 --debug cp ${{ env.PNG_REPORT_FILE }} s3://elastic-apm-server-benchmark-reports/${DEST_NAME} + # echo 
"png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" - - name: Upload benchmark result - if: github.ref == 'refs/heads/main' - uses: actions/upload-artifact@v4 - with: - name: benchmark-result - path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} - if-no-files-found: error + # - name: Upload benchmark result + # if: github.ref == 'refs/heads/main' + # uses: actions/upload-artifact@v4 + # with: + # name: benchmark-result + # path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} + # if-no-files-found: error - # The next section injects CPU profile collected by apmbench into the build. - # By copying the profile, uploading it to the artifacts and pushing it - # via a PR to update default.pgo. + # # The next section injects CPU profile collected by apmbench into the build. + # # By copying the profile, uploading it to the artifacts and pushing it + # # via a PR to update default.pgo. - - name: Copy CPU profile - run: make cp-cpuprof + # - name: Copy CPU profile + # run: make cp-cpuprof - - name: Upload CPU profile - uses: actions/upload-artifact@v4 - with: - name: cpu-profile - path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} - if-no-files-found: error + # - name: Upload CPU profile + # uses: actions/upload-artifact@v4 + # with: + # name: cpu-profile + # path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} + # if-no-files-found: error - name: Open update PGO PR run: | - git config user.username "apm-server github actions" - git config user.email "apm-server@elastic.co" + cd ${{ github.workspace }} + git config user.email "apm@elastic.co" + git config user.name "APM Server" BRANCH="update-pgo-$(date +%s)" git checkout -b "$BRANCH" + echo "test" > default.pgo git add default.pgo git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." 
git push -u origin "$BRANCH" gh auth status gh pr create -B main -H "$BRANCH" -f -R elastic/apm-server - - name: Tear down benchmark environment - if: always() - run: make destroy + # - name: Tear down benchmark environment + # if: always() + # run: make destroy - # Notify failure to Slack only on schedule (nightly run) - - if: failure() && github.event_name == 'schedule' - uses: elastic/oblt-actions/slack/notify-result@v1 - with: - bot-token: ${{ secrets.SLACK_BOT_TOKEN }} - channel-id: "#apm-server" - message: Nightly APM Server benchmarks failed! SDH Duty assignee, please have a look and follow this ! + # # Notify failure to Slack only on schedule (nightly run) + # - if: failure() && github.event_name == 'schedule' + # uses: elastic/oblt-actions/slack/notify-result@v1 + # with: + # bot-token: ${{ secrets.SLACK_BOT_TOKEN }} + # channel-id: "#apm-server" + # message: Nightly APM Server benchmarks failed! SDH Duty assignee, please have a look and follow this ! - # Notify result to Slack only on schedule (nightly run) - - if: github.event_name == 'schedule' - uses: slackapi/slack-github-action@70cd7be8e40a46e8b0eced40b0de447bdb42f68e # v1.26.0 - env: - SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - with: - channel-id: "#apm-server" - payload: | - { - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "${{ github.event_name == 'schedule' && 'Nightly' || '' }} APM Server benchmarks succesfully executed!" 
- }, - "accessory": { - "type": "button", - "style": "primary", - "text": { - "type": "plain_text", - "text": "Workflow Run #${{ github.run_id }}", - "emoji": true - }, - "url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}", - "action_id": "workflow-run-button" - } - }, - { - "type": "image", - "image_url": "${{ steps.s3-upload-png.outputs.png_report_url }}", - "alt_text": "kibana-png-report" - }, - { - "type": "actions", - "elements": [ - { - "type": "button", - "text": { - "type": "plain_text", - "text": "Benchmarks dashboard" - }, - "url": "${{ secrets.KIBANA_BENCH_DASHBOARD }}", - "action_id": "kibana-dashboard-button" - }, - { - "type": "button", - "text": { - "type": "plain_text", - "text": "Elastic Cloud deployment" - }, - "url": "${{ steps.deploy.outputs.admin_console_url }}", - "action_id": "admin-console-button" - } - ] - } - ] - } + # # Notify result to Slack only on schedule (nightly run) + # - if: github.event_name == 'schedule' + # uses: slackapi/slack-github-action@70cd7be8e40a46e8b0eced40b0de447bdb42f68e # v1.26.0 + # env: + # SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + # with: + # channel-id: "#apm-server" + # payload: | + # { + # "blocks": [ + # { + # "type": "section", + # "text": { + # "type": "mrkdwn", + # "text": "${{ github.event_name == 'schedule' && 'Nightly' || '' }} APM Server benchmarks succesfully executed!" 
+ # }, + # "accessory": { + # "type": "button", + # "style": "primary", + # "text": { + # "type": "plain_text", + # "text": "Workflow Run #${{ github.run_id }}", + # "emoji": true + # }, + # "url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}", + # "action_id": "workflow-run-button" + # } + # }, + # { + # "type": "image", + # "image_url": "${{ steps.s3-upload-png.outputs.png_report_url }}", + # "alt_text": "kibana-png-report" + # }, + # { + # "type": "actions", + # "elements": [ + # { + # "type": "button", + # "text": { + # "type": "plain_text", + # "text": "Benchmarks dashboard" + # }, + # "url": "${{ secrets.KIBANA_BENCH_DASHBOARD }}", + # "action_id": "kibana-dashboard-button" + # }, + # { + # "type": "button", + # "text": { + # "type": "plain_text", + # "text": "Elastic Cloud deployment" + # }, + # "url": "${{ steps.deploy.outputs.admin_console_url }}", + # "action_id": "admin-console-button" + # } + # ] + # } + # ] + # } From a320c3d5c3950124e74ed70b6fcb68377c97151f Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Thu, 22 Aug 2024 14:26:18 -0700 Subject: [PATCH 04/24] Test benchmarks workflow add permissions for pull requests. 
--- .github/workflows/benchmarks.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index b5a7b2597e7..78181f2f99f 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -30,7 +30,8 @@ env: WORKING_DIRECTORY: testing/benchmark permissions: - contents: read + contents: write + pull-requests: write jobs: benchmarks: @@ -39,8 +40,9 @@ jobs: run: working-directory: ${{ env.WORKING_DIRECTORY }} permissions: - contents: read + contents: write id-token: write + pull-requests: write env: SSH_KEY: ./id_rsa_terraform TF_VAR_private_key: ./id_rsa_terraform @@ -185,14 +187,18 @@ jobs: cd ${{ github.workspace }} git config user.email "apm@elastic.co" git config user.name "APM Server" + git fetch origin main + git checkout main BRANCH="update-pgo-$(date +%s)" git checkout -b "$BRANCH" echo "test" > default.pgo git add default.pgo git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." git push -u origin "$BRANCH" - gh auth status gh pr create -B main -H "$BRANCH" -f -R elastic/apm-server + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + WORKFLOW: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }} # - name: Tear down benchmark environment # if: always() From a2c42ea7cf385183702bed789624897c68359513 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Thu, 22 Aug 2024 15:00:36 -0700 Subject: [PATCH 05/24] Finalize PGO benchmark pipeline update. 
--- .github/workflows/benchmarks.yml | 340 +++++++++++++++---------------- 1 file changed, 169 insertions(+), 171 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 78181f2f99f..0a563e2c59a 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -30,8 +30,7 @@ env: WORKING_DIRECTORY: testing/benchmark permissions: - contents: write - pull-requests: write + contents: read jobs: benchmarks: @@ -58,129 +57,129 @@ jobs: steps: - uses: actions/checkout@v4 - # - uses: actions/setup-go@v5 - # with: - # go-version-file: 'go.mod' + - uses: actions/setup-go@v5 + with: + go-version-file: 'go.mod' - # - uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302 + - uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302 - # - name: Set up env - # run: | - # SLUGGED_BRANCH_NAME=${{ env.GITHUB_HEAD_REF_SLUG || env.GITHUB_REF_SLUG }} - # CREATED_AT=$(date +%s) - # echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV" - # echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV" - # echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" + - name: Set up env + run: | + SLUGGED_BRANCH_NAME=${{ env.GITHUB_HEAD_REF_SLUG || env.GITHUB_REF_SLUG }} + CREATED_AT=$(date +%s) + echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV" + echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV" + echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" - # if [ ! -z "${{ inputs.benchmarkAgents }}" ]; then - # echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV" - # fi - # if [ ! -z "${{ inputs.benchmarkRun }}" ]; then - # echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV" - # fi + if [ ! -z "${{ inputs.benchmarkAgents }}" ]; then + echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV" + fi + if [ ! 
-z "${{ inputs.benchmarkRun }}" ]; then + echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV" + fi - # - name: Log in to the Elastic Container registry - # uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 - # with: - # registry: ${{ secrets.ELASTIC_DOCKER_REGISTRY }} - # username: ${{ secrets.ELASTIC_DOCKER_USERNAME }} - # password: ${{ secrets.ELASTIC_DOCKER_PASSWORD }} + - name: Log in to the Elastic Container registry + uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 + with: + registry: ${{ secrets.ELASTIC_DOCKER_REGISTRY }} + username: ${{ secrets.ELASTIC_DOCKER_USERNAME }} + password: ${{ secrets.ELASTIC_DOCKER_PASSWORD }} - # - uses: elastic/oblt-actions/google/auth@v1 + - uses: elastic/oblt-actions/google/auth@v1 - # - uses: elastic/oblt-actions/aws/auth@v1 - # with: - # role-duration-seconds: 18000 # 5 hours + - uses: elastic/oblt-actions/aws/auth@v1 + with: + role-duration-seconds: 18000 # 5 hours - # - uses: google-github-actions/get-secretmanager-secrets@95a0b09b8348ef3d02c68c6ba5662a037e78d713 # v2.1.4 - # with: - # export_to_environment: true - # secrets: |- - # EC_API_KEY:elastic-observability/elastic-cloud-observability-team-pro-api-key + - uses: google-github-actions/get-secretmanager-secrets@95a0b09b8348ef3d02c68c6ba5662a037e78d713 # v2.1.4 + with: + export_to_environment: true + secrets: |- + EC_API_KEY:elastic-observability/elastic-cloud-observability-team-pro-api-key - # - uses: hashicorp/setup-terraform@v3 - # with: - # terraform_version: 1.3.7 - # terraform_wrapper: false + - uses: hashicorp/setup-terraform@v3 + with: + terraform_version: 1.3.7 + terraform_wrapper: false - # - name: Build apmbench - # run: make apmbench $SSH_KEY terraform.tfvars + - name: Build apmbench + run: make apmbench $SSH_KEY terraform.tfvars - # - name: Override docker committed version - # if: ${{ ! 
inputs.runOnStable }} - # run: make docker-override-committed-version + - name: Override docker committed version + if: ${{ ! inputs.runOnStable }} + run: make docker-override-committed-version - # - name: Spin up benchmark environment - # id: deploy - # run: | - # make init apply - # admin_console_url=$(terraform output -raw admin_console_url) - # echo "admin_console_url=$admin_console_url" >> "$GITHUB_OUTPUT" - # echo "-> infra setup done" + - name: Spin up benchmark environment + id: deploy + run: | + make init apply + admin_console_url=$(terraform output -raw admin_console_url) + echo "admin_console_url=$admin_console_url" >> "$GITHUB_OUTPUT" + echo "-> infra setup done" - # - name: Run benchmarks autotuned - # if: ${{ inputs.benchmarkAgents == '' }} - # run: make run-benchmark-autotuned + - name: Run benchmarks autotuned + if: ${{ inputs.benchmarkAgents == '' }} + run: make run-benchmark-autotuned - # - name: Run benchmarks self tuned - # if: ${{ inputs.benchmarkAgents != '' }} - # run: make run-benchmark + - name: Run benchmarks self tuned + if: ${{ inputs.benchmarkAgents != '' }} + run: make run-benchmark - # # Results are only indexed and uploaded if the run happens on the main branch. + # Results are only indexed and uploaded if the run happens on the main branch. 
- # - name: Index benchmarks result - # if: github.ref == 'refs/heads/main' - # run: make index-benchmark-results + - name: Index benchmarks result + if: github.ref == 'refs/heads/main' + run: make index-benchmark-results - # - name: Download PNG - # if: github.ref == 'refs/heads/main' - # run: >- - # ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh - # ${{ secrets.KIBANA_BENCH_ENDPOINT }} - # ${{ secrets.KIBANA_BENCH_USERNAME }} - # ${{ secrets.KIBANA_BENCH_PASSWORD }} - # $PNG_REPORT_FILE + - name: Download PNG + if: github.ref == 'refs/heads/main' + run: >- + ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh + ${{ secrets.KIBANA_BENCH_ENDPOINT }} + ${{ secrets.KIBANA_BENCH_USERNAME }} + ${{ secrets.KIBANA_BENCH_PASSWORD }} + $PNG_REPORT_FILE - # - name: Upload PNG - # if: github.ref == 'refs/heads/main' - # uses: actions/upload-artifact@v4 - # with: - # name: kibana-png-report - # path: ${{ env.WORKING_DIRECTORY }}/${{ env.PNG_REPORT_FILE }} - # if-no-files-found: error + - name: Upload PNG + if: github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v4 + with: + name: kibana-png-report + path: ${{ env.WORKING_DIRECTORY }}/${{ env.PNG_REPORT_FILE }} + if-no-files-found: error - # - name: Upload PNG to AWS S3 - # if: github.ref == 'refs/heads/main' - # id: s3-upload-png - # env: - # AWS_DEFAULT_REGION: us-east-1 - # run: | - # DEST_NAME="github-run-id-${{ github.run_id }}.png" - # aws s3 --debug cp ${{ env.PNG_REPORT_FILE }} s3://elastic-apm-server-benchmark-reports/${DEST_NAME} - # echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" + - name: Upload PNG to AWS S3 + if: github.ref == 'refs/heads/main' + id: s3-upload-png + env: + AWS_DEFAULT_REGION: us-east-1 + run: | + DEST_NAME="github-run-id-${{ github.run_id }}.png" + aws s3 --debug cp ${{ env.PNG_REPORT_FILE }} s3://elastic-apm-server-benchmark-reports/${DEST_NAME} + echo 
"png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" - # - name: Upload benchmark result - # if: github.ref == 'refs/heads/main' - # uses: actions/upload-artifact@v4 - # with: - # name: benchmark-result - # path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} - # if-no-files-found: error + - name: Upload benchmark result + if: github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v4 + with: + name: benchmark-result + path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} + if-no-files-found: error - # # The next section injects CPU profile collected by apmbench into the build. - # # By copying the profile, uploading it to the artifacts and pushing it - # # via a PR to update default.pgo. + # The next section injects CPU profile collected by apmbench into the build. + # By copying the profile, uploading it to the artifacts and pushing it + # via a PR to update default.pgo. - # - name: Copy CPU profile - # run: make cp-cpuprof + - name: Copy CPU profile + run: make cp-cpuprof - # - name: Upload CPU profile - # uses: actions/upload-artifact@v4 - # with: - # name: cpu-profile - # path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} - # if-no-files-found: error + - name: Upload CPU profile + uses: actions/upload-artifact@v4 + with: + name: cpu-profile + path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} + if-no-files-found: error - name: Open update PGO PR run: | @@ -191,7 +190,6 @@ jobs: git checkout main BRANCH="update-pgo-$(date +%s)" git checkout -b "$BRANCH" - echo "test" > default.pgo git add default.pgo git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." 
git push -u origin "$BRANCH" @@ -200,73 +198,73 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} WORKFLOW: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }} - # - name: Tear down benchmark environment - # if: always() - # run: make destroy + - name: Tear down benchmark environment + if: always() + run: make destroy - # # Notify failure to Slack only on schedule (nightly run) - # - if: failure() && github.event_name == 'schedule' - # uses: elastic/oblt-actions/slack/notify-result@v1 - # with: - # bot-token: ${{ secrets.SLACK_BOT_TOKEN }} - # channel-id: "#apm-server" - # message: Nightly APM Server benchmarks failed! SDH Duty assignee, please have a look and follow this ! + # Notify failure to Slack only on schedule (nightly run) + - if: failure() && github.event_name == 'schedule' + uses: elastic/oblt-actions/slack/notify-result@v1 + with: + bot-token: ${{ secrets.SLACK_BOT_TOKEN }} + channel-id: "#apm-server" + message: Nightly APM Server benchmarks failed! SDH Duty assignee, please have a look and follow this ! - # # Notify result to Slack only on schedule (nightly run) - # - if: github.event_name == 'schedule' - # uses: slackapi/slack-github-action@70cd7be8e40a46e8b0eced40b0de447bdb42f68e # v1.26.0 - # env: - # SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - # with: - # channel-id: "#apm-server" - # payload: | - # { - # "blocks": [ - # { - # "type": "section", - # "text": { - # "type": "mrkdwn", - # "text": "${{ github.event_name == 'schedule' && 'Nightly' || '' }} APM Server benchmarks succesfully executed!" 
- # }, - # "accessory": { - # "type": "button", - # "style": "primary", - # "text": { - # "type": "plain_text", - # "text": "Workflow Run #${{ github.run_id }}", - # "emoji": true - # }, - # "url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}", - # "action_id": "workflow-run-button" - # } - # }, - # { - # "type": "image", - # "image_url": "${{ steps.s3-upload-png.outputs.png_report_url }}", - # "alt_text": "kibana-png-report" - # }, - # { - # "type": "actions", - # "elements": [ - # { - # "type": "button", - # "text": { - # "type": "plain_text", - # "text": "Benchmarks dashboard" - # }, - # "url": "${{ secrets.KIBANA_BENCH_DASHBOARD }}", - # "action_id": "kibana-dashboard-button" - # }, - # { - # "type": "button", - # "text": { - # "type": "plain_text", - # "text": "Elastic Cloud deployment" - # }, - # "url": "${{ steps.deploy.outputs.admin_console_url }}", - # "action_id": "admin-console-button" - # } - # ] - # } - # ] - # } + # Notify result to Slack only on schedule (nightly run) + - if: github.event_name == 'schedule' + uses: slackapi/slack-github-action@70cd7be8e40a46e8b0eced40b0de447bdb42f68e # v1.26.0 + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + with: + channel-id: "#apm-server" + payload: | + { + "blocks": [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "${{ github.event_name == 'schedule' && 'Nightly' || '' }} APM Server benchmarks succesfully executed!" 
+ }, + "accessory": { + "type": "button", + "style": "primary", + "text": { + "type": "plain_text", + "text": "Workflow Run #${{ github.run_id }}", + "emoji": true + }, + "url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}", + "action_id": "workflow-run-button" + } + }, + { + "type": "image", + "image_url": "${{ steps.s3-upload-png.outputs.png_report_url }}", + "alt_text": "kibana-png-report" + }, + { + "type": "actions", + "elements": [ + { + "type": "button", + "text": { + "type": "plain_text", + "text": "Benchmarks dashboard" + }, + "url": "${{ secrets.KIBANA_BENCH_DASHBOARD }}", + "action_id": "kibana-dashboard-button" + }, + { + "type": "button", + "text": { + "type": "plain_text", + "text": "Elastic Cloud deployment" + }, + "url": "${{ steps.deploy.outputs.admin_console_url }}", + "action_id": "admin-console-button" + } + ] + } + ] + } From cd3c7e1fd09005f5e8702da14b3af8e4e9ef0905 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Thu, 22 Aug 2024 19:21:47 -0700 Subject: [PATCH 06/24] Copy CPU profile to the workspace dir. 
--- .github/workflows/benchmarks.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 0a563e2c59a..54b411d8acd 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -183,7 +183,8 @@ jobs: - name: Open update PGO PR run: | - cd ${{ github.workspace }} + cd "${{ github.workspace }}" + mv "$PROFILE_PATH" default.pgo git config user.email "apm@elastic.co" git config user.name "APM Server" git fetch origin main @@ -195,6 +196,7 @@ jobs: git push -u origin "$BRANCH" gh pr create -B main -H "$BRANCH" -f -R elastic/apm-server env: + PROFILE_PATH: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} WORKFLOW: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }} From ef3ca419f85a58cf5f529d5fd5cb131b053019bc Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Mon, 26 Aug 2024 10:24:18 -0700 Subject: [PATCH 07/24] Put PGO profile into main pkg. --- .github/workflows/benchmarks.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 54b411d8acd..e798054d314 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -184,14 +184,15 @@ jobs: - name: Open update PGO PR run: | cd "${{ github.workspace }}" - mv "$PROFILE_PATH" default.pgo + cp "$PROFILE_PATH" x-pack/apm-server/default.pgo + cp "$PROFILE_PATH" cmd/apm-server/default.pgo git config user.email "apm@elastic.co" git config user.name "APM Server" git fetch origin main git checkout main BRANCH="update-pgo-$(date +%s)" git checkout -b "$BRANCH" - git add default.pgo + git add x-pack/apm-server/default.pgo cmd/apm-server/default.pgo git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." 
git push -u origin "$BRANCH" gh pr create -B main -H "$BRANCH" -f -R elastic/apm-server From eed73735a572bc17749af74a8c17133a6d7ffa8a Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Mon, 26 Aug 2024 10:30:01 -0700 Subject: [PATCH 08/24] Use more self-descriptive title and body for PGO PR. --- .github/workflows/benchmarks.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index e798054d314..751cb260182 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -181,7 +181,7 @@ jobs: path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} if-no-files-found: error - - name: Open update PGO PR + - name: Open PGO PR run: | cd "${{ github.workspace }}" cp "$PROFILE_PATH" x-pack/apm-server/default.pgo @@ -195,7 +195,7 @@ jobs: git add x-pack/apm-server/default.pgo cmd/apm-server/default.pgo git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." git push -u origin "$BRANCH" - gh pr create -B main -H "$BRANCH" -f -R elastic/apm-server + gh pr create -B main -H "$BRANCH" --t "PGO: Update default.pgo" --b "Update default.pgo CPU profile from the benchmarks [workflow]($WORKFLOW)." -R elastic/apm-server env: PROFILE_PATH: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 6bbd47d5c3742f162c4d617b86fe995b7f103246 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Mon, 9 Sep 2024 17:41:49 -0700 Subject: [PATCH 09/24] Limit cpu profile size in benchtest. 
--- .github/workflows/benchmarks.yml | 11 +++++++---- systemtest/benchtest/profiles.go | 7 ++++++- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index dbbb3334429..c1f15a580e8 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -54,6 +54,10 @@ jobs: GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }} GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} + # temporarily override to get faster feedback + BENCHMARK_WARMUP_TIME: 1m + BENCHMARK_COUNT: 2 + BENCHMARK_TIME: 1m steps: - uses: actions/checkout@v4 @@ -184,18 +188,17 @@ jobs: - name: Open PGO PR run: | cd "${{ github.workspace }}" - cp "$PROFILE_PATH" x-pack/apm-server/default.pgo - cp "$PROFILE_PATH" cmd/apm-server/default.pgo + mv "$PROFILE_PATH" x-pack/apm-server/default.pgo git config user.email "apm@elastic.co" git config user.name "APM Server" git fetch origin main git checkout main BRANCH="update-pgo-$(date +%s)" git checkout -b "$BRANCH" - git add x-pack/apm-server/default.pgo cmd/apm-server/default.pgo + git add x-pack/apm-server/default.pgo git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." git push -u origin "$BRANCH" - gh pr create -B main -H "$BRANCH" --t "PGO: Update default.pgo" --b "Update default.pgo CPU profile from the benchmarks [workflow]($WORKFLOW)." -R elastic/apm-server + gh pr create -B main -H "$BRANCH" -t "PGO: Update default.pgo" -b "Update default.pgo CPU profile from the benchmarks [workflow]($WORKFLOW)." 
-R elastic/apm-server env: PROFILE_PATH: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/systemtest/benchtest/profiles.go b/systemtest/benchtest/profiles.go index 9e2ee89b43a..2214c7e764e 100644 --- a/systemtest/benchtest/profiles.go +++ b/systemtest/benchtest/profiles.go @@ -88,7 +88,12 @@ func (p *profiles) recordCPU() error { if benchConfig.CPUProfile == "" { return nil } - duration := 2 * benchConfig.Benchtime + // Limit the CPU profile collection to static 1 minute interval per a benchmark. + // Otherwise the profile will be too heavy and over influenced by the "longest" benchmark. + duration := time.Minute + if duration > benchConfig.Benchtime { + duration = benchConfig.Benchtime + } profile, err := fetchProfile("/debug/pprof/profile", duration) if err != nil { return fmt.Errorf("failed to fetch CPU profile: %w", err) From 807a23852c550ef5e413ef875809219594a47055 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Wed, 11 Sep 2024 16:21:25 -0700 Subject: [PATCH 10/24] add TF module for standalone benchmarks workflow --- systemtest/cmd/moxy/go.mod | 10 ++ systemtest/cmd/moxy/go.sum | 16 +++ systemtest/cmd/moxy/main.go | 102 +++++++++++++++ testing/benchmark-standalone/.gitignore | 3 + testing/benchmark-standalone/Makefile | 123 ++++++++++++++++++ testing/benchmark-standalone/main.tf | 89 +++++++++++++ testing/benchmark-standalone/outputs.tf | 15 +++ testing/benchmark-standalone/variables.tf | 86 ++++++++++++ testing/infra/terraform/modules/moxy/main.tf | 89 +++++++++++++ .../infra/terraform/modules/moxy/outputs.tf | 4 + .../infra/terraform/modules/moxy/provider.tf | 6 + .../infra/terraform/modules/moxy/variables.tf | 27 ++++ .../modules/standalone_apm_server/main.tf | 30 ++++- .../standalone_apm_server/variables.tf | 36 ++--- 14 files changed, 605 insertions(+), 31 deletions(-) create mode 100644 systemtest/cmd/moxy/go.mod create mode 100644 systemtest/cmd/moxy/go.sum 
create mode 100644 systemtest/cmd/moxy/main.go create mode 100644 testing/benchmark-standalone/.gitignore create mode 100644 testing/benchmark-standalone/Makefile create mode 100644 testing/benchmark-standalone/main.tf create mode 100644 testing/benchmark-standalone/outputs.tf create mode 100644 testing/benchmark-standalone/variables.tf create mode 100644 testing/infra/terraform/modules/moxy/main.tf create mode 100644 testing/infra/terraform/modules/moxy/outputs.tf create mode 100644 testing/infra/terraform/modules/moxy/provider.tf create mode 100644 testing/infra/terraform/modules/moxy/variables.tf diff --git a/systemtest/cmd/moxy/go.mod b/systemtest/cmd/moxy/go.mod new file mode 100644 index 00000000000..66d6f326211 --- /dev/null +++ b/systemtest/cmd/moxy/go.mod @@ -0,0 +1,10 @@ +module moxy + +go 1.22.5 + +require ( + github.com/klauspost/compress v1.17.9 + go.uber.org/zap v1.27.0 +) + +require go.uber.org/multierr v1.10.0 // indirect diff --git a/systemtest/cmd/moxy/go.sum b/systemtest/cmd/moxy/go.sum new file mode 100644 index 00000000000..ba30898e48e --- /dev/null +++ b/systemtest/cmd/moxy/go.sum @@ -0,0 +1,16 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod 
h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= +go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= +go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/systemtest/cmd/moxy/main.go b/systemtest/cmd/moxy/main.go new file mode 100644 index 00000000000..ed7c63d42c4 --- /dev/null +++ b/systemtest/cmd/moxy/main.go @@ -0,0 +1,102 @@ +package main + +import ( + "bufio" + "bytes" + "flag" + "fmt" + "io" + "net/http" + + "github.com/klauspost/compress/gzip" + "github.com/klauspost/compress/zstd" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" +) + +func main() { + logLevel := zap.LevelFlag( + "loglevel", zapcore.InfoLevel, + "set log level to one of: DEBUG, INFO (default), WARN, ERROR, DPANIC, PANIC, FATAL", + ) + flag.Parse() + zapcfg := zap.NewProductionConfig() + zapcfg.EncoderConfig.EncodeTime = zapcore.RFC3339TimeEncoder + zapcfg.EncoderConfig.EncodeLevel = zapcore.CapitalColorLevelEncoder + zapcfg.Encoding = "console" + zapcfg.Level = zap.NewAtomicLevelAt(*logLevel) + logger, err := zapcfg.Build() + if err != nil { + panic(err) + } + defer logger.Sync() + s := http.Server{ + Addr: ":9200", + Handler: handler(logger), + } + if err := s.ListenAndServe(); err != nil { + logger.Fatal("listen error", zap.Error(err)) + } +} + +func handler(logger *zap.Logger) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("X-Elastic-Product", "Elasticsearch") + first := true + switch r.URL.Path { + case "/_security/user/_has_privileges": + 
w.Write([]byte(`{"username":"admin","has_all_requested":true,"cluster":{},"index":{},"application":{"apm":{"-":{"event:write":true}}}}`)) + case "/_bulk": + var body io.Reader + switch r.Header.Get("Content-Encoding") { + case "gzip": + r, err := gzip.NewReader(r.Body) + if err != nil { + logger.Error("gzip reader err", zap.Error(err)) + http.Error(w, fmt.Sprintf("reader error: %v", err), http.StatusInternalServerError) + return + } + defer r.Close() + body = r + case "zstd": + r, err := zstd.NewReader(r.Body) + if err != nil { + logger.Error("zstd reader err", zap.Error(err)) + http.Error(w, fmt.Sprintf("reader error: %v", err), http.StatusInternalServerError) + return + } + defer r.Close() + body = r + default: + body = r.Body + } + + var jsonw bytes.Buffer + jsonw.Write([]byte(`{"items":[`)) + scanner := bufio.NewScanner(body) + for scanner.Scan() { + // Action is always "create", skip decoding. + if !scanner.Scan() { + logger.Error("unexpected payload") + http.Error(w, "expected source", http.StatusInternalServerError) + return + } + if first { + first = false + } else { + jsonw.WriteByte(',') + } + jsonw.Write([]byte(`{"create":{"status":201}}`)) + } + if err := scanner.Err(); err != nil { + logger.Error("scanner error", zap.Error(err)) + http.Error(w, fmt.Sprintf("scanner error: %v", err), http.StatusInternalServerError) + } else { + jsonw.Write([]byte(`]}`)) + w.Write(jsonw.Bytes()) + } + default: + logger.Error("unknown path", zap.String("path", r.URL.Path)) + } + }) +} diff --git a/testing/benchmark-standalone/.gitignore b/testing/benchmark-standalone/.gitignore new file mode 100644 index 00000000000..13bc2184027 --- /dev/null +++ b/testing/benchmark-standalone/.gitignore @@ -0,0 +1,3 @@ +docker_image.auto.tfvars +.envrc +benchmark-result.txt \ No newline at end of file diff --git a/testing/benchmark-standalone/Makefile b/testing/benchmark-standalone/Makefile new file mode 100644 index 00000000000..932a6773af4 --- /dev/null +++ 
b/testing/benchmark-standalone/Makefile @@ -0,0 +1,123 @@ +APMBENCH_PATH ?= ../../systemtest/cmd/apmbench +APMBENCH_GOOS ?= linux +APMBENCH_GOARCH ?= amd64 + +APM_SERVER_PATH ?= ../../systemtest/cmd/apmbench +APM_SERVER_GOOS ?= linux +APM_SERVER_GOARCH ?= amd64 + +BENCHMARK_WARMUP_TIME ?= 5m +BENCHMARK_AGENTS ?= 64 +BENCHMARK_COUNT ?= 6 +BENCHMARK_TIME ?= 2m +BENCHMARK_RUN ?= Benchmark +BENCHMARK_RESULT ?= benchmark-result.txt +BENCHMARK_DETAILED ?= true +BENCHMARK_EVENT_RATE ?= 0/s + +GOBENCH_INDEX ?= apmbench-standalone-v2 +GOBENCH_USERNAME ?= admin +GOBENCH_PASSWORD ?= changeme +GOBENCH_HOST ?= http://localhost:9200 +GOBENCH_DEFAULT_TAGS = apm_server_version=$(APM_SERVER_VERSION),apm_server_type=standalone + +SSH_USER ?= ec2-user +SSH_OPTS ?= -o LogLevel=ERROR -o StrictHostKeyChecking=no -o ServerAliveInterval=60 -o ServerAliveCountMax=10 +SSH_KEY ?= ~/.ssh/id_rsa_terraform +WORKER_IP = $(shell terraform output -raw public_ip) + +SHELL = /bin/bash +.SHELLFLAGS = -o pipefail -c + +# This profile will also be used by the Terraform provider. + +# export AWS_PROFILE if CI is not defined +ifeq ($(CI),) + export AWS_PROFILE ?= default +endif + + +.default: all + +.PHONY: all +all: $(SSH_KEY) terraform.tfvars apmbench auth apply + +MAKEFILE_PATH:=$(abspath $(lastword ${MAKEFILE_LIST})) +MAKEFILE_DIR:=$(dir ${MAKEFILE_PATH}) +REPO_ROOT:=$(abspath ${MAKEFILE_DIR}/../../) + +include ${MAKEFILE_DIR}/../../go.mk + +.PHONY: auth +auth: init-aws-profile + @okta-awscli --profile $(AWS_PROFILE) + +.PHONY: init-aws-profile +init-aws-profile: ~/.aws/credentials + @grep $(AWS_PROFILE) ~/.aws/credentials > /dev/null || \ + echo "[$(AWS_PROFILE)]\naws_access_key_id = x\naws_secret_access_key = x" >> ~/.aws/credentials + +~/.aws/credentials: + @mkdir -p ~/.aws + @touch $@ + +terraform.tfvars: + @sed "s/USER/$(USER)/" $(TFVARS_SOURCE) > terraform.tfvars + +.PHONY: apmbench +apmbench: + @echo "-> Building apmbench..." 
+ @cd $(APMBENCH_PATH) && CGO_ENABLED=0 GOOS=$(APMBENCH_GOOS) GOARCH=$(APMBENCH_GOARCH) go build . + +.PHONY: init +init: + @terraform init + +.PHONY: apply +apply: + @terraform apply -auto-approve + +.PHONY: destroy +destroy: + @terraform destroy -auto-approve + +cp-cpuprof: + @[ "${BENCHMARK_CPU_OUT}" ] && scp $(SSH_OPTS) -i $(SSH_KEY) "$(SSH_USER)@$(WORKER_IP):./$(BENCHMARK_CPU_OUT)" $(BENCHMARK_CPU_OUT) || echo "skipping cpu out copy" + +.PHONY: log-benckmark-profile +log-benckmark-profile: + @echo "Running benchmarks..." + @echo "Benchmark warmup time: $(BENCHMARK_WARMUP_TIME)" + @echo "Benchmark agents: $(BENCHMARK_AGENTS)" + @echo "Benchmark event rate: $(BENCHMARK_EVENT_RATE)" + @echo "Benchmark count: $(BENCHMARK_COUNT)" + @echo "Benchmark duration: $(BENCHMARK_TIME)" + @echo "Benchmark run expression : $(BENCHMARK_RUN)" + +.PHONY: run-benchmark +run-benchmark: log-benckmark-profile + @ssh $(SSH_OPTS) -i $(SSH_KEY) $(SSH_USER)@$(WORKER_IP) ". .envrc && bin/apmbench -run='$(BENCHMARK_RUN)' \ + -benchtime=$(BENCHMARK_TIME) -count=$(BENCHMARK_COUNT) -warmup-time=$(BENCHMARK_WARMUP_TIME) \ + -agents=$(BENCHMARK_AGENTS) -detailed=$(BENCHMARK_DETAILED) -event-rate=$(BENCHMARK_EVENT_RATE) -cpuprofile=$(BENCHMARK_CPU_OUT)" 2>&1 | tee $(BENCHMARK_RESULT) + +.PHONY: run-benchmark-autotuned +run-benchmark-autotuned: + $(eval APM_SERVER_SIZE:=$(shell echo var.apm_server_size | terraform console | tr -d '"'| tr -d 'g')) + @ $(MAKE) run-benchmark BENCHMARK_AGENTS=$(shell echo $$(( $(BENCHMARK_AGENTS) * $(APM_SERVER_SIZE) )) ) + +.PHONY: index-benchmark-results +index-benchmark-results: _default-gobench-vars + @cat $(BENCHMARK_RESULT) | go run -modfile=$(GITROOT)/tools/go.mod github.com/elastic/gobench -es $(GOBENCH_HOST) -es-username $(GOBENCH_USERNAME) -es-password $(GOBENCH_PASSWORD) \ + -index $(GOBENCH_INDEX) -tag "$(GOBENCH_DEFAULT_TAGS),$(GOBENCH_TAGS)" + +.PHONY: _default-gobench-vars +_default-gobench-vars: + $(eval GOBENCH_DEFAULT_TAGS = 
$(GOBENCH_DEFAULT_TAGS),apm_server_size=$(shell echo var.apm_server_size | terraform console | tr -d '"')) + $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),build_sha=$(shell curl -sL -H "Authorization: Bearer $(shell terraform output -raw apm_secret_token )" $(shell terraform output -raw apm_server_url ) | jq -r '.build_sha')) + +$(SSH_KEY): + @ssh-keygen -t rsa -b 4096 -C "$(USER)@elastic.co" -N "" -f $(SSH_KEY) + +.PHONY: ssh +ssh: + @ssh $(SSH_OPTS) -i $(SSH_KEY) $(SSH_USER)@$(WORKER_IP) diff --git a/testing/benchmark-standalone/main.tf b/testing/benchmark-standalone/main.tf new file mode 100644 index 00000000000..4f95ce372ab --- /dev/null +++ b/testing/benchmark-standalone/main.tf @@ -0,0 +1,89 @@ +terraform { + required_version = ">= 1.1.8, < 2.0.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = "~>4.17" + } + time = { + source = "hashicorp/time" + version = ">=0.9.1" + } + } +} + +resource "time_static" "created_date" {} + +locals { + ci_tags = { + environment = var.ENVIRONMENT + repo = var.REPO + branch = var.BRANCH + build = var.BUILD_ID + created_date = coalesce(var.CREATED_DATE, time_static.created_date.unix) + } +} + +module "tags" { + source = "../infra/terraform/modules/tags" + # use the convention for team/shared owned resources if we are running in CI. + # assume this is an individually owned resource otherwise. + project = startswith(var.user_name, "benchci") ? 
"benchmarks" : var.user_name +} + +provider "aws" { + region = var.worker_region +} + +locals { + name_prefix = "${coalesce(var.user_name, "unknown-user")}-bench" +} + +module "benchmark_worker" { + source = "../infra/terraform/modules/benchmark_executor" + region = var.worker_region + + user_name = var.user_name + + apm_server_url = module.standalone_apm_server.apm_server_url + apm_secret_token = module.standalone_apm_server.apm_secret_token + + apmbench_bin_path = var.apmbench_bin_path + instance_type = var.worker_instance_type + + public_key = var.public_key + private_key = var.private_key + + tags = merge(local.ci_tags, module.tags.tags) +} + +module "moxy" { + source = "../infra/terraform/modules/moxy" + worker_region = var.worker_region + + moxy_bin_path = var.moxy_bin_path + instance_type = var.moxy_instance_type + + aws_provisioner_key_name = var.private_key + + tags = merge(local.ci_tags, module.tags.tags) +} + + +module "standalone_apm_server" { + source = "../infra/terraform/modules/standalone_apm_server" + worker_region = var.worker_region + aws_os = "amzn2-ami-kernel-5.10" + + ea_managed = false + apm_server_bin_path = var.apm_server_bin_path + apm_instance_type = var.apm_instance_type + + aws_provisioner_key_name = var.private_key + + elasticsearch_url = module.moxy.moxy_url + elasticsearch_username = "" + elasticsearch_password = "" + + tags = merge(local.ci_tags, module.tags.tags) +} diff --git a/testing/benchmark-standalone/outputs.tf b/testing/benchmark-standalone/outputs.tf new file mode 100644 index 00000000000..d8459ce0108 --- /dev/null +++ b/testing/benchmark-standalone/outputs.tf @@ -0,0 +1,15 @@ +output "public_ip" { + value = module.benchmark_worker.public_ip + description = "The worker public IP" +} + +output "apm_secret_token" { + value = module.standalone_apm_server.apm_secret_token + description = "The APM Server secret token" + sensitive = true +} + +output "apm_server_url" { + value = module.standalone_apm_server.apm_server_url + 
description = "The APM Server URL" +} diff --git a/testing/benchmark-standalone/variables.tf b/testing/benchmark-standalone/variables.tf new file mode 100644 index 00000000000..ae88ceb3cb0 --- /dev/null +++ b/testing/benchmark-standalone/variables.tf @@ -0,0 +1,86 @@ +## General configuration + +variable "user_name" { + description = "Required username to use for prefixes" + type = string +} + +## Deployment configuration + +variable "apm_instance_type" { + default = "c6i.large" + type = string + description = "Optional apm server instance type" +} + +variable "apm_server_bin_path" { + default = "../../build/apm-server-linux-amd64" + type = string + description = "Optional path to the apm-server binary" +} + +variable "moxy_instance_type" { + default = "c6i.large" + type = string + description = "Optional moxy instance type" +} + +variable "moxy_bin_path" { + default = "../../systemtest/cmd/moxy" + type = string + description = "Optional path to the moxy binary" +} + +## Worker configuraiton + +variable "worker_region" { + default = "us-west-2" + description = "Optional ESS region where the deployment will be created. 
Defaults to us-west-2 (AWS)" + type = string +} + +variable "apmbench_bin_path" { + default = "../../systemtest/cmd/apmbench" + type = string + description = "Optional path to the apmbench binary" +} + +variable "worker_instance_type" { + default = "c6i.large" + type = string + description = "Optional instance type to use for the worker VM" +} + +variable "private_key" { + default = "~/.ssh/id_rsa_terraform" + type = string +} + +variable "public_key" { + default = "~/.ssh/id_rsa_terraform.pub" + type = string +} + +# CI variables +variable "BRANCH" { + description = "Branch name or pull request for tagging purposes" + default = "unknown-branch" +} + +variable "BUILD_ID" { + description = "Build ID in the CI for tagging purposes" + default = "unknown-build" +} + +variable "CREATED_DATE" { + description = "Creation date in epoch time for tagging purposes" + default = "" +} + +variable "ENVIRONMENT" { + default = "unknown-environment" +} + +variable "REPO" { + default = "unknown-repo-name" +} diff --git a/testing/infra/terraform/modules/moxy/main.tf b/testing/infra/terraform/modules/moxy/main.tf new file mode 100644 index 00000000000..b7fe27d30dd --- /dev/null +++ b/testing/infra/terraform/modules/moxy/main.tf @@ -0,0 +1,89 @@ +locals { + moxy_port = "9200" + bin_path = "/tmp/moxy" +} + +data "aws_ami" "worker_ami" { + owners = ["amazon"] + most_recent = true + + filter { + name = "name" + values = ["amzn2-ami-hvm-*-x86_64-ebs"] + } +} + +resource "aws_security_group" "main" { + egress = [ + { + cidr_blocks = ["0.0.0.0/0", ] + description = "" + from_port = 0 + ipv6_cidr_blocks = [] + prefix_list_ids = [] + protocol = "-1" + security_groups = [] + self = false + to_port = 0 + } + ] + ingress = [ + { + cidr_blocks = ["0.0.0.0/0", ] + description = "" + from_port = 22 + ipv6_cidr_blocks = [] + prefix_list_ids = [] + protocol = "tcp" + security_groups = [] + self = false + to_port = 22 + }, + { + cidr_blocks = ["0.0.0.0/0", ] + description = "" + from_port = 
local.moxy_port + ipv6_cidr_blocks = [] + prefix_list_ids = [] + protocol = "tcp" + security_groups = [] + self = false + to_port = local.moxy_port + } + ] +} + +resource "aws_instance" "moxy" { + ami = data.aws_ami.worker_ami.id + instance_type = var.instance_type + monitoring = false + key_name = aws_key_pair.provisioner_key.key_name + + connection { + type = "ssh" + user = "ec2-user" + host = self.public_ip + private_key = file("${var.aws_provisioner_key_name}") + } + + provisioner "file" { + source = var.moxy_bin_path + destination = local.bin_path + } + provisioner "remote-exec" { + inline = [ + "sudo cp ${local.bin_path} moxy", + "chmod +x moxy", + "./moxy" + ] + } + + vpc_security_group_ids = [aws_security_group.main.id] + + tags = var.tags +} + +resource "aws_key_pair" "provisioner_key" { + key_name = var.aws_provisioner_key_name + public_key = file("${var.aws_provisioner_key_name}.pub") +} diff --git a/testing/infra/terraform/modules/moxy/outputs.tf b/testing/infra/terraform/modules/moxy/outputs.tf new file mode 100644 index 00000000000..8f8465f9bc5 --- /dev/null +++ b/testing/infra/terraform/modules/moxy/outputs.tf @@ -0,0 +1,4 @@ +output "moxy_url" { + value = "${aws_instance.moxy.public_ip}:${local.moxy_port}" + description = "The Moxy Server URL" +} diff --git a/testing/infra/terraform/modules/moxy/provider.tf b/testing/infra/terraform/modules/moxy/provider.tf new file mode 100644 index 00000000000..3d860298151 --- /dev/null +++ b/testing/infra/terraform/modules/moxy/provider.tf @@ -0,0 +1,6 @@ +provider "aws" { + region = var.worker_region + default_tags { + tags = var.tags + } +} diff --git a/testing/infra/terraform/modules/moxy/variables.tf b/testing/infra/terraform/modules/moxy/variables.tf new file mode 100644 index 00000000000..2b20ba2fce9 --- /dev/null +++ b/testing/infra/terraform/modules/moxy/variables.tf @@ -0,0 +1,27 @@ +variable "worker_region" { + default = "us-west-2" + description = "Optional AWS region where the workers will be created. 
Defaults to us-west-2 (AWS)" + type = string +} + +variable "instance_type" { + type = string + description = "Moxy instance type" +} + +variable "moxy_bin_path" { + type = string + description = "Optionally use the apm-server binary from the specified path to the worker machine" +} + +variable "aws_provisioner_key_name" { + default = "" + description = "Optional ssh key name to create the aws key pair and remote provision the ec2 instance" + type = string +} + +variable "tags" { + type = map(string) + default = {} + description = "Optional set of tags to use for all resources" +} diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index 251251181b0..134dbdd9944 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -71,8 +71,10 @@ locals { "RHEL-8" = "ec2-user" "RHEL-9" = "ec2-user" } + apm_port = "8200" conf_path = "/tmp/local-apm-config.yml" + bin_path = "/tmp/apm-server" } data "aws_ami" "os" { @@ -143,7 +145,7 @@ resource "aws_security_group" "main" { resource "aws_instance" "apm" { ami = data.aws_ami.os.id - instance_type = local.instance_types[var.aws_os] + instance_type = var.apm_instance_type == "" ? local.instance_types[var.aws_os] : var.apm_instance_type key_name = aws_key_pair.provisioner_key.key_name connection { @@ -153,6 +155,11 @@ resource "aws_instance" "apm" { private_key = file("${var.aws_provisioner_key_name}") } + provisioner "file" { + source = var.apm_server_bin_path + destination = local.bin_path + } + provisioner "file" { destination = local.conf_path content = templatefile(var.ea_managed ? 
"${path.module}/elastic-agent.yml.tftpl" : "${path.module}/apm-server.yml.tftpl", { @@ -172,15 +179,24 @@ resource "aws_instance" "apm" { "sudo cp ${local.conf_path} /etc/elastic-agent/elastic-agent.yml", "sudo systemctl start elastic-agent", "sleep 1", - ] : [ - local.instance_standalone_provision_cmd[var.aws_os], - "sudo cp ${local.conf_path} /etc/apm-server/apm-server.yml", - "sudo systemctl start apm-server", - "sleep 1", - ] + ] : ( + var.apm_server_bin_path == "" ? [ + local.instance_standalone_provision_cmd[var.aws_os], + "sudo cp ${local.conf_path} /etc/apm-server/apm-server.yml", + "sudo systemctl start apm-server", + "sleep 1", + ] : [ + "sudo cp ${local.bin_path} apm-server", + "chmod +x apm-server", + "sudo cp ${local.conf_path} apm-server.yml", + "./apm-server" + ] + ) } vpc_security_group_ids = [aws_security_group.main.id] + + tags = var.tags } resource "null_resource" "apm_server_log" { diff --git a/testing/infra/terraform/modules/standalone_apm_server/variables.tf b/testing/infra/terraform/modules/standalone_apm_server/variables.tf index d0e11890832..02bbf4930ef 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/variables.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/variables.tf @@ -4,6 +4,12 @@ variable "aws_os" { type = string } +variable "apm_instance_type" { + default = "" + type = string + description = "Optional apm server instance type" +} + variable "aws_provisioner_key_name" { default = "" description = "Optional ssh key name to create the aws key pair and remote provision the ec2 instance" @@ -51,32 +57,14 @@ variable "ea_managed" { type = bool } +variable "apm_server_bin_path" { + default = "" + type = string + description = "Optionally use the apm-server binary from the specified path to the worker machine" +} + variable "tags" { type = map(string) default = {} description = "Optional set of tags to use for all deployments" } - -# CI variables -variable "BRANCH" { - description = "Branch name or pull 
request for tagging purposes" - default = "unknown" -} - -variable "BUILD_ID" { - description = "Build ID in the CI for tagging purposes" - default = "unknown" -} - -variable "CREATED_DATE" { - description = "Creation date in epoch time for tagging purposes" - default = "unknown" -} - -variable "ENVIRONMENT" { - default = "unknown" -} - -variable "REPO" { - default = "unknown" -} From 9f2e1defa2a011d60046d0d00f7768d9dfc3456b Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Wed, 11 Sep 2024 16:37:18 -0700 Subject: [PATCH 11/24] add benchmarks-standalone pipeline workflow --- .github/workflows/benchmarks-standalone.yml | 182 ++++++++++++++++++++ .github/workflows/benchmarks.yml | 43 +---- testing/benchmark-standalone/Makefile | 14 +- 3 files changed, 191 insertions(+), 48 deletions(-) create mode 100644 .github/workflows/benchmarks-standalone.yml diff --git a/.github/workflows/benchmarks-standalone.yml b/.github/workflows/benchmarks-standalone.yml new file mode 100644 index 00000000000..e521c34131b --- /dev/null +++ b/.github/workflows/benchmarks-standalone.yml @@ -0,0 +1,182 @@ +name: benchmarks + +on: + workflow_dispatch: + inputs: + benchmarkAgents: + description: 'Set the number of agents to send data to the APM Server' + required: false + type: string + benchmarkRun: + description: 'Set the expression that matches the benchmark scenarios to run' + required: false + type: string + +env: + PNG_REPORT_FILE: out.png + BENCHMARK_CPU_OUT: default.pgo + BENCHMARK_RESULT: benchmark-result.txt + WORKING_DIRECTORY: testing/benchmark-standalone + +permissions: + contents: read + +jobs: + benchmarks: + runs-on: ubuntu-latest + defaults: + run: + working-directory: ${{ env.WORKING_DIRECTORY }} + permissions: + contents: write + id-token: write + pull-requests: write + env: + SSH_KEY: ./id_rsa_terraform + TF_VAR_private_key: ./id_rsa_terraform + TF_VAR_public_key: ./id_rsa_terraform.pub + TF_VAR_BUILD_ID: ${{ github.run_id }} + TF_VAR_ENVIRONMENT: 
ci + TF_VAR_REPO: ${{ github.repository }} + GOBENCH_TAGS: branch=${{ github.head_ref || github.ref }},commit=${{ github.sha }},target_branch=${{ github.base_ref }} + GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }} + GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} + GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} + # temporarily override to get faster feedback + BENCHMARK_WARMUP_TIME: 1m + BENCHMARK_COUNT: 2 + BENCHMARK_TIME: 1m + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version-file: 'go.mod' + + - uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302 + + - name: Set up env + run: | + SLUGGED_BRANCH_NAME=${{ env.GITHUB_HEAD_REF_SLUG || env.GITHUB_REF_SLUG }} + CREATED_AT=$(date +%s) + echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV" + echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV" + echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" + + if [ ! -z "${{ inputs.benchmarkAgents }}" ]; then + echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV" + fi + if [ ! -z "${{ inputs.benchmarkRun }}" ]; then + echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV" + fi + + - uses: elastic/oblt-actions/google/auth@v1 + + - uses: elastic/oblt-actions/aws/auth@v1 + with: + role-duration-seconds: 18000 # 5 hours + + - uses: google-github-actions/get-secretmanager-secrets@95a0b09b8348ef3d02c68c6ba5662a037e78d713 # v2.1.4 + with: + export_to_environment: true + secrets: |- + EC_API_KEY:elastic-observability/elastic-cloud-observability-team-pro-api-key + + - uses: hashicorp/setup-terraform@v3 + with: + terraform_version: 1.3.7 + terraform_wrapper: false + + - name: Build APM Server + run: cd ../.. 
&& make build/apm-server-linux-amd64 + + - name: Build moxy + run: make moxy + + - name: Build apmbench + run: make apmbench + + - name: Spin up benchmark environment + id: deploy + run: make init apply + + - name: Run benchmarks autotuned + if: ${{ inputs.benchmarkAgents == '' }} + run: make run-benchmark-autotuned + + - name: Run benchmarks self tuned + if: ${{ inputs.benchmarkAgents != '' }} + run: make run-benchmark + + # Results are only indexed and uploaded if the run happens on the main branch. + + - name: Index benchmarks result + run: make index-benchmark-results + + - name: Download PNG + run: >- + ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh + ${{ secrets.KIBANA_BENCH_ENDPOINT }} + ${{ secrets.KIBANA_BENCH_USERNAME }} + ${{ secrets.KIBANA_BENCH_PASSWORD }} + $PNG_REPORT_FILE + + - name: Upload PNG + uses: actions/upload-artifact@v4 + with: + name: kibana-png-report + path: ${{ env.WORKING_DIRECTORY }}/${{ env.PNG_REPORT_FILE }} + if-no-files-found: error + + - name: Upload PNG to AWS S3 + id: s3-upload-png + env: + AWS_DEFAULT_REGION: us-east-1 + run: | + DEST_NAME="github-run-id-${{ github.run_id }}.png" + aws s3 --debug cp ${{ env.PNG_REPORT_FILE }} s3://elastic-apm-server-benchmark-reports/${DEST_NAME} + echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" + + - name: Upload benchmark result + uses: actions/upload-artifact@v4 + with: + name: benchmark-result + path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} + if-no-files-found: error + + # The next section injects CPU profile collected by apmbench into the build. + # By copying the profile, uploading it to the artifacts and pushing it + # via a PR to update default.pgo. 
+ + - name: Copy CPU profile + run: make cp-cpuprof + + - name: Upload CPU profile + uses: actions/upload-artifact@v4 + with: + name: cpu-profile + path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} + if-no-files-found: error + + - name: Open PGO PR + run: | + cd "${{ github.workspace }}" + mv "$PROFILE_PATH" x-pack/apm-server/default.pgo + git config user.email "apm@elastic.co" + git config user.name "APM Server" + git fetch origin main + git checkout main + BRANCH="update-pgo-$(date +%s)" + git checkout -b "$BRANCH" + git add x-pack/apm-server/default.pgo + git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." + git push -u origin "$BRANCH" + gh pr create -B main -H "$BRANCH" -t "PGO: Update default.pgo" -b "Update default.pgo CPU profile from the benchmarks [workflow]($WORKFLOW)." -R elastic/apm-server + env: + PROFILE_PATH: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + WORKFLOW: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }} + + - name: Tear down benchmark environment + if: always() + run: make destroy \ No newline at end of file diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index c1f15a580e8..50387e94e7a 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -25,7 +25,6 @@ on: env: PNG_REPORT_FILE: out.png - BENCHMARK_CPU_OUT: default.pgo BENCHMARK_RESULT: benchmark-result.txt WORKING_DIRECTORY: testing/benchmark @@ -39,9 +38,8 @@ jobs: run: working-directory: ${{ env.WORKING_DIRECTORY }} permissions: - contents: write + contents: read id-token: write - pull-requests: write env: SSH_KEY: ./id_rsa_terraform TF_VAR_private_key: ./id_rsa_terraform @@ -54,10 +52,6 @@ jobs: GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }} GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} - # temporarily override to get 
faster feedback - BENCHMARK_WARMUP_TIME: 1m - BENCHMARK_COUNT: 2 - BENCHMARK_TIME: 1m steps: - uses: actions/checkout@v4 @@ -130,8 +124,6 @@ jobs: if: ${{ inputs.benchmarkAgents != '' }} run: make run-benchmark - # Results are only indexed and uploaded if the run happens on the main branch. - - name: Index benchmarks result if: github.ref == 'refs/heads/main' run: make index-benchmark-results @@ -171,39 +163,6 @@ jobs: path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} if-no-files-found: error - # The next section injects CPU profile collected by apmbench into the build. - # By copying the profile, uploading it to the artifacts and pushing it - # via a PR to update default.pgo. - - - name: Copy CPU profile - run: make cp-cpuprof - - - name: Upload CPU profile - uses: actions/upload-artifact@v4 - with: - name: cpu-profile - path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} - if-no-files-found: error - - - name: Open PGO PR - run: | - cd "${{ github.workspace }}" - mv "$PROFILE_PATH" x-pack/apm-server/default.pgo - git config user.email "apm@elastic.co" - git config user.name "APM Server" - git fetch origin main - git checkout main - BRANCH="update-pgo-$(date +%s)" - git checkout -b "$BRANCH" - git add x-pack/apm-server/default.pgo - git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." - git push -u origin "$BRANCH" - gh pr create -B main -H "$BRANCH" -t "PGO: Update default.pgo" -b "Update default.pgo CPU profile from the benchmarks [workflow]($WORKFLOW)." 
-R elastic/apm-server - env: - PROFILE_PATH: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - WORKFLOW: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }} - - name: Tear down benchmark environment if: always() run: make destroy diff --git a/testing/benchmark-standalone/Makefile b/testing/benchmark-standalone/Makefile index 932a6773af4..adebfd32590 100644 --- a/testing/benchmark-standalone/Makefile +++ b/testing/benchmark-standalone/Makefile @@ -2,9 +2,9 @@ APMBENCH_PATH ?= ../../systemtest/cmd/apmbench APMBENCH_GOOS ?= linux APMBENCH_GOARCH ?= amd64 -APM_SERVER_PATH ?= ../../systemtest/cmd/apmbench -APM_SERVER_GOOS ?= linux -APM_SERVER_GOARCH ?= amd64 +MOXY_PATH ?= ../../systemtest/cmd/moxy +MOXY_GOOS ?= linux +MOXY_GOARCH ?= amd64 BENCHMARK_WARMUP_TIME ?= 5m BENCHMARK_AGENTS ?= 64 @@ -61,14 +61,16 @@ init-aws-profile: ~/.aws/credentials @mkdir -p ~/.aws @touch $@ -terraform.tfvars: - @sed "s/USER/$(USER)/" $(TFVARS_SOURCE) > terraform.tfvars - .PHONY: apmbench apmbench: @echo "-> Building apmbench..." @cd $(APMBENCH_PATH) && CGO_ENABLED=0 GOOS=$(APMBENCH_GOOS) GOARCH=$(APMBENCH_GOARCH) go build . +.PHONY: moxy +moxy: + @echo "-> Building moxy..." + @cd $(MOXY_PATH) && CGO_ENABLED=0 GOOS=$(MOXY_GOOS) GOARCH=$(MOXY_GOARCH) go build . 
+ .PHONY: init init: @terraform init From 20f9456a72aafdf53a7a3c3decbdc255dedec6f6 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Wed, 11 Sep 2024 16:41:55 -0700 Subject: [PATCH 12/24] auto trigger standalone workflow from the branch --- .github/workflows/benchmarks-elastic.yml | 235 ++++++++++++++++++++ .github/workflows/benchmarks-standalone.yml | 182 --------------- .github/workflows/benchmarks.yml | 159 +++++-------- 3 files changed, 293 insertions(+), 283 deletions(-) create mode 100644 .github/workflows/benchmarks-elastic.yml delete mode 100644 .github/workflows/benchmarks-standalone.yml diff --git a/.github/workflows/benchmarks-elastic.yml b/.github/workflows/benchmarks-elastic.yml new file mode 100644 index 00000000000..50387e94e7a --- /dev/null +++ b/.github/workflows/benchmarks-elastic.yml @@ -0,0 +1,235 @@ +name: benchmarks + +on: + workflow_dispatch: + inputs: + profile: + description: 'The system profile used to run the benchmarks' + required: false + type: string + runOnStable: + description: 'Run the benchmarks on the latest stable version' + required: false + type: boolean + default: false + benchmarkAgents: + description: 'Set the number of agents to send data to the APM Server' + required: false + type: string + benchmarkRun: + description: 'Set the expression that matches the benchmark scenarios to run' + required: false + type: string + schedule: + - cron: '0 17 * * *' + +env: + PNG_REPORT_FILE: out.png + BENCHMARK_RESULT: benchmark-result.txt + WORKING_DIRECTORY: testing/benchmark + +permissions: + contents: read + +jobs: + benchmarks: + runs-on: ubuntu-latest + defaults: + run: + working-directory: ${{ env.WORKING_DIRECTORY }} + permissions: + contents: read + id-token: write + env: + SSH_KEY: ./id_rsa_terraform + TF_VAR_private_key: ./id_rsa_terraform + TF_VAR_public_key: ./id_rsa_terraform.pub + TFVARS_SOURCE: ${{ inputs.profile || 'system-profiles/8GBx1zone.tfvars' }} # // Default to use an 8gb profile + 
TF_VAR_BUILD_ID: ${{ github.run_id }} + TF_VAR_ENVIRONMENT: ci + TF_VAR_REPO: ${{ github.repository }} + GOBENCH_TAGS: branch=${{ github.head_ref || github.ref }},commit=${{ github.sha }},target_branch=${{ github.base_ref }} + GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }} + GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} + GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version-file: 'go.mod' + + - uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302 + + - name: Set up env + run: | + SLUGGED_BRANCH_NAME=${{ env.GITHUB_HEAD_REF_SLUG || env.GITHUB_REF_SLUG }} + CREATED_AT=$(date +%s) + echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV" + echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV" + echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" + + if [ ! -z "${{ inputs.benchmarkAgents }}" ]; then + echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV" + fi + if [ ! 
-z "${{ inputs.benchmarkRun }}" ]; then + echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV" + fi + + - name: Log in to the Elastic Container registry + uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 + with: + registry: ${{ secrets.ELASTIC_DOCKER_REGISTRY }} + username: ${{ secrets.ELASTIC_DOCKER_USERNAME }} + password: ${{ secrets.ELASTIC_DOCKER_PASSWORD }} + + + - uses: elastic/oblt-actions/google/auth@v1 + + - uses: elastic/oblt-actions/aws/auth@v1 + with: + role-duration-seconds: 18000 # 5 hours + + - uses: google-github-actions/get-secretmanager-secrets@95a0b09b8348ef3d02c68c6ba5662a037e78d713 # v2.1.4 + with: + export_to_environment: true + secrets: |- + EC_API_KEY:elastic-observability/elastic-cloud-observability-team-pro-api-key + + - uses: hashicorp/setup-terraform@v3 + with: + terraform_version: 1.3.7 + terraform_wrapper: false + + - name: Build apmbench + run: make apmbench $SSH_KEY terraform.tfvars + + - name: Override docker committed version + if: ${{ ! 
inputs.runOnStable }} + run: make docker-override-committed-version + + - name: Spin up benchmark environment + id: deploy + run: | + make init apply + admin_console_url=$(terraform output -raw admin_console_url) + echo "admin_console_url=$admin_console_url" >> "$GITHUB_OUTPUT" + echo "-> infra setup done" + + - name: Run benchmarks autotuned + if: ${{ inputs.benchmarkAgents == '' }} + run: make run-benchmark-autotuned + + - name: Run benchmarks self tuned + if: ${{ inputs.benchmarkAgents != '' }} + run: make run-benchmark + + - name: Index benchmarks result + if: github.ref == 'refs/heads/main' + run: make index-benchmark-results + + - name: Download PNG + if: github.ref == 'refs/heads/main' + run: >- + ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh + ${{ secrets.KIBANA_BENCH_ENDPOINT }} + ${{ secrets.KIBANA_BENCH_USERNAME }} + ${{ secrets.KIBANA_BENCH_PASSWORD }} + $PNG_REPORT_FILE + + - name: Upload PNG + if: github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v4 + with: + name: kibana-png-report + path: ${{ env.WORKING_DIRECTORY }}/${{ env.PNG_REPORT_FILE }} + if-no-files-found: error + + - name: Upload PNG to AWS S3 + if: github.ref == 'refs/heads/main' + id: s3-upload-png + env: + AWS_DEFAULT_REGION: us-east-1 + run: | + DEST_NAME="github-run-id-${{ github.run_id }}.png" + aws s3 --debug cp ${{ env.PNG_REPORT_FILE }} s3://elastic-apm-server-benchmark-reports/${DEST_NAME} + echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" + + - name: Upload benchmark result + if: github.ref == 'refs/heads/main' + uses: actions/upload-artifact@v4 + with: + name: benchmark-result + path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} + if-no-files-found: error + + - name: Tear down benchmark environment + if: always() + run: make destroy + + # Notify failure to Slack only on schedule (nightly run) + - if: failure() && github.event_name == 'schedule' + uses: 
elastic/oblt-actions/slack/notify-result@v1 + with: + bot-token: ${{ secrets.SLACK_BOT_TOKEN }} + channel-id: "#apm-server" + message: Nightly APM Server benchmarks failed! SDH Duty assignee, please have a look and follow this ! + + # Notify result to Slack only on schedule (nightly run) + - if: github.event_name == 'schedule' + uses: slackapi/slack-github-action@37ebaef184d7626c5f204ab8d3baff4262dd30f0 # v1.27.0 + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + with: + channel-id: "#apm-server" + payload: | + { + "blocks": [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "${{ github.event_name == 'schedule' && 'Nightly' || '' }} APM Server benchmarks succesfully executed!" + }, + "accessory": { + "type": "button", + "style": "primary", + "text": { + "type": "plain_text", + "text": "Workflow Run #${{ github.run_id }}", + "emoji": true + }, + "url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}", + "action_id": "workflow-run-button" + } + }, + { + "type": "image", + "image_url": "${{ steps.s3-upload-png.outputs.png_report_url }}", + "alt_text": "kibana-png-report" + }, + { + "type": "actions", + "elements": [ + { + "type": "button", + "text": { + "type": "plain_text", + "text": "Benchmarks dashboard" + }, + "url": "${{ secrets.KIBANA_BENCH_DASHBOARD }}", + "action_id": "kibana-dashboard-button" + }, + { + "type": "button", + "text": { + "type": "plain_text", + "text": "Elastic Cloud deployment" + }, + "url": "${{ steps.deploy.outputs.admin_console_url }}", + "action_id": "admin-console-button" + } + ] + } + ] + } diff --git a/.github/workflows/benchmarks-standalone.yml b/.github/workflows/benchmarks-standalone.yml deleted file mode 100644 index e521c34131b..00000000000 --- a/.github/workflows/benchmarks-standalone.yml +++ /dev/null @@ -1,182 +0,0 @@ -name: benchmarks - -on: - workflow_dispatch: - inputs: - benchmarkAgents: - description: 'Set the number of agents to send data to the APM Server' - 
required: false - type: string - benchmarkRun: - description: 'Set the expression that matches the benchmark scenarios to run' - required: false - type: string - -env: - PNG_REPORT_FILE: out.png - BENCHMARK_CPU_OUT: default.pgo - BENCHMARK_RESULT: benchmark-result.txt - WORKING_DIRECTORY: testing/benchmark-standalone - -permissions: - contents: read - -jobs: - benchmarks: - runs-on: ubuntu-latest - defaults: - run: - working-directory: ${{ env.WORKING_DIRECTORY }} - permissions: - contents: write - id-token: write - pull-requests: write - env: - SSH_KEY: ./id_rsa_terraform - TF_VAR_private_key: ./id_rsa_terraform - TF_VAR_public_key: ./id_rsa_terraform.pub - TF_VAR_BUILD_ID: ${{ github.run_id }} - TF_VAR_ENVIRONMENT: ci - TF_VAR_REPO: ${{ github.repository }} - GOBENCH_TAGS: branch=${{ github.head_ref || github.ref }},commit=${{ github.sha }},target_branch=${{ github.base_ref }} - GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }} - GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} - GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} - # temporarily override to get faster feedback - BENCHMARK_WARMUP_TIME: 1m - BENCHMARK_COUNT: 2 - BENCHMARK_TIME: 1m - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-go@v5 - with: - go-version-file: 'go.mod' - - - uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302 - - - name: Set up env - run: | - SLUGGED_BRANCH_NAME=${{ env.GITHUB_HEAD_REF_SLUG || env.GITHUB_REF_SLUG }} - CREATED_AT=$(date +%s) - echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV" - echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV" - echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" - - if [ ! -z "${{ inputs.benchmarkAgents }}" ]; then - echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV" - fi - if [ ! 
-z "${{ inputs.benchmarkRun }}" ]; then - echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV" - fi - - - uses: elastic/oblt-actions/google/auth@v1 - - - uses: elastic/oblt-actions/aws/auth@v1 - with: - role-duration-seconds: 18000 # 5 hours - - - uses: google-github-actions/get-secretmanager-secrets@95a0b09b8348ef3d02c68c6ba5662a037e78d713 # v2.1.4 - with: - export_to_environment: true - secrets: |- - EC_API_KEY:elastic-observability/elastic-cloud-observability-team-pro-api-key - - - uses: hashicorp/setup-terraform@v3 - with: - terraform_version: 1.3.7 - terraform_wrapper: false - - - name: Build APM Server - run: cd ../.. && make build/apm-server-linux-amd64 - - - name: Build moxy - run: make moxy - - - name: Build apmbench - run: make apmbench - - - name: Spin up benchmark environment - id: deploy - run: make init apply - - - name: Run benchmarks autotuned - if: ${{ inputs.benchmarkAgents == '' }} - run: make run-benchmark-autotuned - - - name: Run benchmarks self tuned - if: ${{ inputs.benchmarkAgents != '' }} - run: make run-benchmark - - # Results are only indexed and uploaded if the run happens on the main branch. 
- - - name: Index benchmarks result - run: make index-benchmark-results - - - name: Download PNG - run: >- - ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh - ${{ secrets.KIBANA_BENCH_ENDPOINT }} - ${{ secrets.KIBANA_BENCH_USERNAME }} - ${{ secrets.KIBANA_BENCH_PASSWORD }} - $PNG_REPORT_FILE - - - name: Upload PNG - uses: actions/upload-artifact@v4 - with: - name: kibana-png-report - path: ${{ env.WORKING_DIRECTORY }}/${{ env.PNG_REPORT_FILE }} - if-no-files-found: error - - - name: Upload PNG to AWS S3 - id: s3-upload-png - env: - AWS_DEFAULT_REGION: us-east-1 - run: | - DEST_NAME="github-run-id-${{ github.run_id }}.png" - aws s3 --debug cp ${{ env.PNG_REPORT_FILE }} s3://elastic-apm-server-benchmark-reports/${DEST_NAME} - echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" - - - name: Upload benchmark result - uses: actions/upload-artifact@v4 - with: - name: benchmark-result - path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} - if-no-files-found: error - - # The next section injects CPU profile collected by apmbench into the build. - # By copying the profile, uploading it to the artifacts and pushing it - # via a PR to update default.pgo. - - - name: Copy CPU profile - run: make cp-cpuprof - - - name: Upload CPU profile - uses: actions/upload-artifact@v4 - with: - name: cpu-profile - path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} - if-no-files-found: error - - - name: Open PGO PR - run: | - cd "${{ github.workspace }}" - mv "$PROFILE_PATH" x-pack/apm-server/default.pgo - git config user.email "apm@elastic.co" - git config user.name "APM Server" - git fetch origin main - git checkout main - BRANCH="update-pgo-$(date +%s)" - git checkout -b "$BRANCH" - git add x-pack/apm-server/default.pgo - git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." 
- git push -u origin "$BRANCH" - gh pr create -B main -H "$BRANCH" -t "PGO: Update default.pgo" -b "Update default.pgo CPU profile from the benchmarks [workflow]($WORKFLOW)." -R elastic/apm-server - env: - PROFILE_PATH: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - WORKFLOW: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }} - - - name: Tear down benchmark environment - if: always() - run: make destroy \ No newline at end of file diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 50387e94e7a..e9e923fe68e 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -1,17 +1,11 @@ name: benchmarks on: + push: + branches: + - inject-build-pgo-profile workflow_dispatch: inputs: - profile: - description: 'The system profile used to run the benchmarks' - required: false - type: string - runOnStable: - description: 'Run the benchmarks on the latest stable version' - required: false - type: boolean - default: false benchmarkAgents: description: 'Set the number of agents to send data to the APM Server' required: false @@ -20,13 +14,12 @@ on: description: 'Set the expression that matches the benchmark scenarios to run' required: false type: string - schedule: - - cron: '0 17 * * *' env: PNG_REPORT_FILE: out.png + BENCHMARK_CPU_OUT: default.pgo BENCHMARK_RESULT: benchmark-result.txt - WORKING_DIRECTORY: testing/benchmark + WORKING_DIRECTORY: testing/benchmark-standalone permissions: contents: read @@ -38,13 +31,13 @@ jobs: run: working-directory: ${{ env.WORKING_DIRECTORY }} permissions: - contents: read + contents: write id-token: write + pull-requests: write env: - SSH_KEY: ./id_rsa_terraform - TF_VAR_private_key: ./id_rsa_terraform - TF_VAR_public_key: ./id_rsa_terraform.pub - TFVARS_SOURCE: ${{ inputs.profile || 'system-profiles/8GBx1zone.tfvars' }} # // Default to use an 8gb profile + 
SSH_KEY: ../benchmark/id_rsa_terraform + TF_VAR_private_key: ../benchmark/id_rsa_terraform + TF_VAR_public_key: ../benchmark/id_rsa_terraform.pub TF_VAR_BUILD_ID: ${{ github.run_id }} TF_VAR_ENVIRONMENT: ci TF_VAR_REPO: ${{ github.repository }} @@ -52,6 +45,10 @@ jobs: GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }} GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} + # temporarily override to get faster feedback + BENCHMARK_WARMUP_TIME: 1m + BENCHMARK_COUNT: 2 + BENCHMARK_TIME: 1m steps: - uses: actions/checkout@v4 @@ -67,7 +64,7 @@ jobs: CREATED_AT=$(date +%s) echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV" echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV" - echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" + echo "TF_VAR_user_name=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" if [ ! -z "${{ inputs.benchmarkAgents }}" ]; then echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV" @@ -75,7 +72,7 @@ jobs: if [ ! -z "${{ inputs.benchmarkRun }}" ]; then echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV" fi - + - name: Log in to the Elastic Container registry uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 with: @@ -83,7 +80,6 @@ jobs: username: ${{ secrets.ELASTIC_DOCKER_USERNAME }} password: ${{ secrets.ELASTIC_DOCKER_PASSWORD }} - - uses: elastic/oblt-actions/google/auth@v1 - uses: elastic/oblt-actions/aws/auth@v1 @@ -101,20 +97,18 @@ jobs: terraform_version: 1.3.7 terraform_wrapper: false - - name: Build apmbench - run: make apmbench $SSH_KEY terraform.tfvars + - name: Build APM Server + run: cd ../.. && make build/apm-server-linux-arm64 + + - name: Build moxy + run: make moxy - - name: Override docker committed version - if: ${{ ! 
inputs.runOnStable }} - run: make docker-override-committed-version + - name: Build apmbench + run: make apmbench - name: Spin up benchmark environment id: deploy - run: | - make init apply - admin_console_url=$(terraform output -raw admin_console_url) - echo "admin_console_url=$admin_console_url" >> "$GITHUB_OUTPUT" - echo "-> infra setup done" + run: make init apply - name: Run benchmarks autotuned if: ${{ inputs.benchmarkAgents == '' }} @@ -124,12 +118,12 @@ jobs: if: ${{ inputs.benchmarkAgents != '' }} run: make run-benchmark + # Results are only indexed and uploaded if the run happens on the main branch. + - name: Index benchmarks result - if: github.ref == 'refs/heads/main' run: make index-benchmark-results - name: Download PNG - if: github.ref == 'refs/heads/main' run: >- ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh ${{ secrets.KIBANA_BENCH_ENDPOINT }} @@ -138,7 +132,6 @@ jobs: $PNG_REPORT_FILE - name: Upload PNG - if: github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 with: name: kibana-png-report @@ -146,7 +139,6 @@ jobs: if-no-files-found: error - name: Upload PNG to AWS S3 - if: github.ref == 'refs/heads/main' id: s3-upload-png env: AWS_DEFAULT_REGION: us-east-1 @@ -156,80 +148,45 @@ jobs: echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" - name: Upload benchmark result - if: github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 with: name: benchmark-result path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} if-no-files-found: error - - name: Tear down benchmark environment - if: always() - run: make destroy + # The next section injects CPU profile collected by apmbench into the build. + # By copying the profile, uploading it to the artifacts and pushing it + # via a PR to update default.pgo. 
- # Notify failure to Slack only on schedule (nightly run) - - if: failure() && github.event_name == 'schedule' - uses: elastic/oblt-actions/slack/notify-result@v1 + - name: Copy CPU profile + run: make cp-cpuprof + + - name: Upload CPU profile + uses: actions/upload-artifact@v4 with: - bot-token: ${{ secrets.SLACK_BOT_TOKEN }} - channel-id: "#apm-server" - message: Nightly APM Server benchmarks failed! SDH Duty assignee, please have a look and follow this ! + name: cpu-profile + path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} + if-no-files-found: error - # Notify result to Slack only on schedule (nightly run) - - if: github.event_name == 'schedule' - uses: slackapi/slack-github-action@37ebaef184d7626c5f204ab8d3baff4262dd30f0 # v1.27.0 + - name: Open PGO PR + run: | + cd "${{ github.workspace }}" + mv "$PROFILE_PATH" x-pack/apm-server/default.pgo + git config user.email "apm@elastic.co" + git config user.name "APM Server" + git fetch origin main + git checkout main + BRANCH="update-pgo-$(date +%s)" + git checkout -b "$BRANCH" + git add x-pack/apm-server/default.pgo + git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW." + git push -u origin "$BRANCH" + gh pr create -B main -H "$BRANCH" -t "PGO: Update default.pgo" -b "Update default.pgo CPU profile from the benchmarks [workflow]($WORKFLOW)." -R elastic/apm-server env: - SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - with: - channel-id: "#apm-server" - payload: | - { - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "${{ github.event_name == 'schedule' && 'Nightly' || '' }} APM Server benchmarks succesfully executed!" 
- }, - "accessory": { - "type": "button", - "style": "primary", - "text": { - "type": "plain_text", - "text": "Workflow Run #${{ github.run_id }}", - "emoji": true - }, - "url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}", - "action_id": "workflow-run-button" - } - }, - { - "type": "image", - "image_url": "${{ steps.s3-upload-png.outputs.png_report_url }}", - "alt_text": "kibana-png-report" - }, - { - "type": "actions", - "elements": [ - { - "type": "button", - "text": { - "type": "plain_text", - "text": "Benchmarks dashboard" - }, - "url": "${{ secrets.KIBANA_BENCH_DASHBOARD }}", - "action_id": "kibana-dashboard-button" - }, - { - "type": "button", - "text": { - "type": "plain_text", - "text": "Elastic Cloud deployment" - }, - "url": "${{ steps.deploy.outputs.admin_console_url }}", - "action_id": "admin-console-button" - } - ] - } - ] - } + PROFILE_PATH: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + WORKFLOW: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }} + + - name: Tear down benchmark environment + if: always() + run: make destroy \ No newline at end of file From 85c0dfbd05218c1df1dc0739bbd93562a258c4b1 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Wed, 11 Sep 2024 17:42:07 -0700 Subject: [PATCH 13/24] merge benchmarks and benchmarks-standalone into one workflow --- .github/workflows/benchmarks-elastic.yml | 235 ------------------ .github/workflows/benchmarks.yml | 133 ++++++++-- testing/benchmark-standalone/.gitignore | 3 - testing/benchmark-standalone/Makefile | 125 ---------- testing/benchmark-standalone/main.tf | 89 ------- testing/benchmark-standalone/outputs.tf | 15 -- testing/benchmark-standalone/variables.tf | 86 ------- testing/benchmark/Makefile | 9 + testing/benchmark/main.tf | 36 ++- testing/benchmark/variables.tf | 18 ++ 
.../infra/terraform/modules/moxy/provider.tf | 6 - .../infra/terraform/modules/moxy/variables.tf | 6 - .../modules/standalone_apm_server/provider.tf | 6 - .../standalone_apm_server/variables.tf | 6 - 14 files changed, 177 insertions(+), 596 deletions(-) delete mode 100644 .github/workflows/benchmarks-elastic.yml delete mode 100644 testing/benchmark-standalone/.gitignore delete mode 100644 testing/benchmark-standalone/Makefile delete mode 100644 testing/benchmark-standalone/main.tf delete mode 100644 testing/benchmark-standalone/outputs.tf delete mode 100644 testing/benchmark-standalone/variables.tf delete mode 100644 testing/infra/terraform/modules/moxy/provider.tf delete mode 100644 testing/infra/terraform/modules/standalone_apm_server/provider.tf diff --git a/.github/workflows/benchmarks-elastic.yml b/.github/workflows/benchmarks-elastic.yml deleted file mode 100644 index 50387e94e7a..00000000000 --- a/.github/workflows/benchmarks-elastic.yml +++ /dev/null @@ -1,235 +0,0 @@ -name: benchmarks - -on: - workflow_dispatch: - inputs: - profile: - description: 'The system profile used to run the benchmarks' - required: false - type: string - runOnStable: - description: 'Run the benchmarks on the latest stable version' - required: false - type: boolean - default: false - benchmarkAgents: - description: 'Set the number of agents to send data to the APM Server' - required: false - type: string - benchmarkRun: - description: 'Set the expression that matches the benchmark scenarios to run' - required: false - type: string - schedule: - - cron: '0 17 * * *' - -env: - PNG_REPORT_FILE: out.png - BENCHMARK_RESULT: benchmark-result.txt - WORKING_DIRECTORY: testing/benchmark - -permissions: - contents: read - -jobs: - benchmarks: - runs-on: ubuntu-latest - defaults: - run: - working-directory: ${{ env.WORKING_DIRECTORY }} - permissions: - contents: read - id-token: write - env: - SSH_KEY: ./id_rsa_terraform - TF_VAR_private_key: ./id_rsa_terraform - TF_VAR_public_key: 
./id_rsa_terraform.pub - TFVARS_SOURCE: ${{ inputs.profile || 'system-profiles/8GBx1zone.tfvars' }} # // Default to use an 8gb profile - TF_VAR_BUILD_ID: ${{ github.run_id }} - TF_VAR_ENVIRONMENT: ci - TF_VAR_REPO: ${{ github.repository }} - GOBENCH_TAGS: branch=${{ github.head_ref || github.ref }},commit=${{ github.sha }},target_branch=${{ github.base_ref }} - GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }} - GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} - GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-go@v5 - with: - go-version-file: 'go.mod' - - - uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302 - - - name: Set up env - run: | - SLUGGED_BRANCH_NAME=${{ env.GITHUB_HEAD_REF_SLUG || env.GITHUB_REF_SLUG }} - CREATED_AT=$(date +%s) - echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV" - echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV" - echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" - - if [ ! -z "${{ inputs.benchmarkAgents }}" ]; then - echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV" - fi - if [ ! 
-z "${{ inputs.benchmarkRun }}" ]; then - echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV" - fi - - - name: Log in to the Elastic Container registry - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 - with: - registry: ${{ secrets.ELASTIC_DOCKER_REGISTRY }} - username: ${{ secrets.ELASTIC_DOCKER_USERNAME }} - password: ${{ secrets.ELASTIC_DOCKER_PASSWORD }} - - - - uses: elastic/oblt-actions/google/auth@v1 - - - uses: elastic/oblt-actions/aws/auth@v1 - with: - role-duration-seconds: 18000 # 5 hours - - - uses: google-github-actions/get-secretmanager-secrets@95a0b09b8348ef3d02c68c6ba5662a037e78d713 # v2.1.4 - with: - export_to_environment: true - secrets: |- - EC_API_KEY:elastic-observability/elastic-cloud-observability-team-pro-api-key - - - uses: hashicorp/setup-terraform@v3 - with: - terraform_version: 1.3.7 - terraform_wrapper: false - - - name: Build apmbench - run: make apmbench $SSH_KEY terraform.tfvars - - - name: Override docker committed version - if: ${{ ! 
inputs.runOnStable }} - run: make docker-override-committed-version - - - name: Spin up benchmark environment - id: deploy - run: | - make init apply - admin_console_url=$(terraform output -raw admin_console_url) - echo "admin_console_url=$admin_console_url" >> "$GITHUB_OUTPUT" - echo "-> infra setup done" - - - name: Run benchmarks autotuned - if: ${{ inputs.benchmarkAgents == '' }} - run: make run-benchmark-autotuned - - - name: Run benchmarks self tuned - if: ${{ inputs.benchmarkAgents != '' }} - run: make run-benchmark - - - name: Index benchmarks result - if: github.ref == 'refs/heads/main' - run: make index-benchmark-results - - - name: Download PNG - if: github.ref == 'refs/heads/main' - run: >- - ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh - ${{ secrets.KIBANA_BENCH_ENDPOINT }} - ${{ secrets.KIBANA_BENCH_USERNAME }} - ${{ secrets.KIBANA_BENCH_PASSWORD }} - $PNG_REPORT_FILE - - - name: Upload PNG - if: github.ref == 'refs/heads/main' - uses: actions/upload-artifact@v4 - with: - name: kibana-png-report - path: ${{ env.WORKING_DIRECTORY }}/${{ env.PNG_REPORT_FILE }} - if-no-files-found: error - - - name: Upload PNG to AWS S3 - if: github.ref == 'refs/heads/main' - id: s3-upload-png - env: - AWS_DEFAULT_REGION: us-east-1 - run: | - DEST_NAME="github-run-id-${{ github.run_id }}.png" - aws s3 --debug cp ${{ env.PNG_REPORT_FILE }} s3://elastic-apm-server-benchmark-reports/${DEST_NAME} - echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" - - - name: Upload benchmark result - if: github.ref == 'refs/heads/main' - uses: actions/upload-artifact@v4 - with: - name: benchmark-result - path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }} - if-no-files-found: error - - - name: Tear down benchmark environment - if: always() - run: make destroy - - # Notify failure to Slack only on schedule (nightly run) - - if: failure() && github.event_name == 'schedule' - uses: 
elastic/oblt-actions/slack/notify-result@v1 - with: - bot-token: ${{ secrets.SLACK_BOT_TOKEN }} - channel-id: "#apm-server" - message: Nightly APM Server benchmarks failed! SDH Duty assignee, please have a look and follow this ! - - # Notify result to Slack only on schedule (nightly run) - - if: github.event_name == 'schedule' - uses: slackapi/slack-github-action@37ebaef184d7626c5f204ab8d3baff4262dd30f0 # v1.27.0 - env: - SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} - with: - channel-id: "#apm-server" - payload: | - { - "blocks": [ - { - "type": "section", - "text": { - "type": "mrkdwn", - "text": "${{ github.event_name == 'schedule' && 'Nightly' || '' }} APM Server benchmarks succesfully executed!" - }, - "accessory": { - "type": "button", - "style": "primary", - "text": { - "type": "plain_text", - "text": "Workflow Run #${{ github.run_id }}", - "emoji": true - }, - "url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}", - "action_id": "workflow-run-button" - } - }, - { - "type": "image", - "image_url": "${{ steps.s3-upload-png.outputs.png_report_url }}", - "alt_text": "kibana-png-report" - }, - { - "type": "actions", - "elements": [ - { - "type": "button", - "text": { - "type": "plain_text", - "text": "Benchmarks dashboard" - }, - "url": "${{ secrets.KIBANA_BENCH_DASHBOARD }}", - "action_id": "kibana-dashboard-button" - }, - { - "type": "button", - "text": { - "type": "plain_text", - "text": "Elastic Cloud deployment" - }, - "url": "${{ steps.deploy.outputs.admin_console_url }}", - "action_id": "admin-console-button" - } - ] - } - ] - } diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index e9e923fe68e..16783c5f3b0 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -1,11 +1,22 @@ name: benchmarks on: - push: - branches: - - inject-build-pgo-profile workflow_dispatch: inputs: + runStandalone: + description: 'Run the benchmarks against standalone APM Server 
with Moxy' + required: false + type: boolean + default: false + profile: + description: 'The system profile used to run the benchmarks' + required: false + type: string + runOnStable: + description: 'Run the benchmarks on the latest stable version' + required: false + type: boolean + default: false benchmarkAgents: description: 'Set the number of agents to send data to the APM Server' required: false @@ -14,12 +25,14 @@ on: description: 'Set the expression that matches the benchmark scenarios to run' required: false type: string + schedule: + - cron: '0 17 * * *' env: PNG_REPORT_FILE: out.png BENCHMARK_CPU_OUT: default.pgo BENCHMARK_RESULT: benchmark-result.txt - WORKING_DIRECTORY: testing/benchmark-standalone + WORKING_DIRECTORY: testing/benchmark permissions: contents: read @@ -35,9 +48,11 @@ jobs: id-token: write pull-requests: write env: - SSH_KEY: ../benchmark/id_rsa_terraform - TF_VAR_private_key: ../benchmark/id_rsa_terraform - TF_VAR_public_key: ../benchmark/id_rsa_terraform.pub + SSH_KEY: ./id_rsa_terraform + TF_VAR_private_key: ./id_rsa_terraform + TF_VAR_public_key: ./id_rsa_terraform.pub + TF_VAR_run_standalone: ${{ inputs.runStandalone }} + TFVARS_SOURCE: ${{ inputs.profile || 'system-profiles/8GBx1zone.tfvars' }} # // Default to use an 8gb profile TF_VAR_BUILD_ID: ${{ github.run_id }} TF_VAR_ENVIRONMENT: ci TF_VAR_REPO: ${{ github.repository }} @@ -64,7 +79,7 @@ jobs: CREATED_AT=$(date +%s) echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV" echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV" - echo "TF_VAR_user_name=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" + echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV" if [ ! -z "${{ inputs.benchmarkAgents }}" ]; then echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV" @@ -72,7 +87,7 @@ jobs: if [ ! 
-z "${{ inputs.benchmarkRun }}" ]; then echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV" fi - + - name: Log in to the Elastic Container registry uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0 with: @@ -80,6 +95,7 @@ jobs: username: ${{ secrets.ELASTIC_DOCKER_USERNAME }} password: ${{ secrets.ELASTIC_DOCKER_PASSWORD }} + - uses: elastic/oblt-actions/google/auth@v1 - uses: elastic/oblt-actions/aws/auth@v1 @@ -97,18 +113,26 @@ jobs: terraform_version: 1.3.7 terraform_wrapper: false - - name: Build APM Server - run: cd ../.. && make build/apm-server-linux-arm64 + - name: Build apmbench + run: make apmbench $SSH_KEY terraform.tfvars - - name: Build moxy - run: make moxy + - name: Build APM Server and Moxy + if: ${{ inputs.runStandalone }} + run: | + make moxy + cd ../.. && make build/apm-server-linux-arm64 - - name: Build apmbench - run: make apmbench + - name: Override docker committed version + if: ${{ ! inputs.runOnStable && ! inputs.runStandalone}} + run: make docker-override-committed-version - name: Spin up benchmark environment id: deploy - run: make init apply + run: | + make init apply + admin_console_url=$(terraform output -raw admin_console_url) + echo "admin_console_url=$admin_console_url" >> "$GITHUB_OUTPUT" + echo "-> infra setup done" - name: Run benchmarks autotuned if: ${{ inputs.benchmarkAgents == '' }} @@ -121,9 +145,11 @@ jobs: # Results are only indexed and uploaded if the run happens on the main branch. 
- name: Index benchmarks result + if: github.ref == 'refs/heads/main' run: make index-benchmark-results - name: Download PNG + if: github.ref == 'refs/heads/main' run: >- ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh ${{ secrets.KIBANA_BENCH_ENDPOINT }} @@ -132,6 +158,7 @@ jobs: $PNG_REPORT_FILE - name: Upload PNG + if: github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 with: name: kibana-png-report @@ -139,6 +166,7 @@ jobs: if-no-files-found: error - name: Upload PNG to AWS S3 + if: github.ref == 'refs/heads/main' id: s3-upload-png env: AWS_DEFAULT_REGION: us-east-1 @@ -148,6 +176,7 @@ jobs: echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" - name: Upload benchmark result + if: github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 with: name: benchmark-result @@ -159,9 +188,11 @@ jobs: # via a PR to update default.pgo. - name: Copy CPU profile + if: ${{ inputs.runStandalone }} run: make cp-cpuprof - name: Upload CPU profile + if: ${{ inputs.runStandalone }} uses: actions/upload-artifact@v4 with: name: cpu-profile @@ -169,6 +200,7 @@ jobs: if-no-files-found: error - name: Open PGO PR + if: ${{ inputs.runStandalone }} run: | cd "${{ github.workspace }}" mv "$PROFILE_PATH" x-pack/apm-server/default.pgo @@ -189,4 +221,71 @@ jobs: - name: Tear down benchmark environment if: always() - run: make destroy \ No newline at end of file + run: make destroy + + # Notify failure to Slack only on schedule (nightly run) + - if: failure() && github.event_name == 'schedule' + uses: elastic/oblt-actions/slack/notify-result@v1 + with: + bot-token: ${{ secrets.SLACK_BOT_TOKEN }} + channel-id: "#apm-server" + message: Nightly APM Server benchmarks failed! SDH Duty assignee, please have a look and follow this ! 
+ + # Notify result to Slack only on schedule (nightly run) + - if: github.event_name == 'schedule' + uses: slackapi/slack-github-action@37ebaef184d7626c5f204ab8d3baff4262dd30f0 # v1.27.0 + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + with: + channel-id: "#apm-server" + payload: | + { + "blocks": [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "${{ github.event_name == 'schedule' && 'Nightly' || '' }} APM Server benchmarks succesfully executed!" + }, + "accessory": { + "type": "button", + "style": "primary", + "text": { + "type": "plain_text", + "text": "Workflow Run #${{ github.run_id }}", + "emoji": true + }, + "url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}", + "action_id": "workflow-run-button" + } + }, + { + "type": "image", + "image_url": "${{ steps.s3-upload-png.outputs.png_report_url }}", + "alt_text": "kibana-png-report" + }, + { + "type": "actions", + "elements": [ + { + "type": "button", + "text": { + "type": "plain_text", + "text": "Benchmarks dashboard" + }, + "url": "${{ secrets.KIBANA_BENCH_DASHBOARD }}", + "action_id": "kibana-dashboard-button" + }, + { + "type": "button", + "text": { + "type": "plain_text", + "text": "Elastic Cloud deployment" + }, + "url": "${{ steps.deploy.outputs.admin_console_url }}", + "action_id": "admin-console-button" + } + ] + } + ] + } diff --git a/testing/benchmark-standalone/.gitignore b/testing/benchmark-standalone/.gitignore deleted file mode 100644 index 13bc2184027..00000000000 --- a/testing/benchmark-standalone/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -docker_image.auto.tfvars -.envrc -benchmark-result.txt \ No newline at end of file diff --git a/testing/benchmark-standalone/Makefile b/testing/benchmark-standalone/Makefile deleted file mode 100644 index adebfd32590..00000000000 --- a/testing/benchmark-standalone/Makefile +++ /dev/null @@ -1,125 +0,0 @@ -APMBENCH_PATH ?= ../../systemtest/cmd/apmbench -APMBENCH_GOOS ?= linux -APMBENCH_GOARCH 
?= amd64 - -MOXY_PATH ?= ../../systemtest/cmd/moxy -MOXY_GOOS ?= linux -MOXY_GOARCH ?= amd64 - -BENCHMARK_WARMUP_TIME ?= 5m -BENCHMARK_AGENTS ?= 64 -BENCHMARK_COUNT ?= 6 -BENCHMARK_TIME ?= 2m -BENCHMARK_RUN ?= Benchmark -BENCHMARK_RESULT ?= benchmark-result.txt -BENCHMARK_DETAILED ?= true -BENCHMARK_EVENT_RATE ?= 0/s - -GOBENCH_INDEX ?= apmbench-standalone-v2 -GOBENCH_USERNAME ?= admin -GOBENCH_PASSWORD ?= changeme -GOBENCH_HOST ?= http://localhost:9200 -GOBENCH_DEFAULT_TAGS = apm_server_version=$(APM_SERVER_VERSION),apm_server_type=standalone - -SSH_USER ?= ec2-user -SSH_OPTS ?= -o LogLevel=ERROR -o StrictHostKeyChecking=no -o ServerAliveInterval=60 -o ServerAliveCountMax=10 -SSH_KEY ?= ~/.ssh/id_rsa_terraform -WORKER_IP = $(shell terraform output -raw public_ip) - -SHELL = /bin/bash -.SHELLFLAGS = -o pipefail -c - -# This profile will also be used by the Terraform provider. - -# export AWS_PROFILE if CI is not defined -ifeq ($(CI),) - export AWS_PROFILE ?= default -endif - - -.default: all - -.PHONY: all -all: $(SSH_KEY) terraform.tfvars apmbench auth apply - -MAKEFILE_PATH:=$(abspath $(lastword ${MAKEFILE_LIST})) -MAKEFILE_DIR:=$(dir ${MAKEFILE_PATH}) -REPO_ROOT:=$(abspath ${MAKEFILE_DIR}/../../) - -include ${MAKEFILE_DIR}/../../go.mk - -.PHONY: auth -auth: init-aws-profile - @okta-awscli --profile $(AWS_PROFILE) - -.PHONY: init-aws-profile -init-aws-profile: ~/.aws/credentials - @grep $(AWS_PROFILE) ~/.aws/credentials > /dev/null || \ - echo "[$(AWS_PROFILE)]\naws_access_key_id = x\naws_secret_access_key = x" >> ~/.aws/credentials - -~/.aws/credentials: - @mkdir -p ~/.aws - @touch $@ - -.PHONY: apmbench -apmbench: - @echo "-> Building apmbench..." - @cd $(APMBENCH_PATH) && CGO_ENABLED=0 GOOS=$(APMBENCH_GOOS) GOARCH=$(APMBENCH_GOARCH) go build . - -.PHONY: moxy -moxy: - @echo "-> Building moxy..." - @cd $(MOXY_PATH) && CGO_ENABLED=0 GOOS=$(MOXY_GOOS) GOARCH=$(MOXY_GOARCH) go build . 
- -.PHONY: init -init: - @terraform init - -.PHONY: apply -apply: - @terraform apply -auto-approve - -.PHONY: destroy -destroy: - @terraform destroy -auto-approve - -cp-cpuprof: - @[ "${BENCHMARK_CPU_OUT}" ] && scp $(SSH_OPTS) -i $(SSH_KEY) "$(SSH_USER)@$(WORKER_IP):./$(BENCHMARK_CPU_OUT)" $(BENCHMARK_CPU_OUT) || echo "skipping cpu out copy" - -.PHONY: log-benckmark-profile -log-benckmark-profile: - @echo "Running benchmarks..." - @echo "Benchmark warmup time: $(BENCHMARK_WARMUP_TIME)" - @echo "Benchmark agents: $(BENCHMARK_AGENTS)" - @echo "Benchmark event rate: $(BENCHMARK_EVENT_RATE)" - @echo "Benchmark count: $(BENCHMARK_COUNT)" - @echo "Benchmark duration: $(BENCHMARK_TIME)" - @echo "Benchmark run expression : $(BENCHMARK_RUN)" - -.PHONY: run-benchmark -run-benchmark: log-benckmark-profile - @ssh $(SSH_OPTS) -i $(SSH_KEY) $(SSH_USER)@$(WORKER_IP) ". .envrc && bin/apmbench -run='$(BENCHMARK_RUN)' \ - -benchtime=$(BENCHMARK_TIME) -count=$(BENCHMARK_COUNT) -warmup-time=$(BENCHMARK_WARMUP_TIME) \ - -agents=$(BENCHMARK_AGENTS) -detailed=$(BENCHMARK_DETAILED) -event-rate=$(BENCHMARK_EVENT_RATE) -cpuprofile=$(BENCHMARK_CPU_OUT)" 2>&1 | tee $(BENCHMARK_RESULT) - -.PHONY: run-benchmark-autotuned -run-benchmark-autotuned: - $(eval APM_SERVER_SIZE:=$(shell echo var.apm_server_size | terraform console | tr -d '"'| tr -d 'g')) - @ $(MAKE) run-benchmark BENCHMARK_AGENTS=$(shell echo $$(( $(BENCHMARK_AGENTS) * $(APM_SERVER_SIZE) )) ) - -.PHONY: index-benchmark-results -index-benchmark-results: _default-gobench-vars - @cat $(BENCHMARK_RESULT) | go run -modfile=$(GITROOT)/tools/go.mod github.com/elastic/gobench -es $(GOBENCH_HOST) -es-username $(GOBENCH_USERNAME) -es-password $(GOBENCH_PASSWORD) \ - -index $(GOBENCH_INDEX) -tag "$(GOBENCH_DEFAULT_TAGS),$(GOBENCH_TAGS)" - -.PHONY: _default-gobench-vars -_default-gobench-vars: - $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),apm_server_size=$(shell echo var.apm_server_size | terraform console | tr -d '"')) - $(eval 
GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),build_sha=$(shell curl -sL -H "Authorization: Bearer $(shell terraform output -raw apm_secret_token )" $(shell terraform output -raw apm_server_url ) | jq -r '.build_sha')) - -$(SSH_KEY): - @ssh-keygen -t rsa -b 4096 -C "$(USER)@elastic.co" -N "" -f $(SSH_KEY) - -.PHONY: ssh -ssh: - @ssh $(SSH_OPTS) -i $(SSH_KEY) $(SSH_USER)@$(WORKER_IP) diff --git a/testing/benchmark-standalone/main.tf b/testing/benchmark-standalone/main.tf deleted file mode 100644 index 4f95ce372ab..00000000000 --- a/testing/benchmark-standalone/main.tf +++ /dev/null @@ -1,89 +0,0 @@ -terraform { - required_version = ">= 1.1.8, < 2.0.0" - required_providers { - aws = { - source = "hashicorp/aws" - version = "~>4.17" - } - time = { - source = "hashicorp/time" - version = ">=0.9.1" - } - } -} - -resource "time_static" "created_date" {} - -locals { - ci_tags = { - environment = var.ENVIRONMENT - repo = var.REPO - branch = var.BRANCH - build = var.BUILD_ID - created_date = coalesce(var.CREATED_DATE, time_static.created_date.unix) - } -} - -module "tags" { - source = "../infra/terraform/modules/tags" - # use the convention for team/shared owned resources if we are running in CI. - # assume this is an individually owned resource otherwise. - project = startswith(var.user_name, "benchci") ? 
"benchmarks" : var.user_name -} - -provider "aws" { - region = var.worker_region -} - -locals { - name_prefix = "${coalesce(var.user_name, "unknown-user")}-bench" -} - -module "benchmark_worker" { - source = "../infra/terraform/modules/benchmark_executor" - region = var.worker_region - - user_name = var.user_name - - apm_server_url = module.standalone_apm_server.apm_server_url - apm_secret_token = module.standalone_apm_server.apm_secret_token - - apmbench_bin_path = var.apmbench_bin_path - instance_type = var.worker_instance_type - - public_key = var.public_key - private_key = var.private_key - - tags = merge(local.ci_tags, module.tags.tags) -} - -module "moxy" { - source = "../infra/terraform/modules/moxy" - worker_region = var.worker_region - - moxy_bin_path = var.moxy_bin_path - instance_type = var.moxy_instance_type - - aws_provisioner_key_name = var.private_key - - tags = merge(local.ci_tags, module.tags.tags) -} - - -module "standalone_apm_server" { - source = "../infra/terraform/modules/standalone_apm_server" - worker_region = var.worker_region - aws_os = "amzn2-ami-kernel-5.10" - - ea_managed = false - apm_server_bin_path = var.apm_server_bin_path - apm_instance_type = var.apm_instance_type - - aws_provisioner_key_name = var.private_key - - elasticsearch_url = module.moxy.moxy_url - elasticsearch_username = "" - elasticsearch_password = "" - - tags = merge(local.ci_tags, module.tags.tags) -} diff --git a/testing/benchmark-standalone/outputs.tf b/testing/benchmark-standalone/outputs.tf deleted file mode 100644 index d8459ce0108..00000000000 --- a/testing/benchmark-standalone/outputs.tf +++ /dev/null @@ -1,15 +0,0 @@ -output "public_ip" { - value = module.benchmark_worker.public_ip - description = "The worker public IP" -} - -output "apm_secret_token" { - value = module.standalone_apm_server.apm_secret_token - description = "The APM Server secret token" - sensitive = true -} - -output "apm_server_url" { - value = module.standalone_apm_server.apm_server_url - 
description = "The APM Server URL" -} diff --git a/testing/benchmark-standalone/variables.tf b/testing/benchmark-standalone/variables.tf deleted file mode 100644 index ae88ceb3cb0..00000000000 --- a/testing/benchmark-standalone/variables.tf +++ /dev/null @@ -1,86 +0,0 @@ -## General configuration - -variable "user_name" { - description = "Required username to use for prefixes" - type = string -} - -## Deployment configuration - -variable "apm_instance_type" { - default = "c6i.large" - type = string - description = "Optional apm server instance type" -} - -variable "apm_server_bin_path" { - default = "../../build/apm-server-linux-amd64" - type = string - description = "Optional path to the apm-server binary" -} - -variable "moxy_instance_type" { - default = "c6i.large" - type = string - description = "Optional moxy instance type" -} - -variable "moxy_bin_path" { - default = "../../systemtest/cmd/moxy" - type = string - description = "Optional path to the moxy binary" -} - -## Worker configuraiton - -variable "worker_region" { - default = "us-west-2" - description = "Optional ESS region where the deployment will be created. 
Defaults to us-west-2 (AWS)" - type = string -} - -variable "apmbench_bin_path" { - default = "../../systemtest/cmd/apmbench" - type = string - description = "Optional path to the apmbench binary" -} - -variable "worker_instance_type" { - default = "c6i.large" - type = string - description = "Optional instance type to use for the worker VM" -} - -variable "private_key" { - default = "~/.ssh/id_rsa_terraform" - type = string -} - -variable "public_key" { - default = "~/.ssh/id_rsa_terraform.pub" - type = string -} - -# CI variables -variable "BRANCH" { - description = "Branch name or pull request for tagging purposes" - default = "unknown-branch" -} - -variable "BUILD_ID" { - description = "Build ID in the CI for tagging purposes" - default = "unknown-build" -} - -variable "CREATED_DATE" { - description = "Creation date in epoch time for tagging purposes" - default = "" -} - -variable "ENVIRONMENT" { - default = "unknown-environment" -} - -variable "REPO" { - default = "unknown-repo-name" -} diff --git a/testing/benchmark/Makefile b/testing/benchmark/Makefile index cf894325b58..b300ff0de0e 100644 --- a/testing/benchmark/Makefile +++ b/testing/benchmark/Makefile @@ -2,6 +2,10 @@ APMBENCH_PATH ?= ../../systemtest/cmd/apmbench APMBENCH_GOOS ?= linux APMBENCH_GOARCH ?= amd64 +MOXY_PATH ?= ../../systemtest/cmd/moxy +MOXY_GOOS ?= linux +MOXY_GOARCH ?= amd64 + TFVARS_SOURCE ?= terraform.tfvars.example BENCHMARK_WARMUP_TIME ?= 5m @@ -67,6 +71,11 @@ apmbench: @echo "-> Building apmbench..." @cd $(APMBENCH_PATH) && CGO_ENABLED=0 GOOS=$(APMBENCH_GOOS) GOARCH=$(APMBENCH_GOARCH) go build . +.PHONY: moxy +moxy: + @echo "-> Building moxy..." + @cd $(MOXY_PATH) && CGO_ENABLED=0 GOOS=$(MOXY_GOOS) GOARCH=$(MOXY_GOARCH) go build . 
+ .PHONY: init init: @terraform init diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index e699f56485e..724d933e93f 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -46,7 +46,8 @@ locals { } module "ec_deployment" { - source = "../infra/terraform/modules/ec_deployment" + for_each = var.run_standalone ? [] : toset(["this"]) + source = "../infra/terraform/modules/ec_deployment" region = var.ess_region stack_version = var.stack_version @@ -73,8 +74,8 @@ module "ec_deployment" { module "benchmark_worker" { source = "../infra/terraform/modules/benchmark_executor" - region = var.worker_region + region = var.worker_region user_name = var.user_name apm_server_url = module.ec_deployment.apm_url @@ -88,3 +89,34 @@ module "benchmark_worker" { tags = merge(local.ci_tags, module.tags.tags) } + +module "moxy" { + for_each = var.run_standalone ? toset(["this"]) : [] + source = "../infra/terraform/modules/moxy" + + moxy_bin_path = var.moxy_bin_path + instance_type = var.worker_instance_type + + aws_provisioner_key_name = var.private_key + + tags = merge(local.ci_tags, module.tags.tags) +} + + +module "standalone_apm_server" { + for_each = var.run_standalone ? 
toset(["this"]) : [] + source = "../infra/terraform/modules/standalone_apm_server" + + aws_os = "amzn2-ami-kernel-5.10" + ea_managed = false + apm_server_bin_path = var.apm_server_bin_path + apm_instance_type = var.worker_instance_type + + aws_provisioner_key_name = var.private_key + + elasticsearch_url = module.moxy.moxy_url + elasticsearch_username = "" + elasticsearch_password = "" + + tags = merge(local.ci_tags, module.tags.tags) +} diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index deb2d05d6b0..411941ac88a 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -5,6 +5,12 @@ variable "user_name" { type = string } +variable "run_standalone" { + default = false + description = "If set run benchmarks against standalone APM Server conneted to moxy" + type = bool +} + ## Deployment configuration variable "ess_region" { @@ -86,6 +92,18 @@ variable "drop_pipeline" { type = bool } +variable "apm_server_bin_path" { + default = "../../build/apm-server-linux-amd64" + type = string + description = "Optional path to the apm-server binary" +} + +variable "moxy_bin_path" { + default = "../../systemtest/cmd/moxy" + type = string + description = "Optional path to the moxy binary" +} + ## Worker configuraiton variable "worker_region" { diff --git a/testing/infra/terraform/modules/moxy/provider.tf b/testing/infra/terraform/modules/moxy/provider.tf deleted file mode 100644 index 3d860298151..00000000000 --- a/testing/infra/terraform/modules/moxy/provider.tf +++ /dev/null @@ -1,6 +0,0 @@ -provider "aws" { - region = var.worker_region - default_tags { - tags = var.tags - } -} diff --git a/testing/infra/terraform/modules/moxy/variables.tf b/testing/infra/terraform/modules/moxy/variables.tf index 2b20ba2fce9..3c0c92363bd 100644 --- a/testing/infra/terraform/modules/moxy/variables.tf +++ b/testing/infra/terraform/modules/moxy/variables.tf @@ -1,9 +1,3 @@ -variable "worker_region" { - default = "us-west-2" - description = 
"Optional AWS region where the workers will be created. Defaults to us-west-2 (AWS)" - type = string -} - variable "instance_type" { type = string description = "Moxy instance type" diff --git a/testing/infra/terraform/modules/standalone_apm_server/provider.tf b/testing/infra/terraform/modules/standalone_apm_server/provider.tf deleted file mode 100644 index 3d860298151..00000000000 --- a/testing/infra/terraform/modules/standalone_apm_server/provider.tf +++ /dev/null @@ -1,6 +0,0 @@ -provider "aws" { - region = var.worker_region - default_tags { - tags = var.tags - } -} diff --git a/testing/infra/terraform/modules/standalone_apm_server/variables.tf b/testing/infra/terraform/modules/standalone_apm_server/variables.tf index 02bbf4930ef..5f68fcbaa84 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/variables.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/variables.tf @@ -45,12 +45,6 @@ variable "region" { type = string } -variable "worker_region" { - default = "us-west-2" - description = "Optional AWS region where the workers will be created. Defaults to us-west-2 (AWS)" - type = string -} - variable "ea_managed" { default = false description = "Whether or not install Elastic Agent managed APM Server" From 956ff2b87126fe7ff0fd691d3933c2ac8649a191 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Wed, 11 Sep 2024 18:19:58 -0700 Subject: [PATCH 14/24] Add more conditional benchmark variable to TF --- testing/benchmark/main.tf | 18 +++++++++--------- testing/benchmark/outputs.tf | 16 +++++++++------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index 724d933e93f..bacca618648 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -46,8 +46,8 @@ locals { } module "ec_deployment" { - for_each = var.run_standalone ? [] : toset(["this"]) - source = "../infra/terraform/modules/ec_deployment" + count = var.run_standalone ? 
0 : 1 + source = "../infra/terraform/modules/ec_deployment" region = var.ess_region stack_version = var.stack_version @@ -78,8 +78,8 @@ module "benchmark_worker" { region = var.worker_region user_name = var.user_name - apm_server_url = module.ec_deployment.apm_url - apm_secret_token = module.ec_deployment.apm_secret_token + apm_server_url = var.run_standalone ? module.standalone_apm_server[0].apm_server_url : module.ec_deployment[0].apm_url + apm_secret_token = var.run_standalone ? module.standalone_apm_server[0].apm_secret_token : module.ec_deployment[0].apm_secret_token apmbench_bin_path = var.apmbench_bin_path instance_type = var.worker_instance_type @@ -91,8 +91,8 @@ module "benchmark_worker" { } module "moxy" { - for_each = var.run_standalone ? toset(["this"]) : [] - source = "../infra/terraform/modules/moxy" + count = var.run_standalone ? 1 : 0 + source = "../infra/terraform/modules/moxy" moxy_bin_path = var.moxy_bin_path instance_type = var.worker_instance_type @@ -104,8 +104,8 @@ module "moxy" { module "standalone_apm_server" { - for_each = var.run_standalone ? toset(["this"]) : [] - source = "../infra/terraform/modules/standalone_apm_server" + count = var.run_standalone ? 1 : 0 + source = "../infra/terraform/modules/standalone_apm_server" aws_os = "amzn2-ami-kernel-5.10" ea_managed = false @@ -114,7 +114,7 @@ module "standalone_apm_server" { aws_provisioner_key_name = var.private_key - elasticsearch_url = module.moxy.moxy_url + elasticsearch_url = module.moxy[0].moxy_url elasticsearch_username = "" elasticsearch_password = "" diff --git a/testing/benchmark/outputs.tf b/testing/benchmark/outputs.tf index 371d9c43760..d222ab4cf6b 100644 --- a/testing/benchmark/outputs.tf +++ b/testing/benchmark/outputs.tf @@ -4,38 +4,40 @@ output "public_ip" { } output "elasticsearch_url" { - value = module.ec_deployment.elasticsearch_url + value = !var.run_standalone ? 
module.ec_deployment[0].elasticsearch_url : "" description = "The secure Elasticsearch URL" } output "elasticsearch_username" { - value = module.ec_deployment.elasticsearch_username + value = !var.run_standalone ? module.ec_deployment[0].elasticsearch_username : "" description = "The Elasticsearch username" sensitive = true } output "elasticsearch_password" { - value = module.ec_deployment.elasticsearch_password + value = !var.run_standalone ? module.ec_deployment[0].elasticsearch_password : "" description = "The Elasticsearch password" sensitive = true } output "kibana_url" { - value = module.ec_deployment.kibana_url + value = !var.run_standalone ? module.ec_deployment[0].kibana_url : "" description = "The secure Kibana URL" } + output "apm_secret_token" { - value = module.ec_deployment.apm_secret_token + value = var.run_standalone ? module.standalone_apm_server[0].apm_secret_token : module.ec_deployment[0].apm_secret_token description = "The APM Server secret token" sensitive = true } output "apm_server_url" { - value = module.ec_deployment.apm_url + value = var.run_standalone ? module.standalone_apm_server[0].apm_server_url : module.ec_deployment[0].apm_url description = "The APM Server URL" + sensitive = true } output "admin_console_url" { - value = module.ec_deployment.admin_console_url + value = !var.run_standalone ? 
module.ec_deployment[0].admin_console_url : "" description = "The admin console URL" } From 2045d58720862ec2415e6db198ab8c8d01b1ab42 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Wed, 11 Sep 2024 18:35:50 -0700 Subject: [PATCH 15/24] start process in bg for standalone apm server bench --- testing/infra/terraform/modules/moxy/main.tf | 2 +- testing/infra/terraform/modules/standalone_apm_server/main.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/infra/terraform/modules/moxy/main.tf b/testing/infra/terraform/modules/moxy/main.tf index b7fe27d30dd..af90c135795 100644 --- a/testing/infra/terraform/modules/moxy/main.tf +++ b/testing/infra/terraform/modules/moxy/main.tf @@ -74,7 +74,7 @@ resource "aws_instance" "moxy" { inline = [ "sudo cp ${local.bin_path} moxy", "chmod +x moxy", - "./moxy" + "./moxy &" ] } diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index 134dbdd9944..fde81e64248 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -189,7 +189,7 @@ resource "aws_instance" "apm" { "sudo cp ${local.bin_path} apm-server", "chmod +x apm-server", "sudo cp ${local.conf_path} apm-server.yml", - "./apm-server" + "./apm-server &" ] ) } From b81bf55c722f35484117895dccca5894a6a7146e Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Thu, 12 Sep 2024 16:33:35 -0700 Subject: [PATCH 16/24] Do not use explicit name for key pair rs --- testing/infra/terraform/modules/moxy/main.tf | 2 +- testing/infra/terraform/modules/standalone_apm_server/main.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/testing/infra/terraform/modules/moxy/main.tf b/testing/infra/terraform/modules/moxy/main.tf index af90c135795..d95aec1f39b 100644 --- a/testing/infra/terraform/modules/moxy/main.tf +++ 
b/testing/infra/terraform/modules/moxy/main.tf @@ -84,6 +84,6 @@ resource "aws_instance" "moxy" { } resource "aws_key_pair" "provisioner_key" { - key_name = var.aws_provisioner_key_name public_key = file("${var.aws_provisioner_key_name}.pub") + tags = var.tags } diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index fde81e64248..01c7a44f3b9 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -224,8 +224,8 @@ data "external" "latest_apm_server" { } resource "aws_key_pair" "provisioner_key" { - key_name = var.aws_provisioner_key_name public_key = file("${var.aws_provisioner_key_name}.pub") + tags = var.tags } resource "random_password" "apm_secret_token" { From 3fc850b9c74073d68d6682ace729d6d90cfeadc0 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Thu, 12 Sep 2024 16:58:15 -0700 Subject: [PATCH 17/24] use x86_64 for standalone apm-server benchmark --- .github/workflows/benchmarks.yml | 2 +- testing/benchmark/main.tf | 2 +- testing/benchmark/variables.tf | 2 +- testing/infra/terraform/modules/moxy/main.tf | 2 +- testing/infra/terraform/modules/moxy/outputs.tf | 2 +- .../modules/standalone_apm_server/main.tf | 16 +++++++++++++++- .../modules/standalone_apm_server/outputs.tf | 2 +- 7 files changed, 21 insertions(+), 7 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 16783c5f3b0..fe19caf36b3 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -120,7 +120,7 @@ jobs: if: ${{ inputs.runStandalone }} run: | make moxy - cd ../.. && make build/apm-server-linux-arm64 + cd ../.. && make build/apm-server-linux-amd64 && mv build/apm-server-linux-amd64 build/apm-server - name: Override docker committed version if: ${{ ! inputs.runOnStable && ! 
inputs.runStandalone}} diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index bacca618648..2b337e346bc 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -107,7 +107,7 @@ module "standalone_apm_server" { count = var.run_standalone ? 1 : 0 source = "../infra/terraform/modules/standalone_apm_server" - aws_os = "amzn2-ami-kernel-5.10" + aws_os = "amzn2-ami-hvm-*-x86_64-ebs" ea_managed = false apm_server_bin_path = var.apm_server_bin_path apm_instance_type = var.worker_instance_type diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index 411941ac88a..bc36d36eaa7 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -93,7 +93,7 @@ variable "drop_pipeline" { } variable "apm_server_bin_path" { - default = "../../build/apm-server-linux-amd64" + default = "../../build" type = string description = "Optional path to the apm-server binary" } diff --git a/testing/infra/terraform/modules/moxy/main.tf b/testing/infra/terraform/modules/moxy/main.tf index d95aec1f39b..7141c2de5d4 100644 --- a/testing/infra/terraform/modules/moxy/main.tf +++ b/testing/infra/terraform/modules/moxy/main.tf @@ -67,7 +67,7 @@ resource "aws_instance" "moxy" { } provisioner "file" { - source = var.moxy_bin_path + source = "${var.moxy_bin_path}/moxy" destination = local.bin_path } provisioner "remote-exec" { diff --git a/testing/infra/terraform/modules/moxy/outputs.tf b/testing/infra/terraform/modules/moxy/outputs.tf index 8f8465f9bc5..a9c9844a84b 100644 --- a/testing/infra/terraform/modules/moxy/outputs.tf +++ b/testing/infra/terraform/modules/moxy/outputs.tf @@ -1,4 +1,4 @@ output "moxy_url" { - value = "${aws_instance.moxy.public_ip}:${local.moxy_port}" + value = "http://${aws_instance.moxy.public_ip}:${local.moxy_port}" description = "The Moxy Server URL" } diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index 
01c7a44f3b9..e353691be1a 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -6,6 +6,7 @@ locals { "debian-10-arm64" = "136693071363" # debian "debian-11-arm64" = "136693071363" # debian "amzn2-ami-kernel-5.10" = "137112412989" # amazon + "amzn2-ami-hvm-*-x86_64-ebs" = "137112412989" #amazon "al2023-ami-2023" = "137112412989" # amazon "RHEL-7" = "309956199498" # Red Hat "RHEL-8" = "309956199498" # Red Hat @@ -18,6 +19,7 @@ locals { "debian-10-arm64" = "t4g.nano" "debian-11-arm64" = "t4g.nano" "amzn2-ami-kernel-5.10" = "t4g.nano" + "amzn2-ami-hvm-*-x86_64-ebs" = "t4g.nano" "al2023-ami-2023" = "t4g.nano" "RHEL-7" = "t3a.micro" # RHEL-7 doesn't support arm "RHEL-8" = "t4g.micro" # RHEL doesn't support nano instances @@ -30,6 +32,7 @@ locals { "debian-10-arm64" = "arm64" "debian-11-arm64" = "arm64" "amzn2-ami-kernel-5.10" = "arm64" + "amzn2-ami-hvm-*-x86_64-ebs" = "x86_64" "al2023-ami-2023" = "arm64" "RHEL-7" = "x86_64" # RHEL-7 doesn't support arm "RHEL-8" = "arm64" @@ -66,6 +69,7 @@ locals { "debian-10-arm64" = "admin" "debian-11-arm64" = "admin" "amzn2-ami-kernel-5.10" = "ec2-user" + "amzn2-ami-hvm-*-x86_64-ebs" = "ec2-user" "al2023-ami-2023" = "ec2-user" "RHEL-7" = "ec2-user" "RHEL-8" = "ec2-user" @@ -103,6 +107,16 @@ data "aws_ami" "os" { owners = [local.image_owners[var.aws_os]] } +data "aws_ami" "worker_ami" { + owners = ["amazon"] + most_recent = true + + filter { + name = "name" + values = ["amzn2-ami-hvm-*-x86_64-ebs"] + } +} + resource "aws_security_group" "main" { egress = [ { @@ -156,7 +170,7 @@ resource "aws_instance" "apm" { } provisioner "file" { - source = var.apm_server_bin_path + source = "${var.apm_server_bin_path}/apm-server" destination = local.bin_path } diff --git a/testing/infra/terraform/modules/standalone_apm_server/outputs.tf b/testing/infra/terraform/modules/standalone_apm_server/outputs.tf index f3cda741079..7491343d314 100644 --- 
a/testing/infra/terraform/modules/standalone_apm_server/outputs.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/outputs.tf @@ -5,6 +5,6 @@ output "apm_secret_token" { } output "apm_server_url" { - value = "${aws_instance.apm.public_ip}:${local.apm_port}" + value = "http://${aws_instance.apm.public_ip}:${local.apm_port}" description = "The APM Server URL" } From daba92190a432bb0b5d425618eca79e2d4edb240 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Thu, 12 Sep 2024 19:02:32 -0700 Subject: [PATCH 18/24] provision all standalone benchmark instances in the same vpc --- testing/benchmark/main.tf | 47 ++++++++++++- testing/benchmark/variables.tf | 16 +++++ .../modules/benchmark_executor/instance.tf | 66 ++++++------------- .../modules/benchmark_executor/variables.tf | 21 ++---- testing/infra/terraform/modules/moxy/main.tf | 20 ++++-- .../infra/terraform/modules/moxy/variables.tf | 12 ++-- .../apm-server.yml.tftpl | 5 ++ .../modules/standalone_apm_server/main.tf | 21 +++--- .../standalone_apm_server/variables.tf | 14 ++-- 9 files changed, 131 insertions(+), 91 deletions(-) diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index 2b337e346bc..a47811a38a6 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -45,6 +45,44 @@ locals { name_prefix = "${coalesce(var.user_name, "unknown-user")}-bench" } +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + version = "3.14.0" + + name = "${var.user_name}-worker" + cidr = var.vpc_cidr + + azs = [for letter in ["a", "b", "c"] : "${var.worker_region}${letter}"] + public_subnets = var.public_cidr + enable_ipv6 = false + enable_nat_gateway = false + single_nat_gateway = false + + manage_default_security_group = true + default_security_group_ingress = [ + { + "from_port" : 0, + "to_port" : 0, + "protocol" : -1, + "self" : true, + "cidr_blocks" : "0.0.0.0/0", + } + ] + default_security_group_egress = [ + { + "from_port" : 0, + "to_port" : 0, + 
"protocol" : -1, + "cidr_blocks" : "0.0.0.0/0", + } + ] + + tags = merge(local.ci_tags, module.tags.tags) + vpc_tags = { + Name = "vpc-${var.user_name}-worker" + } +} + module "ec_deployment" { count = var.run_standalone ? 0 : 1 source = "../infra/terraform/modules/ec_deployment" @@ -75,6 +113,7 @@ module "ec_deployment" { module "benchmark_worker" { source = "../infra/terraform/modules/benchmark_executor" + vpc_id = module.vpc.vpc_id region = var.worker_region user_name = var.user_name @@ -94,8 +133,9 @@ module "moxy" { count = var.run_standalone ? 1 : 0 source = "../infra/terraform/modules/moxy" - moxy_bin_path = var.moxy_bin_path + vpc_id = module.vpc.vpc_id instance_type = var.worker_instance_type + moxy_bin_path = var.moxy_bin_path aws_provisioner_key_name = var.private_key @@ -107,10 +147,11 @@ module "standalone_apm_server" { count = var.run_standalone ? 1 : 0 source = "../infra/terraform/modules/standalone_apm_server" + vpc_id = module.vpc.vpc_id aws_os = "amzn2-ami-hvm-*-x86_64-ebs" - ea_managed = false - apm_server_bin_path = var.apm_server_bin_path apm_instance_type = var.worker_instance_type + apm_server_bin_path = var.apm_server_bin_path + ea_managed = false aws_provisioner_key_name = var.private_key diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index bc36d36eaa7..fc12a72dd88 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -104,6 +104,22 @@ variable "moxy_bin_path" { description = "Optional path to the moxy binary" } +## VPC Network settings + +variable "vpc_cidr" { + default = "192.168.44.0/24" + type = string +} + +variable "public_cidr" { + default = [ + "192.168.44.0/26", + "192.168.44.64/26", + "192.168.44.128/26", + ] + type = list(string) +} + ## Worker configuraiton variable "worker_region" { diff --git a/testing/infra/terraform/modules/benchmark_executor/instance.tf b/testing/infra/terraform/modules/benchmark_executor/instance.tf index 18d55462c66..93a7becf651 100644 --- 
a/testing/infra/terraform/modules/benchmark_executor/instance.tf +++ b/testing/infra/terraform/modules/benchmark_executor/instance.tf @@ -6,50 +6,6 @@ locals { } } -module "vpc" { - source = "terraform-aws-modules/vpc/aws" - version = "3.14.0" - - name = "${var.user_name}-worker" - cidr = var.vpc_cidr - - azs = [for letter in ["a", "b", "c"] : "${var.region}${letter}"] - public_subnets = var.public_cidr - enable_ipv6 = false - enable_nat_gateway = false - single_nat_gateway = false - - manage_default_security_group = true - default_security_group_ingress = [ - { - "from_port" : 0, - "to_port" : 0, - "protocol" : -1, - "self" : true, - "cidr_blocks" : "0.0.0.0/0", - } - ] - default_security_group_egress = [ - { - "from_port" : 0, - "to_port" : 0, - "protocol" : -1, - "cidr_blocks" : "0.0.0.0/0", - } - ] - - tags = merge(var.tags, local.ec2_tags) - vpc_tags = { - Name = "vpc-${var.user_name}-worker" - } -} - -resource "aws_key_pair" "worker" { - key_name = "${var.user_name}_worker_key" - public_key = file(var.public_key) - tags = merge(var.tags, local.ec2_tags) -} - data "aws_ami" "worker_ami" { owners = ["amazon"] most_recent = true @@ -60,6 +16,18 @@ data "aws_ami" "worker_ami" { } } +data "aws_subnets" "public_subnets" { + filter { + name = "vpc-id" + values = [var.vpc_id] + } +} + +data "aws_security_group" "security_group" { + vpc_id = var.vpc_id + name = "default" +} + module "ec2_instance" { source = "terraform-aws-modules/ec2-instance/aws" @@ -68,9 +36,15 @@ module "ec2_instance" { ami = data.aws_ami.worker_ami.id instance_type = var.instance_type monitoring = false - vpc_security_group_ids = [module.vpc.default_security_group_id] - subnet_id = module.vpc.public_subnets[0] + vpc_security_group_ids = [data.aws_security_group.security_group.id] + subnet_id = data.aws_subnets.public_subnets.id associate_public_ip_address = true key_name = aws_key_pair.worker.id tags = merge(var.tags, local.ec2_tags) } + +resource "aws_key_pair" "worker" { + key_name = 
"${var.user_name}_worker_key" + public_key = file(var.public_key) + tags = merge(var.tags, local.ec2_tags) +} diff --git a/testing/infra/terraform/modules/benchmark_executor/variables.tf b/testing/infra/terraform/modules/benchmark_executor/variables.tf index 5edc17f5bae..79e21837e5e 100644 --- a/testing/infra/terraform/modules/benchmark_executor/variables.tf +++ b/testing/infra/terraform/modules/benchmark_executor/variables.tf @@ -18,6 +18,11 @@ variable "instance_type" { description = "Optional instance type to use for the worker VM" } +variable "vpc_id" { + description = "VPC ID to provision the EC2 instance" + type = string +} + variable "apm_secret_token" { default = "" type = string @@ -39,22 +44,6 @@ variable "tags" { description = "Optional set of tags to use for all resources" } -## VPC Network settings - -variable "vpc_cidr" { - default = "192.168.44.0/24" - type = string -} - -variable "public_cidr" { - default = [ - "192.168.44.0/26", - "192.168.44.64/26", - "192.168.44.128/26", - ] - type = list(string) -} - variable "region" { default = "us-west2" type = string diff --git a/testing/infra/terraform/modules/moxy/main.tf b/testing/infra/terraform/modules/moxy/main.tf index 7141c2de5d4..34b0f63369b 100644 --- a/testing/infra/terraform/modules/moxy/main.tf +++ b/testing/infra/terraform/modules/moxy/main.tf @@ -13,7 +13,15 @@ data "aws_ami" "worker_ami" { } } +data "aws_subnets" "public_subnets" { + filter { + name = "vpc-id" + values = [var.vpc_id] + } +} + resource "aws_security_group" "main" { + vpc_id = var.vpc_id egress = [ { cidr_blocks = ["0.0.0.0/0", ] @@ -54,10 +62,12 @@ resource "aws_security_group" "main" { } resource "aws_instance" "moxy" { - ami = data.aws_ami.worker_ami.id - instance_type = var.instance_type - monitoring = false - key_name = aws_key_pair.provisioner_key.key_name + ami = data.aws_ami.worker_ami.id + instance_type = var.instance_type + subnet_id = data.aws_subnets.public_subnets.id + vpc_security_group_ids = 
[aws_security_group.main.id] + key_name = aws_key_pair.provisioner_key.key_name + monitoring = false connection { type = "ssh" @@ -78,8 +88,6 @@ resource "aws_instance" "moxy" { ] } - vpc_security_group_ids = [aws_security_group.main.id] - tags = var.tags } diff --git a/testing/infra/terraform/modules/moxy/variables.tf b/testing/infra/terraform/modules/moxy/variables.tf index 3c0c92363bd..9b4cb7a4131 100644 --- a/testing/infra/terraform/modules/moxy/variables.tf +++ b/testing/infra/terraform/modules/moxy/variables.tf @@ -3,15 +3,19 @@ variable "instance_type" { description = "Moxy instance type" } -variable "moxy_bin_path" { +variable "vpc_id" { + description = "VPC ID to provision the EC2 instance" type = string - description = "Optionally use the apm-server binary from the specified path to the worker machine" } variable "aws_provisioner_key_name" { - default = "" - description = "Optional ssh key name to create the aws key pair and remote provision the ec2 instance" + description = "ssh key name to create the aws key pair and remote provision the EC2 instance" + type = string +} + +variable "moxy_bin_path" { type = string + description = "Path to moxy binary from to copy to the worker machine" } variable "tags" { diff --git a/testing/infra/terraform/modules/standalone_apm_server/apm-server.yml.tftpl b/testing/infra/terraform/modules/standalone_apm_server/apm-server.yml.tftpl index 5061811d904..e51007ccf5a 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/apm-server.yml.tftpl +++ b/testing/infra/terraform/modules/standalone_apm_server/apm-server.yml.tftpl @@ -6,6 +6,11 @@ apm-server: secret_token: ${apm_secret_token} rum: enabled: true + expvar: + enabled: true + pprof: + enabled: true + output: elasticsearch: hosts: [ ${elasticsearch_url} ] diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index e353691be1a..f1895371f92 100644 --- 
a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -107,17 +107,15 @@ data "aws_ami" "os" { owners = [local.image_owners[var.aws_os]] } -data "aws_ami" "worker_ami" { - owners = ["amazon"] - most_recent = true - +data "aws_subnets" "public_subnets" { filter { - name = "name" - values = ["amzn2-ami-hvm-*-x86_64-ebs"] + name = "vpc-id" + values = [var.vpc_id] } } resource "aws_security_group" "main" { + vpc_id = var.vpc_id egress = [ { cidr_blocks = ["0.0.0.0/0", ] @@ -158,9 +156,12 @@ resource "aws_security_group" "main" { } resource "aws_instance" "apm" { - ami = data.aws_ami.os.id - instance_type = var.apm_instance_type == "" ? local.instance_types[var.aws_os] : var.apm_instance_type - key_name = aws_key_pair.provisioner_key.key_name + ami = data.aws_ami.os.id + instance_type = var.apm_instance_type == "" ? local.instance_types[var.aws_os] : var.apm_instance_type + subnet_id = data.aws_subnets.public_subnets.id + vpc_security_group_ids = [aws_security_group.main.id] + key_name = aws_key_pair.provisioner_key.key_name + monitoring = false connection { type = "ssh" @@ -208,8 +209,6 @@ resource "aws_instance" "apm" { ) } - vpc_security_group_ids = [aws_security_group.main.id] - tags = var.tags } diff --git a/testing/infra/terraform/modules/standalone_apm_server/variables.tf b/testing/infra/terraform/modules/standalone_apm_server/variables.tf index 5f68fcbaa84..40b999c126c 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/variables.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/variables.tf @@ -1,18 +1,22 @@ variable "aws_os" { default = "" - description = "Optional aws ec2 instance OS" + description = "Optional aws EC2 instance OS" type = string } variable "apm_instance_type" { default = "" type = string - description = "Optional apm server instance type" + description = "Optional apm server instance type overide" +} + +variable "vpc_id" { + description 
= "VPC ID to provision the EC2 instance" + type = string } variable "aws_provisioner_key_name" { - default = "" - description = "Optional ssh key name to create the aws key pair and remote provision the ec2 instance" + description = "ssh key name to create the aws key pair and remote provision the EC2 instance" type = string } @@ -54,7 +58,7 @@ variable "ea_managed" { variable "apm_server_bin_path" { default = "" type = string - description = "Optionally use the apm-server binary from the specified path to the worker machine" + description = "Optionally use the apm-server binary from the specified path instead" } variable "tags" { From 350e30648483c4bf641952f72fc6808565beacec Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Thu, 12 Sep 2024 20:26:28 -0700 Subject: [PATCH 19/24] add explicit vpc depends --- testing/benchmark/main.tf | 9 ++++++--- .../terraform/modules/benchmark_executor/instance.tf | 2 +- testing/infra/terraform/modules/moxy/main.tf | 2 +- .../terraform/modules/standalone_apm_server/main.tf | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index a47811a38a6..240ac11a4b6 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -126,7 +126,8 @@ module "benchmark_worker" { public_key = var.public_key private_key = var.private_key - tags = merge(local.ci_tags, module.tags.tags) + tags = merge(local.ci_tags, module.tags.tags) + depends_on = [module.vpc] } module "moxy" { @@ -139,7 +140,8 @@ module "moxy" { aws_provisioner_key_name = var.private_key - tags = merge(local.ci_tags, module.tags.tags) + tags = merge(local.ci_tags, module.tags.tags) + depends_on = [module.vpc] } @@ -159,5 +161,6 @@ module "standalone_apm_server" { elasticsearch_username = "" elasticsearch_password = "" - tags = merge(local.ci_tags, module.tags.tags) + tags = merge(local.ci_tags, module.tags.tags) + depends_on = [module.vpc] } diff --git 
a/testing/infra/terraform/modules/benchmark_executor/instance.tf b/testing/infra/terraform/modules/benchmark_executor/instance.tf index 93a7becf651..7b41f643ebf 100644 --- a/testing/infra/terraform/modules/benchmark_executor/instance.tf +++ b/testing/infra/terraform/modules/benchmark_executor/instance.tf @@ -37,7 +37,7 @@ module "ec2_instance" { instance_type = var.instance_type monitoring = false vpc_security_group_ids = [data.aws_security_group.security_group.id] - subnet_id = data.aws_subnets.public_subnets.id + subnet_id = data.aws_subnets.public_subnets.ids[0] associate_public_ip_address = true key_name = aws_key_pair.worker.id tags = merge(var.tags, local.ec2_tags) diff --git a/testing/infra/terraform/modules/moxy/main.tf b/testing/infra/terraform/modules/moxy/main.tf index 34b0f63369b..ffbeb9cb3cf 100644 --- a/testing/infra/terraform/modules/moxy/main.tf +++ b/testing/infra/terraform/modules/moxy/main.tf @@ -64,7 +64,7 @@ resource "aws_security_group" "main" { resource "aws_instance" "moxy" { ami = data.aws_ami.worker_ami.id instance_type = var.instance_type - subnet_id = data.aws_subnets.public_subnets.id + subnet_id = data.aws_subnets.public_subnets.ids[0] vpc_security_group_ids = [aws_security_group.main.id] key_name = aws_key_pair.provisioner_key.key_name monitoring = false diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index f1895371f92..47a2e129a89 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -158,7 +158,7 @@ resource "aws_security_group" "main" { resource "aws_instance" "apm" { ami = data.aws_ami.os.id instance_type = var.apm_instance_type == "" ? 
local.instance_types[var.aws_os] : var.apm_instance_type - subnet_id = data.aws_subnets.public_subnets.id + subnet_id = data.aws_subnets.public_subnets.ids[0] vpc_security_group_ids = [aws_security_group.main.id] key_name = aws_key_pair.provisioner_key.key_name monitoring = false From 434226fd2c1c8a38110ac543547a5593508b6a05 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Fri, 13 Sep 2024 14:08:20 -0700 Subject: [PATCH 20/24] update moxy to support / and auth --- .github/workflows/benchmarks.yml | 4 +- systemtest/cmd/moxy/.gitignore | 1 + systemtest/cmd/moxy/main.go | 44 +++++++++++++++++-- testing/benchmark/Makefile | 7 +++ testing/benchmark/main.tf | 12 ++--- testing/benchmark/outputs.tf | 4 +- testing/benchmark/variables.tf | 3 +- testing/infra/terraform/modules/moxy/main.tf | 11 ++++- .../infra/terraform/modules/moxy/outputs.tf | 8 +++- .../modules/standalone_apm_server/main.tf | 6 ++- 10 files changed, 82 insertions(+), 18 deletions(-) create mode 100644 systemtest/cmd/moxy/.gitignore diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index fe19caf36b3..5959e300821 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -61,7 +61,7 @@ jobs: GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} # temporarily override to get faster feedback - BENCHMARK_WARMUP_TIME: 1m + BENCHMARK_WARMUP_TIME: 5m BENCHMARK_COUNT: 2 BENCHMARK_TIME: 1m steps: @@ -120,7 +120,7 @@ jobs: if: ${{ inputs.runStandalone }} run: | make moxy - cd ../.. && make build/apm-server-linux-amd64 && mv build/apm-server-linux-amd64 build/apm-server + make apm-server - name: Override docker committed version if: ${{ ! inputs.runOnStable && ! 
inputs.runStandalone}} diff --git a/systemtest/cmd/moxy/.gitignore b/systemtest/cmd/moxy/.gitignore new file mode 100644 index 00000000000..48537ea034d --- /dev/null +++ b/systemtest/cmd/moxy/.gitignore @@ -0,0 +1 @@ +moxy diff --git a/systemtest/cmd/moxy/main.go b/systemtest/cmd/moxy/main.go index ed7c63d42c4..cf3c4cebd88 100644 --- a/systemtest/cmd/moxy/main.go +++ b/systemtest/cmd/moxy/main.go @@ -3,6 +3,7 @@ package main import ( "bufio" "bytes" + "encoding/base64" "flag" "fmt" "io" @@ -19,6 +20,8 @@ func main() { "loglevel", zapcore.InfoLevel, "set log level to one of: DEBUG, INFO (default), WARN, ERROR, DPANIC, PANIC, FATAL", ) + username := flag.String("username", "elastic", "authentication username to mimic ES") + password := flag.String("password", "", "authentication username to mimic ES") flag.Parse() zapcfg := zap.NewProductionConfig() zapcfg.EncoderConfig.EncodeTime = zapcore.RFC3339TimeEncoder @@ -29,24 +32,59 @@ func main() { if err != nil { panic(err) } + if *username == "" || *password == "" { + logger.Fatal("both username and password are required") + } defer logger.Sync() s := http.Server{ Addr: ":9200", - Handler: handler(logger), + Handler: handler(logger, *username, *password), } if err := s.ListenAndServe(); err != nil { logger.Fatal("listen error", zap.Error(err)) } } -func handler(logger *zap.Logger) http.Handler { +func handler(logger *zap.Logger, username, password string) http.Handler { + expectedAuth := fmt.Sprintf("%s:%s", username, password) return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("X-Elastic-Product", "Elasticsearch") - first := true + auth := r.Header.Get("Authorization") + actualAuth, err := base64.StdEncoding.DecodeString(auth) + if err != nil || string(actualAuth) != expectedAuth { + logger.Error( + "authentication failed", + zap.Error(err), + zap.String("actual", string(actualAuth)), + zap.String("expected", expectedAuth), + ) + // w.WriteHeader(http.StatusUnauthorized) + return + } 
switch r.URL.Path { + case "/": + // MIS doesn't use this route, but apm-server checks for cluster_uuid + w.Write([]byte(`{ + "name": "instance-0000000001", + "cluster_name": "eca3b3c3bbee4816bb92f82184e328dd", + "cluster_uuid": "cc49813b6b8e2138fbb8243ae2b3deed", + "version": { + "number": "8.15.1", + "build_flavor": "default", + "build_type": "docker", + "build_hash": "253e8544a65ad44581194068936f2a5d57c2c051", + "build_date": "2024-09-02T22:04:47.310170297Z", + "build_snapshot": false, + "lucene_version": "9.11.1", + "minimum_wire_compatibility_version": "7.17.0", + "minimum_index_compatibility_version": "7.0.0" + }, + "tagline": "You Know, for Search" + }`)) case "/_security/user/_has_privileges": w.Write([]byte(`{"username":"admin","has_all_requested":true,"cluster":{},"index":{},"application":{"apm":{"-":{"event:write":true}}}}`)) case "/_bulk": + first := true var body io.Reader switch r.Header.Get("Content-Encoding") { case "gzip": diff --git a/testing/benchmark/Makefile b/testing/benchmark/Makefile index b300ff0de0e..560e3567300 100644 --- a/testing/benchmark/Makefile +++ b/testing/benchmark/Makefile @@ -6,6 +6,9 @@ MOXY_PATH ?= ../../systemtest/cmd/moxy MOXY_GOOS ?= linux MOXY_GOARCH ?= amd64 +APM_SERVER_GOOS ?= linux +APM_SERVER_GOARCH ?= amd64 + TFVARS_SOURCE ?= terraform.tfvars.example BENCHMARK_WARMUP_TIME ?= 5m @@ -76,6 +79,10 @@ moxy: @echo "-> Building moxy..." @cd $(MOXY_PATH) && CGO_ENABLED=0 GOOS=$(MOXY_GOOS) GOARCH=$(MOXY_GOARCH) go build . +.PHONY: apm-server +apm-server: + @cd ../.. && make build/apm-server-$(APM_SERVER_GOOS)-$(APM_SERVER_GOARCH) && mv build/apm-server-$(APM_SERVER_GOOS)-$(APM_SERVER_GOARCH) build/apm-server + .PHONY: init init: @terraform init diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index 240ac11a4b6..42ae7d4c968 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -35,7 +35,9 @@ module "tags" { project = startswith(var.user_name, "benchci") ? 
"benchmarks" : var.user_name } -provider "ec" {} +provider "ec" { + apikey = "aaa" +} provider "aws" { region = var.worker_region @@ -127,7 +129,7 @@ module "benchmark_worker" { private_key = var.private_key tags = merge(local.ci_tags, module.tags.tags) - depends_on = [module.vpc] + depends_on = [module.moxy, module.ec_deployment] } module "moxy" { @@ -158,9 +160,9 @@ module "standalone_apm_server" { aws_provisioner_key_name = var.private_key elasticsearch_url = module.moxy[0].moxy_url - elasticsearch_username = "" - elasticsearch_password = "" + elasticsearch_username = "elastic" + elasticsearch_password = module.moxy[0].moxy_password tags = merge(local.ci_tags, module.tags.tags) - depends_on = [module.vpc] + depends_on = [module.moxy] } diff --git a/testing/benchmark/outputs.tf b/testing/benchmark/outputs.tf index d222ab4cf6b..079aea7e934 100644 --- a/testing/benchmark/outputs.tf +++ b/testing/benchmark/outputs.tf @@ -26,13 +26,13 @@ output "kibana_url" { } output "apm_secret_token" { - value = var.run_standalone ? module.standalone_apm_server[0].apm_server_url : module.ec_deployment[0].apm_url + value = var.run_standalone ? module.standalone_apm_server[0].apm_secret_token : module.ec_deployment[0].apm_secret_token description = "The APM Server secret token" sensitive = true } output "apm_server_url" { - value = var.run_standalone ? module.standalone_apm_server[0].apm_secret_token : module.ec_deployment[0].apm_secret_token + value = var.run_standalone ? 
module.standalone_apm_server[0].apm_server_url : module.ec_deployment[0].apm_url description = "The APM Server URL" sensitive = true } diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index fc12a72dd88..43c7cb86a97 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -1,12 +1,13 @@ ## General configuration variable "user_name" { + default = "test-kostya-vpc-bench-apm" description = "Required username to use for prefixes" type = string } variable "run_standalone" { - default = false + default = true description = "If set run benchmarks against standalone APM Server conneted to moxy" type = bool } diff --git a/testing/infra/terraform/modules/moxy/main.tf b/testing/infra/terraform/modules/moxy/main.tf index ffbeb9cb3cf..0429d76d164 100644 --- a/testing/infra/terraform/modules/moxy/main.tf +++ b/testing/infra/terraform/modules/moxy/main.tf @@ -83,8 +83,9 @@ resource "aws_instance" "moxy" { provisioner "remote-exec" { inline = [ "sudo cp ${local.bin_path} moxy", - "chmod +x moxy", - "./moxy &" + "sudo chmod +x moxy", + "screen -d -m ./moxy -password=${random_password.moxy_password.result}", + "sleep 1" ] } @@ -95,3 +96,9 @@ resource "aws_key_pair" "provisioner_key" { public_key = file("${var.aws_provisioner_key_name}.pub") tags = var.tags } + + +resource "random_password" "moxy_password" { + length = 16 + special = false +} diff --git a/testing/infra/terraform/modules/moxy/outputs.tf b/testing/infra/terraform/modules/moxy/outputs.tf index a9c9844a84b..89414843b6b 100644 --- a/testing/infra/terraform/modules/moxy/outputs.tf +++ b/testing/infra/terraform/modules/moxy/outputs.tf @@ -1,4 +1,10 @@ output "moxy_url" { value = "http://${aws_instance.moxy.public_ip}:${local.moxy_port}" - description = "The Moxy Server URL" + description = "The Moxy server URL" +} + +output "moxy_password" { + value = random_password.moxy_password.result + description = "The Moxy password for communication" + sensitive = true } diff 
--git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index 47a2e129a89..c638a040808 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -202,9 +202,11 @@ resource "aws_instance" "apm" { "sleep 1", ] : [ "sudo cp ${local.bin_path} apm-server", - "chmod +x apm-server", + "sudo chmod +x apm-server", "sudo cp ${local.conf_path} apm-server.yml", - "./apm-server &" + "sudo mkdir -m 777 /var/log/apm-server", + "screen -d -m ./apm-server", + "sleep 1" ] ) } From 458c466e786045c0c6318481c9ead0b9fe47e55c Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Fri, 13 Sep 2024 15:46:08 -0700 Subject: [PATCH 21/24] add debug cat for apm-server logs --- .github/workflows/benchmarks.yml | 6 +++++- systemtest/cmd/moxy/main.go | 2 +- testing/benchmark/Makefile | 5 +++++ testing/benchmark/outputs.tf | 6 ++++++ .../terraform/modules/standalone_apm_server/outputs.tf | 5 +++++ 5 files changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 5959e300821..4676a4d5f28 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -61,7 +61,7 @@ jobs: GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }} GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }} # temporarily override to get faster feedback - BENCHMARK_WARMUP_TIME: 5m + BENCHMARK_WARMUP_TIME: 1m BENCHMARK_COUNT: 2 BENCHMARK_TIME: 1m steps: @@ -142,6 +142,10 @@ jobs: if: ${{ inputs.benchmarkAgents != '' }} run: make run-benchmark + - name: Cat APM Server logs + if: failure() + run: make cat-apm-server-logs + # Results are only indexed and uploaded if the run happens on the main branch. 
- name: Index benchmarks result diff --git a/systemtest/cmd/moxy/main.go b/systemtest/cmd/moxy/main.go index cf3c4cebd88..c1d546dbd66 100644 --- a/systemtest/cmd/moxy/main.go +++ b/systemtest/cmd/moxy/main.go @@ -58,7 +58,7 @@ func handler(logger *zap.Logger, username, password string) http.Handler { zap.String("actual", string(actualAuth)), zap.String("expected", expectedAuth), ) - // w.WriteHeader(http.StatusUnauthorized) + w.WriteHeader(http.StatusUnauthorized) return } switch r.URL.Path { diff --git a/testing/benchmark/Makefile b/testing/benchmark/Makefile index 560e3567300..5f3df05df4e 100644 --- a/testing/benchmark/Makefile +++ b/testing/benchmark/Makefile @@ -30,6 +30,7 @@ SSH_USER ?= ec2-user SSH_OPTS ?= -o LogLevel=ERROR -o StrictHostKeyChecking=no -o ServerAliveInterval=60 -o ServerAliveCountMax=10 SSH_KEY ?= ~/.ssh/id_rsa_terraform WORKER_IP = $(shell terraform output -raw public_ip) +APM_SERVER_IP = $(shell terraform output -raw apm_server_ip) SHELL = /bin/bash .SHELLFLAGS = -o pipefail -c @@ -134,6 +135,10 @@ _default-gobench-vars: $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),elasticsearch_zone_count=$(shell echo var.elasticsearch_zone_count | terraform console | tr -d '"')) $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),build_sha=$(shell curl -sL -H "Authorization: Bearer $(shell terraform output -raw apm_secret_token )" $(shell terraform output -raw apm_server_url ) | jq -r '.build_sha')) +.PHONY: cat-apm-server-logs +cat-apm-server-logs: + @ssh $(SSH_OPTS) -i $(SSH_KEY) $(SSH_USER)@$(APM_SERVER_IP) "cat /var/log/apm-server/*" + $(SSH_KEY): @ssh-keygen -t rsa -b 4096 -C "$(USER)@elastic.co" -N "" -f $(SSH_KEY) diff --git a/testing/benchmark/outputs.tf b/testing/benchmark/outputs.tf index 079aea7e934..247ad4fa2df 100644 --- a/testing/benchmark/outputs.tf +++ b/testing/benchmark/outputs.tf @@ -37,6 +37,12 @@ output "apm_server_url" { sensitive = true } +output "apm_server_ip" { + value = var.run_standalone ? 
module.standalone_apm_server[0].apm_server_ip : "" + description = "The APM Server EC2 IP address" + sensitive = true +} + output "admin_console_url" { value = !var.run_standalone ? module.ec_deployment[0].admin_console_url : "" description = "The admin console URL" diff --git a/testing/infra/terraform/modules/standalone_apm_server/outputs.tf b/testing/infra/terraform/modules/standalone_apm_server/outputs.tf index 7491343d314..138e496e592 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/outputs.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/outputs.tf @@ -8,3 +8,8 @@ output "apm_server_url" { value = "http://${aws_instance.apm.public_ip}:${local.apm_port}" description = "The APM Server URL" } + +output "apm_server_ip" { + value = aws_instance.apm.public_ip + description = "The APM Server EC2 IP address" +} From 196e3914c91ae29c3187943e870521e61f819f4e Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Fri, 13 Sep 2024 16:22:46 -0700 Subject: [PATCH 22/24] scale moxy up --- systemtest/cmd/moxy/main.go | 39 ++++++++++++++++++++++------------ testing/benchmark/variables.tf | 2 +- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/systemtest/cmd/moxy/main.go b/systemtest/cmd/moxy/main.go index c1d546dbd66..908b0020064 100644 --- a/systemtest/cmd/moxy/main.go +++ b/systemtest/cmd/moxy/main.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "net/http" + "sync" "github.com/klauspost/compress/gzip" "github.com/klauspost/compress/zstd" @@ -15,6 +16,12 @@ import ( "go.uber.org/zap/zapcore" ) +var memPool = sync.Pool{ + New: func() interface{} { + return new(bytes.Buffer) + }, +} + func main() { logLevel := zap.LevelFlag( "loglevel", zapcore.InfoLevel, @@ -49,18 +56,6 @@ func handler(logger *zap.Logger, username, password string) http.Handler { expectedAuth := fmt.Sprintf("%s:%s", username, password) return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("X-Elastic-Product", 
"Elasticsearch") - auth := r.Header.Get("Authorization") - actualAuth, err := base64.StdEncoding.DecodeString(auth) - if err != nil || string(actualAuth) != expectedAuth { - logger.Error( - "authentication failed", - zap.Error(err), - zap.String("actual", string(actualAuth)), - zap.String("expected", expectedAuth), - ) - w.WriteHeader(http.StatusUnauthorized) - return - } switch r.URL.Path { case "/": // MIS doesn't use this route, but apm-server checks for cluster_uuid @@ -81,9 +76,22 @@ func handler(logger *zap.Logger, username, password string) http.Handler { }, "tagline": "You Know, for Search" }`)) + return case "/_security/user/_has_privileges": w.Write([]byte(`{"username":"admin","has_all_requested":true,"cluster":{},"index":{},"application":{"apm":{"-":{"event:write":true}}}}`)) case "/_bulk": + auth := r.Header.Get("Authorization") + actualAuth, err := base64.StdEncoding.DecodeString(auth) + if err != nil || string(actualAuth) != expectedAuth { + logger.Error( + "authentication failed", + zap.Error(err), + zap.String("actual", string(actualAuth)), + zap.String("expected", expectedAuth), + ) + w.WriteHeader(http.StatusUnauthorized) + return + } first := true var body io.Reader switch r.Header.Get("Content-Encoding") { @@ -109,7 +117,12 @@ func handler(logger *zap.Logger, username, password string) http.Handler { body = r.Body } - var jsonw bytes.Buffer + jsonw := memPool.Get().(*bytes.Buffer) + defer func() { + jsonw.Reset() + memPool.Put(jsonw) + }() + jsonw.Write([]byte(`{"items":[`)) scanner := bufio.NewScanner(body) for scanner.Scan() { diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index 43c7cb86a97..80cd0950907 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -136,7 +136,7 @@ variable "apmbench_bin_path" { } variable "worker_instance_type" { - default = "c6i.large" + default = "c6i.2xlarge" type = string description = "Optional instance type to use for the worker VM" } From 
cbac4497f690bdb0aa88e3138af43478c068a921 Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Fri, 13 Sep 2024 16:51:07 -0700 Subject: [PATCH 23/24] benchmark standalone mode configurable profiles --- systemtest/cmd/moxy/main.go | 2 ++ testing/benchmark/main.tf | 4 ++-- .../benchmark/system-profiles/16GBx2zone.tfvars | 13 +++++++++++-- .../benchmark/system-profiles/1GBx1zone.tfvars | 11 +++++++++++ .../benchmark/system-profiles/2GBx1zone.tfvars | 13 ++++++++++++- .../benchmark/system-profiles/32GBx2zone.tfvars | 13 +++++++++++-- .../benchmark/system-profiles/4GBx1zone.tfvars | 13 ++++++++++++- .../benchmark/system-profiles/8GBx1zone.tfvars | 13 +++++++++++-- testing/benchmark/terraform.tfvars.example | 6 ++++++ testing/benchmark/variables.tf | 16 +++++++++++++++- 10 files changed, 93 insertions(+), 11 deletions(-) diff --git a/systemtest/cmd/moxy/main.go b/systemtest/cmd/moxy/main.go index 908b0020064..557da1bf432 100644 --- a/systemtest/cmd/moxy/main.go +++ b/systemtest/cmd/moxy/main.go @@ -92,6 +92,7 @@ func handler(logger *zap.Logger, username, password string) http.Handler { w.WriteHeader(http.StatusUnauthorized) return } + first := true var body io.Reader switch r.Header.Get("Content-Encoding") { @@ -146,6 +147,7 @@ func handler(logger *zap.Logger, username, password string) http.Handler { jsonw.Write([]byte(`]}`)) w.Write(jsonw.Bytes()) } + // TODO additionally report events throughput metric here, to index into benchmarks. 
default: logger.Error("unknown path", zap.String("path", r.URL.Path)) } diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index 42ae7d4c968..68f3f9bfbeb 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -137,7 +137,7 @@ module "moxy" { source = "../infra/terraform/modules/moxy" vpc_id = module.vpc.vpc_id - instance_type = var.worker_instance_type + instance_type = var.standalone_moxy_instance_size moxy_bin_path = var.moxy_bin_path aws_provisioner_key_name = var.private_key @@ -153,7 +153,7 @@ module "standalone_apm_server" { vpc_id = module.vpc.vpc_id aws_os = "amzn2-ami-hvm-*-x86_64-ebs" - apm_instance_type = var.worker_instance_type + apm_instance_type = var.standalone_apm_server_instance_size apm_server_bin_path = var.apm_server_bin_path ea_managed = false diff --git a/testing/benchmark/system-profiles/16GBx2zone.tfvars b/testing/benchmark/system-profiles/16GBx2zone.tfvars index d081604f332..e6ba492fa43 100644 --- a/testing/benchmark/system-profiles/16GBx2zone.tfvars +++ b/testing/benchmark/system-profiles/16GBx2zone.tfvars @@ -1,5 +1,11 @@ user_name = "USER" +# APM bench + +worker_instance_type = "c6i.2xlarge" + +# Elastic Cloud + # The number of AZs the APM Server should span. apm_server_zone_count = 1 # The Elasticsearch cluster node size. 
@@ -10,5 +16,8 @@ elasticsearch_zone_count = 2 apm_server_size = "16g" # Number of shards for the ES indices apm_shards = 4 -# Benchmarks executor size executor -worker_instance_type = "c6i.2xlarge" + +# Standalone + +standalone_apm_server_instance_size = "c6i.4xlarge" +standalone_moxy_instance_size = "c6i.8xlarge" diff --git a/testing/benchmark/system-profiles/1GBx1zone.tfvars b/testing/benchmark/system-profiles/1GBx1zone.tfvars index a2ca3dac002..8b1ff546e60 100644 --- a/testing/benchmark/system-profiles/1GBx1zone.tfvars +++ b/testing/benchmark/system-profiles/1GBx1zone.tfvars @@ -1,5 +1,11 @@ user_name = "USER" +# APM bench + +worker_instance_type = "c6i.large" + +# Elastic Cloud + # The number of AZs the APM Server should span. apm_server_zone_count = 1 # The Elasticsearch cluster node size. @@ -8,3 +14,8 @@ elasticsearch_size = "16g" elasticsearch_zone_count = 2 # APM server instance size apm_server_size = "1g" + +# Standalone + +standalone_apm_server_instance_size = "c6i.large" +standalone_moxy_instance_size = "c6i.xlarge" diff --git a/testing/benchmark/system-profiles/2GBx1zone.tfvars b/testing/benchmark/system-profiles/2GBx1zone.tfvars index 668f12f9edf..a3114b4b989 100644 --- a/testing/benchmark/system-profiles/2GBx1zone.tfvars +++ b/testing/benchmark/system-profiles/2GBx1zone.tfvars @@ -1,5 +1,11 @@ user_name = "USER" +# APM bench + +worker_instance_type = "c6i.large" + +# Elastic Cloud + # The number of AZs the APM Server should span. apm_server_zone_count = 1 # The Elasticsearch cluster node size. @@ -7,4 +13,9 @@ elasticsearch_size = "16g" # The number of AZs the Elasticsearch cluster should have. 
elasticsearch_zone_count = 2 # APM server instance size -apm_server_size = "2g" \ No newline at end of file +apm_server_size = "2g" + +# Standalone + +standalone_apm_server_instance_size = "c6i.large" +standalone_moxy_instance_size = "c6i.xlarge" diff --git a/testing/benchmark/system-profiles/32GBx2zone.tfvars b/testing/benchmark/system-profiles/32GBx2zone.tfvars index 10a9180257b..7735b9f7695 100644 --- a/testing/benchmark/system-profiles/32GBx2zone.tfvars +++ b/testing/benchmark/system-profiles/32GBx2zone.tfvars @@ -1,5 +1,11 @@ user_name = "USER" +# APM bench + +worker_instance_type = "c6i.2xlarge" + +# Elastic Cloud + # The number of AZs the APM Server should span. apm_server_zone_count = 1 # The Elasticsearch cluster node size. @@ -12,5 +18,8 @@ elasticsearch_dedicated_masters = true apm_server_size = "32g" # Number of shards for the ES indices apm_shards = 4 -# Benchmarks executor size executor -worker_instance_type = "c6i.2xlarge" + +# Standalone + +standalone_apm_server_instance_size = "c6i.8xlarge" +standalone_moxy_instance_size = "c6i.16xlarge" diff --git a/testing/benchmark/system-profiles/4GBx1zone.tfvars b/testing/benchmark/system-profiles/4GBx1zone.tfvars index f55f9099444..23732bb8448 100644 --- a/testing/benchmark/system-profiles/4GBx1zone.tfvars +++ b/testing/benchmark/system-profiles/4GBx1zone.tfvars @@ -1,5 +1,11 @@ user_name = "USER" +# APM bench + +worker_instance_type = "c6i.xlarge" + +# Elastic Cloud + # The number of AZs the APM Server should span. apm_server_zone_count = 1 # The Elasticsearch cluster node size. @@ -7,4 +13,9 @@ elasticsearch_size = "32g" # The number of AZs the Elasticsearch cluster should have. 
elasticsearch_zone_count = 2 # APM server instance size -apm_server_size = "4g" \ No newline at end of file +apm_server_size = "4g" + +# Standalone + +standalone_apm_server_instance_size = "c6i.xlarge" +standalone_moxy_instance_size = "c6i.2xlarge" diff --git a/testing/benchmark/system-profiles/8GBx1zone.tfvars b/testing/benchmark/system-profiles/8GBx1zone.tfvars index 62719a89b15..a5802b19e29 100644 --- a/testing/benchmark/system-profiles/8GBx1zone.tfvars +++ b/testing/benchmark/system-profiles/8GBx1zone.tfvars @@ -1,5 +1,11 @@ user_name = "USER" +# APM bench + +worker_instance_type = "c6i.xlarge" + +# Elastic Cloud + # The number of AZs the APM Server should span. apm_server_zone_count = 1 # The Elasticsearch cluster node size. @@ -8,5 +14,8 @@ elasticsearch_size = "64g" elasticsearch_zone_count = 2 # APM server instance size apm_server_size = "8g" -# Benchmarks executor size executor -worker_instance_type = "c6i.2xlarge" + +# Standalone + +standalone_apm_server_instance_size = "c6i.2xlarge" +standalone_moxy_instance_size = "c6i.4xlarge" diff --git a/testing/benchmark/terraform.tfvars.example b/testing/benchmark/terraform.tfvars.example index d58973b61b1..2b44a828d6c 100644 --- a/testing/benchmark/terraform.tfvars.example +++ b/testing/benchmark/terraform.tfvars.example @@ -67,3 +67,9 @@ user_name = "USER" # Override the default shard settings for APM indices. Defaults to 0, which doesn't # change the default shard settings. # apm_shards = 12 + +# Override the default APM Server VM size in standalone bench mode. +# standalone_apm_server_instance_size = "c6i.2xlarge" + +# Override the default Moxy VM size in standalone bench mode. 
+# standalone_moxy_instance_size = "c6i.4xlarge" \ No newline at end of file diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index 80cd0950907..e08406aaffa 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -93,6 +93,8 @@ variable "drop_pipeline" { type = bool } +# Standalone + variable "apm_server_bin_path" { default = "../../build" type = string @@ -105,6 +107,18 @@ variable "moxy_bin_path" { description = "Optional path to the moxy binary" } +variable "standalone_apm_server_instance_size" { + default = "c6i.2xlarge" + type = string + description = "Optional instance type to use for the APM Server VM" +} + +variable "standalone_moxy_instance_size" { + default = "c6i.4xlarge" + type = string + description = "Optional instance type to use for the Moxy VM" +} + ## VPC Network settings variable "vpc_cidr" { @@ -136,7 +150,7 @@ variable "apmbench_bin_path" { } variable "worker_instance_type" { - default = "c6i.2xlarge" + default = "c6i.xlarge" type = string description = "Optional instance type to use for the worker VM" } From 7bd059aea2f1275e61dac5431beec286563cad8b Mon Sep 17 00:00:00 2001 From: Kostiantyn Masliuk <1pkg@protonmail.com> Date: Fri, 13 Sep 2024 17:41:41 -0700 Subject: [PATCH 24/24] Index standalone benchmarks results + revert local changes --- .github/workflows/benchmarks.yml | 14 +++++++------- systemtest/benchtest/profiles.go | 7 +------ systemtest/cmd/moxy/main.go | 1 - testing/benchmark/Makefile | 10 ++++++++++ testing/benchmark/main.tf | 4 +--- testing/benchmark/variables.tf | 3 +-- .../modules/standalone_apm_server/main.tf | 2 +- 7 files changed, 21 insertions(+), 20 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 4676a4d5f28..6407d19b69d 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -142,18 +142,18 @@ jobs: if: ${{ inputs.benchmarkAgents != '' }} run: make run-benchmark - - name: Cat APM 
Server logs - if: failure() + - name: Cat standalone server logs + if: ${{ inputs.runStandalone && failure() }} run: make cat-apm-server-logs # Results are only indexed and uploaded if the run happens on the main branch. - name: Index benchmarks result - if: github.ref == 'refs/heads/main' + # if: github.ref == 'refs/heads/main' run: make index-benchmark-results - name: Download PNG - if: github.ref == 'refs/heads/main' + # if: github.ref == 'refs/heads/main' run: >- ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh ${{ secrets.KIBANA_BENCH_ENDPOINT }} @@ -162,7 +162,7 @@ jobs: $PNG_REPORT_FILE - name: Upload PNG - if: github.ref == 'refs/heads/main' + # if: github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 with: name: kibana-png-report @@ -170,7 +170,7 @@ jobs: if-no-files-found: error - name: Upload PNG to AWS S3 - if: github.ref == 'refs/heads/main' + # if: github.ref == 'refs/heads/main' id: s3-upload-png env: AWS_DEFAULT_REGION: us-east-1 @@ -180,7 +180,7 @@ jobs: echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT" - name: Upload benchmark result - if: github.ref == 'refs/heads/main' + # if: github.ref == 'refs/heads/main' uses: actions/upload-artifact@v4 with: name: benchmark-result diff --git a/systemtest/benchtest/profiles.go b/systemtest/benchtest/profiles.go index 2214c7e764e..e3dd240df44 100644 --- a/systemtest/benchtest/profiles.go +++ b/systemtest/benchtest/profiles.go @@ -88,12 +88,7 @@ func (p *profiles) recordCPU() error { if benchConfig.CPUProfile == "" { return nil } - // Limit the CPU profile collection to static 1 minute interval per a benchmark. - // Otherwise the profile will be too heavy and over influenced by the "longest" benchmark. 
- duration := time.Minute - if duration > benchConfig.Benchtime { - duration = benchConfig.Benchtime - } + duration := benchConfig.Benchtime profile, err := fetchProfile("/debug/pprof/profile", duration) if err != nil { return fmt.Errorf("failed to fetch CPU profile: %w", err) diff --git a/systemtest/cmd/moxy/main.go b/systemtest/cmd/moxy/main.go index 557da1bf432..1c026b557f8 100644 --- a/systemtest/cmd/moxy/main.go +++ b/systemtest/cmd/moxy/main.go @@ -58,7 +58,6 @@ func handler(logger *zap.Logger, username, password string) http.Handler { w.Header().Set("X-Elastic-Product", "Elasticsearch") switch r.URL.Path { case "/": - // MIS doesn't use this route, but apm-server checks for cluster_uuid w.Write([]byte(`{ "name": "instance-0000000001", "cluster_name": "eca3b3c3bbee4816bb92f82184e328dd", diff --git a/testing/benchmark/Makefile b/testing/benchmark/Makefile index 5f3df05df4e..0c034355375 100644 --- a/testing/benchmark/Makefile +++ b/testing/benchmark/Makefile @@ -9,6 +9,8 @@ MOXY_GOARCH ?= amd64 APM_SERVER_GOOS ?= linux APM_SERVER_GOARCH ?= amd64 +RUN_STANDALONE ?= false + TFVARS_SOURCE ?= terraform.tfvars.example BENCHMARK_WARMUP_TIME ?= 5m @@ -127,6 +129,12 @@ index-benchmark-results: _default-gobench-vars .PHONY: _default-gobench-vars _default-gobench-vars: +ifeq ($(RUN_STANDALONE),true) + $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),apm_server_size=$(shell echo var.standalone_apm_server_instance_size | terraform console | tr -d '"')) + $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),moxy_size=$(shell echo var.standalone_moxy_instance_size | terraform console | tr -d '"')) + $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),build_sha=$(shell git rev-parse HEAD)) + $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),bench_mode=standalone) +else # TODO(marclop) Update code below to use a foor loop, rather than copying the lines. 
$(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),apm_server_size=$(shell echo var.apm_server_size | terraform console | tr -d '"')) $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),elasticsearch_size=$(shell echo var.elasticsearch_size | terraform console | tr -d '"')) @@ -134,6 +142,8 @@ _default-gobench-vars: $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),apm_server_zone_count=$(shell echo var.apm_server_zone_count | terraform console | tr -d '"')) $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),elasticsearch_zone_count=$(shell echo var.elasticsearch_zone_count | terraform console | tr -d '"')) $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),build_sha=$(shell curl -sL -H "Authorization: Bearer $(shell terraform output -raw apm_secret_token )" $(shell terraform output -raw apm_server_url ) | jq -r '.build_sha')) + $(eval GOBENCH_DEFAULT_TAGS = $(GOBENCH_DEFAULT_TAGS),bench_mode=cloud) +endif .PHONY: cat-apm-server-logs cat-apm-server-logs: diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index 68f3f9bfbeb..984ae7375c7 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -35,9 +35,7 @@ module "tags" { project = startswith(var.user_name, "benchci") ? 
"benchmarks" : var.user_name } -provider "ec" { - apikey = "aaa" -} +provider "ec" {} provider "aws" { region = var.worker_region diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index e08406aaffa..c658e63aab2 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -1,13 +1,12 @@ ## General configuration variable "user_name" { - default = "test-kostya-vpc-bench-apm" description = "Required username to use for prefixes" type = string } variable "run_standalone" { - default = true + default = false description = "If set run benchmarks against standalone APM Server conneted to moxy" type = bool } diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index c638a040808..a43f81b36da 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -6,7 +6,7 @@ locals { "debian-10-arm64" = "136693071363" # debian "debian-11-arm64" = "136693071363" # debian "amzn2-ami-kernel-5.10" = "137112412989" # amazon - "amzn2-ami-hvm-*-x86_64-ebs" = "137112412989" #amazon + "amzn2-ami-hvm-*-x86_64-ebs" = "137112412989" # amazon "al2023-ami-2023" = "137112412989" # amazon "RHEL-7" = "309956199498" # Red Hat "RHEL-8" = "309956199498" # Red Hat