.github/workflows/benchmarks.yml

name: benchmarks

on:
  workflow_dispatch:
    inputs:
      runStandalone:
        description: 'Run the benchmarks against standalone APM Server with Moxy'
        required: false
        type: boolean
        default: false
      profile:
        description: 'The system profile used to run the benchmarks'
        required: false
        type: string
      runOnStable:
        description: 'Run the benchmarks on the latest stable version'
        required: false
        type: boolean
        default: false
      benchmarkAgents:
        description: 'Set the number of agents to send data to the APM Server'
        required: false
        type: string
      benchmarkRun:
        description: 'Set the expression that matches the benchmark scenarios to run'
        required: false
        type: string
  schedule:
    - cron: '0 17 * * *'

env:
  PNG_REPORT_FILE: out.png
  BENCHMARK_CPU_OUT: default.pgo
  BENCHMARK_RESULT: benchmark-result.txt
  WORKING_DIRECTORY: testing/benchmark

permissions:
  contents: read

jobs:
  benchmarks:
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ${{ env.WORKING_DIRECTORY }}
    permissions:
      contents: write
      id-token: write
      pull-requests: write
    env:
      SSH_KEY: ./id_rsa_terraform
      TF_VAR_private_key: ./id_rsa_terraform
      TF_VAR_public_key: ./id_rsa_terraform.pub
      TF_VAR_run_standalone: ${{ inputs.runStandalone }}
      TFVARS_SOURCE: ${{ inputs.profile || 'system-profiles/8GBx1zone.tfvars' }} # // Default to use an 8gb profile
      TF_VAR_BUILD_ID: ${{ github.run_id }}
      TF_VAR_ENVIRONMENT: ci
      TF_VAR_REPO: ${{ github.repository }}
      GOBENCH_TAGS: branch=${{ github.head_ref || github.ref }},commit=${{ github.sha }},target_branch=${{ github.base_ref }}
      GOBENCH_PASSWORD: ${{ secrets.GOBENCH_PASSWORD }}
      GOBENCH_USERNAME: ${{ secrets.GOBENCH_USERNAME }}
      GOBENCH_HOST: ${{ secrets.GOBENCH_HOST }}
      # temporarily override to get faster feedback
      BENCHMARK_WARMUP_TIME: 1m
      BENCHMARK_COUNT: 2
      BENCHMARK_TIME: 1m
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-go@v5
        with:
          go-version-file: 'go.mod'

      - uses: rlespinasse/github-slug-action@797d68864753cbceedc271349d402da4590e6302

      - name: Set up env
        run: |
          SLUGGED_BRANCH_NAME=${{ env.GITHUB_HEAD_REF_SLUG || env.GITHUB_REF_SLUG }}
          CREATED_AT=$(date +%s)
          echo "TF_VAR_BRANCH=${SLUGGED_BRANCH_NAME}" >> "$GITHUB_ENV"
          echo "TF_VAR_CREATED_AT=${CREATED_AT}" >> "$GITHUB_ENV"
          echo "USER=benchci-$SLUGGED_BRANCH_NAME-$CREATED_AT" >> "$GITHUB_ENV"

          if [ ! -z "${{ inputs.benchmarkAgents }}" ]; then
            echo "BENCHMARK_AGENTS=${{ inputs.benchmarkAgents }}" >> "$GITHUB_ENV"
          fi
          if [ ! -z "${{ inputs.benchmarkRun }}" ]; then
            echo "BENCHMARK_RUN=${{ inputs.benchmarkRun }}" >> "$GITHUB_ENV"
          fi

      - name: Log in to the Elastic Container registry
        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
        with:
          registry: ${{ secrets.ELASTIC_DOCKER_REGISTRY }}
          username: ${{ secrets.ELASTIC_DOCKER_USERNAME }}
          password: ${{ secrets.ELASTIC_DOCKER_PASSWORD }}


      - uses: elastic/oblt-actions/google/auth@v1

      - uses: elastic/oblt-actions/aws/auth@v1
        with:
          role-duration-seconds: 18000 # 5 hours

      - uses: google-github-actions/get-secretmanager-secrets@95a0b09b8348ef3d02c68c6ba5662a037e78d713 # v2.1.4
        with:
          export_to_environment: true
          secrets: |-
            EC_API_KEY:elastic-observability/elastic-cloud-observability-team-pro-api-key

      - uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: 1.3.7
          terraform_wrapper: false

      - name: Build apmbench
        run: make apmbench $SSH_KEY terraform.tfvars

      - name: Build APM Server and Moxy
        if: ${{ inputs.runStandalone }}
        run: |
          make moxy
          cd ../.. && make build/apm-server-linux-amd64 && mv build/apm-server-linux-amd64 build/apm-server

      - name: Override docker committed version
        if: ${{ ! inputs.runOnStable && ! inputs.runStandalone}}
        run: make docker-override-committed-version

      - name: Spin up benchmark environment
        id: deploy
        run: |
          make init apply
          admin_console_url=$(terraform output -raw admin_console_url)
          echo "admin_console_url=$admin_console_url" >> "$GITHUB_OUTPUT"
          echo "-> infra setup done"

      - name: Run benchmarks autotuned
        if: ${{ inputs.benchmarkAgents == '' }}
        run: make run-benchmark-autotuned

      - name: Run benchmarks self tuned
        if: ${{ inputs.benchmarkAgents != '' }}
        run: make run-benchmark

      # Results are only indexed and uploaded if the run happens on the main branch.

      - name: Index benchmarks result
        if: github.ref == 'refs/heads/main'
        run: make index-benchmark-results

      - name: Download PNG
        if: github.ref == 'refs/heads/main'
        run: >-
          ${{ github.workspace }}/.ci/scripts/download-png-from-kibana.sh
          ${{ secrets.KIBANA_BENCH_ENDPOINT }}
          ${{ secrets.KIBANA_BENCH_USERNAME }}
          ${{ secrets.KIBANA_BENCH_PASSWORD }}
          $PNG_REPORT_FILE

      - name: Upload PNG
        if: github.ref == 'refs/heads/main'
        uses: actions/upload-artifact@v4
        with:
          name: kibana-png-report
          path: ${{ env.WORKING_DIRECTORY }}/${{ env.PNG_REPORT_FILE }}
          if-no-files-found: error

      - name: Upload PNG to AWS S3
        if: github.ref == 'refs/heads/main'
        id: s3-upload-png
        env:
          AWS_DEFAULT_REGION: us-east-1
        run: |
          DEST_NAME="github-run-id-${{ github.run_id }}.png"
          aws s3 --debug cp ${{ env.PNG_REPORT_FILE }} s3://elastic-apm-server-benchmark-reports/${DEST_NAME}
          echo "png_report_url=https://elastic-apm-server-benchmark-reports.s3.amazonaws.com/${DEST_NAME}" >> "$GITHUB_OUTPUT"

      - name: Upload benchmark result
        if: github.ref == 'refs/heads/main'
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-result
          path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_RESULT }}
          if-no-files-found: error

      # The next section injects CPU profile collected by apmbench into the build.
      # By copying the profile, uploading it to the artifacts and pushing it
      # via a PR to update default.pgo.

      - name: Copy CPU profile
        if: ${{ inputs.runStandalone }}
        run: make cp-cpuprof
      
      - name: Upload CPU profile
        if: ${{ inputs.runStandalone }}
        uses: actions/upload-artifact@v4
        with:
          name: cpu-profile
          path: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }}
          if-no-files-found: error

      - name: Open PGO PR
        if: ${{ inputs.runStandalone }}
        run: |
          cd "${{ github.workspace }}"
          mv "$PROFILE_PATH" x-pack/apm-server/default.pgo
          git config user.email "apm@elastic.co"
          git config user.name "APM Server"
          git fetch origin main
          git checkout main
          BRANCH="update-pgo-$(date +%s)"
          git checkout -b "$BRANCH"
          git add x-pack/apm-server/default.pgo
          git commit -m "PGO: Update default.pgo from benchmarks $WORKFLOW."
          git push -u origin "$BRANCH"
          gh pr create -B main -H "$BRANCH" -t "PGO: Update default.pgo" -b "Update default.pgo CPU profile from the benchmarks [workflow]($WORKFLOW)." -R elastic/apm-server
        env:
          PROFILE_PATH: ${{ env.WORKING_DIRECTORY }}/${{ env.BENCHMARK_CPU_OUT }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          WORKFLOW: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}/attempts/${{ github.run_attempt }}

      - name: Tear down benchmark environment
        if: always()
        run: make destroy

      # Notify failure to Slack only on schedule (nightly run)
      - if: failure() && github.event_name == 'schedule'
        uses: elastic/oblt-actions/slack/notify-result@v1
        with:
          bot-token: ${{ secrets.SLACK_BOT_TOKEN }}
          channel-id: "#apm-server"
          message: Nightly APM Server benchmarks failed! SDH Duty assignee, please have a look and follow this <https://github.com/elastic/observability-dev/blob/main/docs/apm/apm-server/runbooks/benchmarks.md|Runbook>!

      # Notify result to Slack only on schedule (nightly run)
      - if: github.event_name == 'schedule'
        uses: slackapi/slack-github-action@37ebaef184d7626c5f204ab8d3baff4262dd30f0 # v1.27.0
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
        with:
          channel-id: "#apm-server"
          payload: |
            {
                "blocks": [
                    {
                        "type": "section",
                        "text": {
                            "type": "mrkdwn",
                            "text": "${{ github.event_name == 'schedule' && 'Nightly' || '' }} APM Server benchmarks succesfully executed!"
                        },
                        "accessory": {
                            "type": "button",
                            "style": "primary",
                            "text": {
                                "type": "plain_text",
                                "text": "Workflow Run #${{ github.run_id }}",
                                "emoji": true
                            },
                            "url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}",
                            "action_id": "workflow-run-button"
                        }
                    },
                    {
                        "type": "image",
                        "image_url": "${{ steps.s3-upload-png.outputs.png_report_url }}",
                        "alt_text": "kibana-png-report"
                    },
                    {
                        "type": "actions",
                        "elements": [
                            {
                                "type": "button",
                                "text": {
                                    "type": "plain_text",
                                    "text": "Benchmarks dashboard"
                                },
                                "url": "${{ secrets.KIBANA_BENCH_DASHBOARD }}",
                                "action_id": "kibana-dashboard-button"
                            },
                            {
                                "type": "button",
                                "text": {
                                    "type": "plain_text",
                                    "text": "Elastic Cloud deployment"
                                },
                                "url": "${{ steps.deploy.outputs.admin_console_url }}",
                                "action_id": "admin-console-button"
                            }
                        ]
                    }
                ]
            }